xref: /netbsd-src/external/bsd/mdocml/dist/mansearch.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	Id: mansearch.c,v 1.65 2016/07/09 15:24:19 schwarze Exp  */
2 /*
3  * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/mman.h>
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <glob.h>
30 #include <limits.h>
31 #include <regex.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stddef.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include <sqlite3.h>
40 #ifndef SQLITE_DETERMINISTIC
41 #define SQLITE_DETERMINISTIC 0
42 #endif
43 
44 #include "main.h"
45 #include "mandoc.h"
46 #include "mandoc_aux.h"
47 #include "mandoc_ohash.h"
48 #include "manconf.h"
49 #include "mansearch.h"
50 
51 extern int mansearch_keymax;
52 extern const char *const mansearch_keynames[];
53 
/*
 * Parameter binding helpers.
 * Each one binds _v to the parameter with index _i of the prepared
 * statement _s, increments _i afterwards, and terminates the program
 * with the database's error message if the bind call does not
 * return SQLITE_OK.  Note that _i is evaluated with a side effect,
 * so it must be a plain modifiable variable.
 * SQLITE_STATIC is passed for text and blobs, so the bound memory
 * is not copied and has to outlive the use of the statement.
 */
#define	SQL_BIND_TEXT(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_text((_s), (_i)++, (_v), -1, \
		    SQLITE_STATIC) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_INT64(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_int64((_s), (_i)++, (_v)) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
#define	SQL_BIND_BLOB(_db, _s, _i, _v) \
	do { \
		if (sqlite3_bind_blob((_s), (_i)++, (&_v), sizeof(_v), \
		    SQLITE_STATIC) != SQLITE_OK) \
			errx((int)MANDOCLEVEL_SYSERR, "%s", \
			    sqlite3_errmsg((_db))); \
	} while (0)
69 
/*
 * One term of a compiled search expression.
 * Terms are chained through "next" in the order they were parsed.
 */
struct	expr {
	/* compiled regexp, if applicable */
	regex_t		 regexp;
	/* string to search for, if applicable */
	const char	*substr;
	/* next term in sequence */
	struct expr	*next;
	/* type-mask */
	uint64_t	 bits;
	/* equality, not substring match */
	int		 equal;
	/* number of opening parentheses before this term */
	int		 open;
	/* logical AND before this term */
	int		 and;
	/* number of closing parentheses after this term */
	int		 close;
};
80 
/*
 * One matching page, collected in a hash table keyed on "pageid"
 * while the results of the search query are read.
 */
struct	match {
	/* identifier in database */
	uint64_t	 pageid;
	/* name type mask */
	uint64_t	 bits;
	/* manual page description */
	char		*desc;
	/* bit field: formatted, zipped? */
	int		 form;
};
87 
88 static	void		 buildnames(const struct mansearch *,
89 				struct manpage *, sqlite3 *,
90 				sqlite3_stmt *, uint64_t,
91 				const char *, int form);
92 static	char		*buildoutput(sqlite3 *, sqlite3_stmt *,
93 				 uint64_t, uint64_t);
94 static	struct expr	*exprcomp(const struct mansearch *,
95 				int, char *[]);
96 static	void		 exprfree(struct expr *);
97 static	struct expr	*exprterm(const struct mansearch *, char *, int);
98 static	int		 manpage_compare(const void *, const void *);
99 static	void		 sql_append(char **sql, size_t *sz,
100 				const char *newstr, int count);
101 static	void		 sql_match(sqlite3_context *context,
102 				int argc, sqlite3_value **argv);
103 static	void		 sql_regexp(sqlite3_context *context,
104 				int argc, sqlite3_value **argv);
105 static	char		*sql_statement(const struct expr *);
106 
107 
108 int
109 mansearch_setup(int start)
110 {
111 	static void	*pagecache;
112 	int		 c;
113 
114 #define	PC_PAGESIZE	1280
115 #define	PC_NUMPAGES	256
116 
117 	if (start) {
118 		if (NULL != pagecache) {
119 			warnx("pagecache already enabled");
120 			return (int)MANDOCLEVEL_BADARG;
121 		}
122 
123 		pagecache = mmap(NULL, PC_PAGESIZE * PC_NUMPAGES,
124 		    PROT_READ | PROT_WRITE,
125 		    MAP_SHARED | MAP_ANON, -1, 0);
126 
127 		if (MAP_FAILED == pagecache) {
128 			warn("mmap");
129 			pagecache = NULL;
130 			return (int)MANDOCLEVEL_SYSERR;
131 		}
132 
133 		c = sqlite3_config(SQLITE_CONFIG_PAGECACHE,
134 		    pagecache, PC_PAGESIZE, PC_NUMPAGES);
135 
136 		if (SQLITE_OK == c)
137 			return (int)MANDOCLEVEL_OK;
138 
139 		warnx("pagecache: %s", sqlite3_errstr(c));
140 
141 	} else if (NULL == pagecache) {
142 		warnx("pagecache missing");
143 		return (int)MANDOCLEVEL_BADARG;
144 	}
145 
146 	if (-1 == munmap(pagecache, PC_PAGESIZE * PC_NUMPAGES)) {
147 		warn("munmap");
148 		pagecache = NULL;
149 		return (int)MANDOCLEVEL_SYSERR;
150 	}
151 
152 	pagecache = NULL;
153 	return (int)MANDOCLEVEL_OK;
154 }
155 
156 int
157 mansearch(const struct mansearch *search,
158 		const struct manpaths *paths,
159 		int argc, char *argv[],
160 		struct manpage **res, size_t *sz)
161 {
162 	int64_t		 pageid;
163 	uint64_t	 outbit, iterbit;
164 	char		 buf[PATH_MAX];
165 	char		*sql;
166 	struct manpage	*mpage;
167 	struct expr	*e, *ep;
168 	sqlite3		*db;
169 	sqlite3_stmt	*s, *s2;
170 	struct match	*mp;
171 	struct ohash	 htab;
172 	unsigned int	 idx;
173 	size_t		 i, j, cur, maxres;
174 	int		 c, chdir_status, getcwd_status, indexbit;
175 
176 	if (argc == 0 || (e = exprcomp(search, argc, argv)) == NULL) {
177 		*sz = 0;
178 		return 0;
179 	}
180 
181 	cur = maxres = 0;
182 	*res = NULL;
183 
184 	if (NULL != search->outkey) {
185 		outbit = TYPE_Nd;
186 		for (indexbit = 0, iterbit = 1;
187 		     indexbit < mansearch_keymax;
188 		     indexbit++, iterbit <<= 1) {
189 			if (0 == strcasecmp(search->outkey,
190 			    mansearch_keynames[indexbit])) {
191 				outbit = iterbit;
192 				break;
193 			}
194 		}
195 	} else
196 		outbit = 0;
197 
198 	/*
199 	 * Remember the original working directory, if possible.
200 	 * This will be needed if the second or a later directory
201 	 * is given as a relative path.
202 	 * Do not error out if the current directory is not
203 	 * searchable: Maybe it won't be needed after all.
204 	 */
205 
206 	if (getcwd(buf, PATH_MAX) == NULL) {
207 		getcwd_status = 0;
208 		(void)strlcpy(buf, strerror(errno), sizeof(buf));
209 	} else
210 		getcwd_status = 1;
211 
212 	sql = sql_statement(e);
213 
214 	/*
215 	 * Loop over the directories (containing databases) for us to
216 	 * search.
217 	 * Don't let missing/bad databases/directories phase us.
218 	 * In each, try to open the resident database and, if it opens,
219 	 * scan it for our match expression.
220 	 */
221 
222 	chdir_status = 0;
223 	for (i = 0; i < paths->sz; i++) {
224 		if (chdir_status && paths->paths[i][0] != '/') {
225 			if ( ! getcwd_status) {
226 				warnx("%s: getcwd: %s", paths->paths[i], buf);
227 				continue;
228 			} else if (chdir(buf) == -1) {
229 				warn("%s", buf);
230 				continue;
231 			}
232 		}
233 		if (chdir(paths->paths[i]) == -1) {
234 			warn("%s", paths->paths[i]);
235 			continue;
236 		}
237 		chdir_status = 1;
238 
239 		c = sqlite3_open_v2(MANDOC_DB, &db,
240 		    SQLITE_OPEN_READONLY, NULL);
241 
242 		if (SQLITE_OK != c) {
243 			warn("%s/%s", paths->paths[i], MANDOC_DB);
244 			sqlite3_close(db);
245 			continue;
246 		}
247 
248 		/*
249 		 * Define the SQL functions for substring
250 		 * and regular expression matching.
251 		 */
252 
253 		c = sqlite3_create_function(db, "match", 2,
254 		    SQLITE_UTF8 | SQLITE_DETERMINISTIC,
255 		    NULL, sql_match, NULL, NULL);
256 		assert(SQLITE_OK == c);
257 		c = sqlite3_create_function(db, "regexp", 2,
258 		    SQLITE_UTF8 | SQLITE_DETERMINISTIC,
259 		    NULL, sql_regexp, NULL, NULL);
260 		assert(SQLITE_OK == c);
261 
262 		j = 1;
263 		c = sqlite3_prepare_v2(db, sql, -1, &s, NULL);
264 		if (SQLITE_OK != c)
265 			errx((int)MANDOCLEVEL_SYSERR,
266 			    "%s", sqlite3_errmsg(db));
267 
268 		for (ep = e; NULL != ep; ep = ep->next) {
269 			if (NULL == ep->substr) {
270 				SQL_BIND_BLOB(db, s, j, ep->regexp);
271 			} else
272 				SQL_BIND_TEXT(db, s, j, ep->substr);
273 			if (0 == ((TYPE_Nd | TYPE_Nm) & ep->bits))
274 				SQL_BIND_INT64(db, s, j, ep->bits);
275 		}
276 
277 		mandoc_ohash_init(&htab, 4, offsetof(struct match, pageid));
278 
279 		/*
280 		 * Hash each entry on its [unique] document identifier.
281 		 * This is a uint64_t.
282 		 * Instead of using a hash function, simply convert the
283 		 * uint64_t to a uint32_t, the hash value's type.
284 		 * This gives good performance and preserves the
285 		 * distribution of buckets in the table.
286 		 */
287 		while (SQLITE_ROW == (c = sqlite3_step(s))) {
288 			pageid = sqlite3_column_int64(s, 2);
289 			idx = ohash_lookup_memory(&htab,
290 			    (char *)&pageid, sizeof(uint64_t),
291 			    (uint32_t)pageid);
292 
293 			if (NULL != ohash_find(&htab, idx))
294 				continue;
295 
296 			mp = mandoc_calloc(1, sizeof(struct match));
297 			mp->pageid = pageid;
298 			mp->form = sqlite3_column_int(s, 1);
299 			mp->bits = sqlite3_column_int64(s, 3);
300 			if (TYPE_Nd == outbit)
301 				mp->desc = mandoc_strdup((const char *)
302 				    sqlite3_column_text(s, 0));
303 			ohash_insert(&htab, idx, mp);
304 		}
305 
306 		if (SQLITE_DONE != c)
307 			warnx("%s", sqlite3_errmsg(db));
308 
309 		sqlite3_finalize(s);
310 
311 		c = sqlite3_prepare_v2(db,
312 		    "SELECT sec, arch, name, pageid FROM mlinks "
313 		    "WHERE pageid=? ORDER BY sec, arch, name",
314 		    -1, &s, NULL);
315 		if (SQLITE_OK != c)
316 			errx((int)MANDOCLEVEL_SYSERR,
317 			    "%s", sqlite3_errmsg(db));
318 
319 		c = sqlite3_prepare_v2(db,
320 		    "SELECT bits, key, pageid FROM keys "
321 		    "WHERE pageid=? AND bits & ?",
322 		    -1, &s2, NULL);
323 		if (SQLITE_OK != c)
324 			errx((int)MANDOCLEVEL_SYSERR,
325 			    "%s", sqlite3_errmsg(db));
326 
327 		for (mp = ohash_first(&htab, &idx);
328 				NULL != mp;
329 				mp = ohash_next(&htab, &idx)) {
330 			if (cur + 1 > maxres) {
331 				maxres += 1024;
332 				*res = mandoc_reallocarray(*res,
333 				    maxres, sizeof(struct manpage));
334 			}
335 			mpage = *res + cur;
336 			mpage->ipath = i;
337 			mpage->bits = mp->bits;
338 			mpage->sec = 10;
339 			mpage->form = mp->form;
340 			buildnames(search, mpage, db, s, mp->pageid,
341 			    paths->paths[i], mp->form);
342 			if (mpage->names != NULL) {
343 				mpage->output = TYPE_Nd & outbit ?
344 				    mp->desc : outbit ?
345 				    buildoutput(db, s2, mp->pageid, outbit) :
346 				    NULL;
347 				cur++;
348 			}
349 			free(mp);
350 		}
351 
352 		sqlite3_finalize(s);
353 		sqlite3_finalize(s2);
354 		sqlite3_close(db);
355 		ohash_delete(&htab);
356 
357 		/*
358 		 * In man(1) mode, prefer matches in earlier trees
359 		 * over matches in later trees.
360 		 */
361 
362 		if (cur && search->firstmatch)
363 			break;
364 	}
365 	qsort(*res, cur, sizeof(struct manpage), manpage_compare);
366 	if (chdir_status && getcwd_status && chdir(buf) == -1)
367 		warn("%s", buf);
368 	exprfree(e);
369 	free(sql);
370 	*sz = cur;
371 	return 1;
372 }
373 
374 void
375 mansearch_free(struct manpage *res, size_t sz)
376 {
377 	size_t	 i;
378 
379 	for (i = 0; i < sz; i++) {
380 		free(res[i].file);
381 		free(res[i].names);
382 		free(res[i].output);
383 	}
384 	free(res);
385 }
386 
387 static int
388 manpage_compare(const void *vp1, const void *vp2)
389 {
390 	const struct manpage	*mp1, *mp2;
391 	int			 diff;
392 
393 	mp1 = vp1;
394 	mp2 = vp2;
395 	return (diff = mp2->bits - mp1->bits) ? diff :
396 	    (diff = mp1->sec - mp2->sec) ? diff :
397 	    strcasecmp(mp1->names, mp2->names);
398 }
399 
400 static void
401 buildnames(const struct mansearch *search, struct manpage *mpage,
402 		sqlite3 *db, sqlite3_stmt *s,
403 		uint64_t pageid, const char *path, int form)
404 {
405 	glob_t		 globinfo;
406 	char		*firstname, *newnames, *prevsec, *prevarch;
407 	const char	*oldnames, *sep1, *name, *sec, *sep2, *arch, *fsec;
408 	size_t		 i;
409 	int		 c, globres;
410 
411 	mpage->file = NULL;
412 	mpage->names = NULL;
413 	firstname = prevsec = prevarch = NULL;
414 	i = 1;
415 	SQL_BIND_INT64(db, s, i, pageid);
416 	while (SQLITE_ROW == (c = sqlite3_step(s))) {
417 
418 		/* Decide whether we already have some names. */
419 
420 		if (NULL == mpage->names) {
421 			oldnames = "";
422 			sep1 = "";
423 		} else {
424 			oldnames = mpage->names;
425 			sep1 = ", ";
426 		}
427 
428 		/* Fetch the next name, rejecting sec/arch mismatches. */
429 
430 		sec = (const char *)sqlite3_column_text(s, 0);
431 		if (search->sec != NULL && strcasecmp(sec, search->sec))
432 			continue;
433 		arch = (const char *)sqlite3_column_text(s, 1);
434 		if (search->arch != NULL && *arch != '\0' &&
435 		    strcasecmp(arch, search->arch))
436 			continue;
437 		name = (const char *)sqlite3_column_text(s, 2);
438 
439 		/* Remember the first section found. */
440 
441 		if (9 < mpage->sec && '1' <= *sec && '9' >= *sec)
442 			mpage->sec = (*sec - '1') + 1;
443 
444 		/* If the section changed, append the old one. */
445 
446 		if (NULL != prevsec &&
447 		    (strcmp(sec, prevsec) ||
448 		     strcmp(arch, prevarch))) {
449 			sep2 = '\0' == *prevarch ? "" : "/";
450 			mandoc_asprintf(&newnames, "%s(%s%s%s)",
451 			    oldnames, prevsec, sep2, prevarch);
452 			free(mpage->names);
453 			oldnames = mpage->names = newnames;
454 			free(prevsec);
455 			free(prevarch);
456 			prevsec = prevarch = NULL;
457 		}
458 
459 		/* Save the new section, to append it later. */
460 
461 		if (NULL == prevsec) {
462 			prevsec = mandoc_strdup(sec);
463 			prevarch = mandoc_strdup(arch);
464 		}
465 
466 		/* Append the new name. */
467 
468 		mandoc_asprintf(&newnames, "%s%s%s",
469 		    oldnames, sep1, name);
470 		free(mpage->names);
471 		mpage->names = newnames;
472 
473 		/* Also save the first file name encountered. */
474 
475 		if (mpage->file != NULL)
476 			continue;
477 
478 		if (form & FORM_SRC) {
479 			sep1 = "man";
480 			fsec = sec;
481 		} else {
482 			sep1 = "cat";
483 			fsec = "0";
484 		}
485 		sep2 = *arch == '\0' ? "" : "/";
486 		mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.%s",
487 		    path, sep1, sec, sep2, arch, name, fsec);
488 		if (access(mpage->file, R_OK) != -1)
489 			continue;
490 
491 		/* Handle unusual file name extensions. */
492 
493 		if (firstname == NULL)
494 			firstname = mpage->file;
495 		else
496 			free(mpage->file);
497 		mandoc_asprintf(&mpage->file, "%s/%s%s%s%s/%s.*",
498 		    path, sep1, sec, sep2, arch, name);
499 		globres = glob(mpage->file, 0, NULL, &globinfo);
500 		free(mpage->file);
501 		mpage->file = globres ? NULL :
502 		    mandoc_strdup(*globinfo.gl_pathv);
503 		globfree(&globinfo);
504 	}
505 	if (c != SQLITE_DONE)
506 		warnx("%s", sqlite3_errmsg(db));
507 	sqlite3_reset(s);
508 
509 	/* If none of the files is usable, use the first name. */
510 
511 	if (mpage->file == NULL)
512 		mpage->file = firstname;
513 	else if (mpage->file != firstname)
514 		free(firstname);
515 
516 	/* Append one final section to the names. */
517 
518 	if (prevsec != NULL) {
519 		sep2 = *prevarch == '\0' ? "" : "/";
520 		mandoc_asprintf(&newnames, "%s(%s%s%s)",
521 		    mpage->names, prevsec, sep2, prevarch);
522 		free(mpage->names);
523 		mpage->names = newnames;
524 		free(prevsec);
525 		free(prevarch);
526 	}
527 }
528 
529 static char *
530 buildoutput(sqlite3 *db, sqlite3_stmt *s, uint64_t pageid, uint64_t outbit)
531 {
532 	char		*output, *newoutput;
533 	const char	*oldoutput, *sep1, *data;
534 	size_t		 i;
535 	int		 c;
536 
537 	output = NULL;
538 	i = 1;
539 	SQL_BIND_INT64(db, s, i, pageid);
540 	SQL_BIND_INT64(db, s, i, outbit);
541 	while (SQLITE_ROW == (c = sqlite3_step(s))) {
542 		if (NULL == output) {
543 			oldoutput = "";
544 			sep1 = "";
545 		} else {
546 			oldoutput = output;
547 			sep1 = " # ";
548 		}
549 		data = (const char *)sqlite3_column_text(s, 1);
550 		mandoc_asprintf(&newoutput, "%s%s%s",
551 		    oldoutput, sep1, data);
552 		free(output);
553 		output = newoutput;
554 	}
555 	if (SQLITE_DONE != c)
556 		warnx("%s", sqlite3_errmsg(db));
557 	sqlite3_reset(s);
558 	return output;
559 }
560 
561 /*
562  * Implement substring match as an application-defined SQL function.
563  * Using the SQL LIKE or GLOB operators instead would be a bad idea
564  * because that would require escaping metacharacters in the string
565  * being searched for.
566  */
567 static void
568 sql_match(sqlite3_context *context, int argc, sqlite3_value **argv)
569 {
570 
571 	assert(2 == argc);
572 	sqlite3_result_int(context, NULL != strcasestr(
573 	    (const char *)sqlite3_value_text(argv[1]),
574 	    (const char *)sqlite3_value_text(argv[0])));
575 }
576 
577 /*
578  * Implement regular expression match
579  * as an application-defined SQL function.
580  */
581 static void
582 sql_regexp(sqlite3_context *context, int argc, sqlite3_value **argv)
583 {
584 
585 	assert(2 == argc);
586 	sqlite3_result_int(context, !regexec(
587 	    (regex_t *)(intptr_t)sqlite3_value_blob(argv[0]),
588 	    (const char *)sqlite3_value_text(argv[1]),
589 	    0, NULL, 0));
590 }
591 
/*
 * Append to the string *sql, whose current length is *sz, and
 * update both.  If "count" is larger than 1, the first character
 * of "newstr" is appended "count" times; otherwise, the whole of
 * "newstr" is appended once.  The result is NUL-terminated.
 */
static void
sql_append(char **sql, size_t *sz, const char *newstr, int count)
{
	size_t		 add;
	char		*tail;
	int		 repeat;

	repeat = count > 1;
	add = repeat ? (size_t)count : strlen(newstr);
	*sql = mandoc_realloc(*sql, *sz + add + 1);
	tail = *sql + *sz;
	if (repeat)
		memset(tail, *newstr, add);
	else
		memcpy(tail, newstr, add);
	tail[add] = '\0';
	*sz += add;
}
606 
607 /*
608  * Prepare the search SQL statement.
609  */
610 static char *
611 sql_statement(const struct expr *e)
612 {
613 	char		*sql;
614 	size_t		 sz;
615 	int		 needop;
616 
617 	sql = mandoc_strdup(e->equal ?
618 	    "SELECT desc, form, pageid, bits "
619 		"FROM mpages NATURAL JOIN names WHERE " :
620 	    "SELECT desc, form, pageid, 0 FROM mpages WHERE ");
621 	sz = strlen(sql);
622 
623 	for (needop = 0; NULL != e; e = e->next) {
624 		if (e->and)
625 			sql_append(&sql, &sz, " AND ", 1);
626 		else if (needop)
627 			sql_append(&sql, &sz, " OR ", 1);
628 		if (e->open)
629 			sql_append(&sql, &sz, "(", e->open);
630 		sql_append(&sql, &sz,
631 		    TYPE_Nd & e->bits
632 		    ? (NULL == e->substr
633 			? "desc REGEXP ?"
634 			: "desc MATCH ?")
635 		    : TYPE_Nm == e->bits
636 		    ? (NULL == e->substr
637 			? "pageid IN (SELECT pageid FROM names "
638 			  "WHERE name REGEXP ?)"
639 			: e->equal
640 			? "name = ? "
641 			: "pageid IN (SELECT pageid FROM names "
642 			  "WHERE name MATCH ?)")
643 		    : (NULL == e->substr
644 			? "pageid IN (SELECT pageid FROM keys "
645 			  "WHERE key REGEXP ? AND bits & ?)"
646 			: "pageid IN (SELECT pageid FROM keys "
647 			  "WHERE key MATCH ? AND bits & ?)"), 1);
648 		if (e->close)
649 			sql_append(&sql, &sz, ")", e->close);
650 		needop = 1;
651 	}
652 
653 	return sql;
654 }
655 
656 /*
657  * Compile a set of string tokens into an expression.
658  * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
659  * "(", "foo=bar", etc.).
660  */
661 static struct expr *
662 exprcomp(const struct mansearch *search, int argc, char *argv[])
663 {
664 	uint64_t	 mask;
665 	int		 i, toopen, logic, igncase, toclose;
666 	struct expr	*first, *prev, *cur, *next;
667 
668 	first = cur = NULL;
669 	logic = igncase = toopen = toclose = 0;
670 
671 	for (i = 0; i < argc; i++) {
672 		if (0 == strcmp("(", argv[i])) {
673 			if (igncase)
674 				goto fail;
675 			toopen++;
676 			toclose++;
677 			continue;
678 		} else if (0 == strcmp(")", argv[i])) {
679 			if (toopen || logic || igncase || NULL == cur)
680 				goto fail;
681 			cur->close++;
682 			if (0 > --toclose)
683 				goto fail;
684 			continue;
685 		} else if (0 == strcmp("-a", argv[i])) {
686 			if (toopen || logic || igncase || NULL == cur)
687 				goto fail;
688 			logic = 1;
689 			continue;
690 		} else if (0 == strcmp("-o", argv[i])) {
691 			if (toopen || logic || igncase || NULL == cur)
692 				goto fail;
693 			logic = 2;
694 			continue;
695 		} else if (0 == strcmp("-i", argv[i])) {
696 			if (igncase)
697 				goto fail;
698 			igncase = 1;
699 			continue;
700 		}
701 		next = exprterm(search, argv[i], !igncase);
702 		if (NULL == next)
703 			goto fail;
704 		if (NULL == first)
705 			first = next;
706 		else
707 			cur->next = next;
708 		prev = cur = next;
709 
710 		/*
711 		 * Searching for descriptions must be split out
712 		 * because they are stored in the mpages table,
713 		 * not in the keys table.
714 		 */
715 
716 		for (mask = TYPE_Nm; mask <= TYPE_Nd; mask <<= 1) {
717 			if (mask & cur->bits && ~mask & cur->bits) {
718 				next = mandoc_calloc(1,
719 				    sizeof(struct expr));
720 				memcpy(next, cur, sizeof(struct expr));
721 				prev->open = 1;
722 				cur->bits = mask;
723 				cur->next = next;
724 				cur = next;
725 				cur->bits &= ~mask;
726 			}
727 		}
728 		prev->and = (1 == logic);
729 		prev->open += toopen;
730 		if (cur != prev)
731 			cur->close = 1;
732 
733 		toopen = logic = igncase = 0;
734 	}
735 	if ( ! (toopen || logic || igncase || toclose))
736 		return first;
737 
738 fail:
739 	if (NULL != first)
740 		exprfree(first);
741 	return NULL;
742 }
743 
744 static struct expr *
745 exprterm(const struct mansearch *search, char *buf, int cs)
746 {
747 	char		 errbuf[BUFSIZ];
748 	struct expr	*e;
749 	char		*key, *val;
750 	uint64_t	 iterbit;
751 	int		 i, irc;
752 
753 	if ('\0' == *buf)
754 		return NULL;
755 
756 	e = mandoc_calloc(1, sizeof(struct expr));
757 
758 	if (search->argmode == ARG_NAME) {
759 		e->bits = TYPE_Nm;
760 		e->substr = buf;
761 		e->equal = 1;
762 		return e;
763 	}
764 
765 	/*
766 	 * Separate macro keys from search string.
767 	 * If needed, request regular expression handling
768 	 * by setting e->substr to NULL.
769 	 */
770 
771 	if (search->argmode == ARG_WORD) {
772 		e->bits = TYPE_Nm;
773 		e->substr = NULL;
774 #if HAVE_REWB_BSD
775 		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", buf);
776 #elif HAVE_REWB_SYSV
777 		mandoc_asprintf(&val, "\\<%s\\>", buf);
778 #else
779 		mandoc_asprintf(&val,
780 		    "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", buf);
781 #endif
782 		cs = 0;
783 	} else if ((val = strpbrk(buf, "=~")) == NULL) {
784 		e->bits = TYPE_Nm | TYPE_Nd;
785 		e->substr = buf;
786 	} else {
787 		if (val == buf)
788 			e->bits = TYPE_Nm | TYPE_Nd;
789 		if ('=' == *val)
790 			e->substr = val + 1;
791 		*val++ = '\0';
792 		if (NULL != strstr(buf, "arch"))
793 			cs = 0;
794 	}
795 
796 	/* Compile regular expressions. */
797 
798 	if (NULL == e->substr) {
799 		irc = regcomp(&e->regexp, val,
800 		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
801 		if (search->argmode == ARG_WORD)
802 			free(val);
803 		if (irc) {
804 			regerror(irc, &e->regexp, errbuf, sizeof(errbuf));
805 			warnx("regcomp: %s", errbuf);
806 			free(e);
807 			return NULL;
808 		}
809 	}
810 
811 	if (e->bits)
812 		return e;
813 
814 	/*
815 	 * Parse out all possible fields.
816 	 * If the field doesn't resolve, bail.
817 	 */
818 
819 	while (NULL != (key = strsep(&buf, ","))) {
820 		if ('\0' == *key)
821 			continue;
822 		for (i = 0, iterbit = 1;
823 		     i < mansearch_keymax;
824 		     i++, iterbit <<= 1) {
825 			if (0 == strcasecmp(key,
826 			    mansearch_keynames[i])) {
827 				e->bits |= iterbit;
828 				break;
829 			}
830 		}
831 		if (i == mansearch_keymax) {
832 			if (strcasecmp(key, "any")) {
833 				free(e);
834 				return NULL;
835 			}
836 			e->bits |= ~0ULL;
837 		}
838 	}
839 
840 	return e;
841 }
842 
843 static void
844 exprfree(struct expr *p)
845 {
846 	struct expr	*pp;
847 
848 	while (NULL != p) {
849 		pp = p->next;
850 		free(p);
851 		p = pp;
852 	}
853 }
854