xref: /netbsd-src/usr.sbin/makemandb/apropos-utils.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	$NetBSD: apropos-utils.c,v 1.30 2017/01/10 04:34:07 kamil Exp $	*/
2 /*-
3  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
4  * All rights reserved.
5  *
6  * This code was developed as part of Google's Summer of Code 2011 program.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: apropos-utils.c,v 1.30 2017/01/10 04:34:07 kamil Exp $");
35 
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 
39 #include <assert.h>
40 #include <ctype.h>
41 #include <err.h>
42 #include <math.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <util.h>
47 #include <zlib.h>
48 #include <term.h>
49 #include <unistd.h>
50 #undef tab	// XXX: manconf.h
51 
52 #include "apropos-utils.h"
53 #include "manconf.h"
54 
/*
 * Remembers the caller's callback while one of the formatting wrappers
 * (callback_html, callback_pager, callback_term) is installed in its place.
 */
typedef struct orig_callback_data {
	void *data;	/* opaque pointer handed back as the callback's 1st arg */
	/* user callback: (data, section, name, name_desc, snippet, snippet_len) */
	int (*callback) (void *, const char *, const char *, const char *,
		const char *, size_t);
} orig_callback_data;
60 
/*
 * State shared between successive rank_func() invocations of one query.
 */
typedef struct inverse_document_frequency {
	double value;	/* weighted idf sum, accumulated while status == 0 */
	int status;	/* 0: value not computed yet; set to 1 by rank_func */
} inverse_document_frequency;
65 
/*
 * Weights for individual columns of the mandb table.
 * rank_func() reads this table as col_weights[icol - 1] with icol starting
 * at 1, so entry 0 belongs to the second column of the table (name); the
 * first column (section) has no weight.
 */
static const double col_weights[] = {
	2.0,	// name
	2.00,	// name_desc
	0.55,	// desc
	0.10,	// lib
	0.001,	// return_vals
	0.20,	// env
	0.01,	// files
	0.001,	// exit_status
	2.00,	// diagnostics
	0.05,	// errors
	0.00,	// md5_hash
	1.00	// machine
};
81 
/*
 * lower --
 *  Lower-cases the NUL terminated string str in place and returns str.
 *  str must not be NULL.
 */
char *
lower(char *str)
{
	char *p;

	assert(str);
	for (p = str; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);
	return str;
}
98 
/*
 * concat --
 *  Convenience wrapper around concat2() for a NUL terminated src:
 *  *dst becomes *dst + " " + src (or a copy of src when *dst is NULL).
 */
void
concat(char **dst, const char *src)
{
	size_t srclen = strlen(src);

	concat2(dst, src, srclen);
}
109 
/*
 * concat2 --
 *  Appends a space followed by the first srclen bytes of src to *dst,
 *  growing *dst with erealloc().  When *dst is NULL it is simply set to
 *  an estrndup() copy of src.  src must not be NULL.
 */
void
concat2(char **dst, const char *src, size_t srclen)
{
	char *buf;
	size_t oldlen;

	assert(src != NULL);

	/* Nothing to append to yet: start the string off with src. */
	if (*dst == NULL) {
		*dst = estrndup(src, srclen);
		return;
	}

	oldlen = strlen(*dst);

	/* old text + separator space + new text + terminating NUL */
	buf = erealloc(*dst, oldlen + srclen + 2);
	*dst = buf;

	buf[oldlen] = ' ';
	memcpy(buf + oldlen + 1, src, srclen);
	buf[oldlen + 1 + srclen] = '\0';
}
140 
141 void
142 close_db(sqlite3 *db)
143 {
144 	sqlite3_close(db);
145 	sqlite3_shutdown();
146 }
147 
148 /*
149  * create_db --
150  *  Creates the database schema.
151  */
152 static int
153 create_db(sqlite3 *db)
154 {
155 	const char *sqlstr = NULL;
156 	char *schemasql;
157 	char *errmsg = NULL;
158 
159 /*------------------------ Create the tables------------------------------*/
160 
161 #if NOTYET
162 	sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
163 #else
164 	sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
165 #endif
166 
167 	schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
168 	    APROPOS_SCHEMA_VERSION);
169 	sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
170 	if (errmsg != NULL)
171 		goto out;
172 	sqlite3_free(schemasql);
173 
174 	sqlstr =
175 	    //mandb
176 	    "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
177 		"name_desc, desc, lib, return_vals, env, files, "
178 		"exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
179 		"compress=zip, uncompress=unzip, tokenize=porter, "
180 		"notindexed=section, notindexed=md5_hash); "
181 	    //mandb_meta
182 	    "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
183 		"file UNIQUE, md5_hash UNIQUE, id  INTEGER PRIMARY KEY); "
184 	    //mandb_links
185 	    "CREATE TABLE IF NOT EXISTS mandb_links(link, target, section, "
186 		"machine, md5_hash); ";
187 
188 	sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
189 	if (errmsg != NULL)
190 		goto out;
191 
192 	sqlstr =
193 	    "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
194 		"(link); "
195 	    "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
196 		"(device, inode); "
197 	    "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
198 		"(md5_hash);";
199 	sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
200 	if (errmsg != NULL)
201 		goto out;
202 	return 0;
203 
204 out:
205 	warnx("%s", errmsg);
206 	free(errmsg);
207 	sqlite3_close(db);
208 	sqlite3_shutdown();
209 	return -1;
210 }
211 
212 /*
213  * zip --
214  *  User defined Sqlite function to compress the FTS table
215  */
216 static void
217 zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
218 {
219 	int nin;
220 	long int nout;
221 	const unsigned char * inbuf;
222 	unsigned char *outbuf;
223 
224 	assert(nval == 1);
225 	nin = sqlite3_value_bytes(apval[0]);
226 	inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
227 	nout = nin + 13 + (nin + 999) / 1000;
228 	outbuf = emalloc(nout);
229 	compress(outbuf, (unsigned long *) &nout, inbuf, nin);
230 	sqlite3_result_blob(pctx, outbuf, nout, free);
231 }
232 
233 /*
234  * unzip --
235  *  User defined Sqlite function to uncompress the FTS table.
236  */
237 static void
238 unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
239 {
240 	unsigned int rc;
241 	unsigned char *outbuf;
242 	z_stream stream;
243 
244 	assert(nval == 1);
245 	stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
246 	stream.avail_in = sqlite3_value_bytes(apval[0]);
247 	stream.avail_out = stream.avail_in * 2 + 100;
248 	stream.next_out = outbuf = emalloc(stream.avail_out);
249 	stream.zalloc = NULL;
250 	stream.zfree = NULL;
251 
252 	if (inflateInit(&stream) != Z_OK) {
253 		free(outbuf);
254 		return;
255 	}
256 
257 	while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
258 		if (rc != Z_OK ||
259 		    (stream.avail_out != 0 && stream.avail_in == 0)) {
260 			free(outbuf);
261 			return;
262 		}
263 		outbuf = erealloc(outbuf, stream.total_out * 2);
264 		stream.next_out = outbuf + stream.total_out;
265 		stream.avail_out = stream.total_out;
266 	}
267 	if (inflateEnd(&stream) != Z_OK) {
268 		free(outbuf);
269 		return;
270 	}
271 	outbuf = erealloc(outbuf, stream.total_out);
272 	sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free);
273 }
274 
275 /*
276  * get_dbpath --
277  *   Read the path of the database from man.conf and return.
278  */
279 char *
280 get_dbpath(const char *manconf)
281 {
282 	TAG *tp;
283 	char *dbpath;
284 
285 	config(manconf);
286 	tp = gettag("_mandb", 1);
287 	if (!tp)
288 		return NULL;
289 
290 	if (TAILQ_EMPTY(&tp->entrylist))
291 		return NULL;
292 
293 	dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
294 	return dbpath;
295 }
296 
297 /* init_db --
298  *   Prepare the database. Register the compress/uncompress functions and the
299  *   stopword tokenizer.
300  *	 db_flag specifies the mode in which to open the database. 3 options are
301  *   available:
302  *   	1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
303  *  	2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
304  *  	3. DB_CREATE: Open in read-write mode. It will try to create the db if
305  *			it does not exist already.
306  *  RETURN VALUES:
307  *		The function will return NULL in case the db does not exist
308  *		and DB_CREATE
309  *  	was not specified. And in case DB_CREATE was specified and yet NULL is
310  *  	returned, then there was some other error.
311  *  	In normal cases the function should return a handle to the db.
312  */
313 sqlite3 *
314 init_db(mandb_access_mode db_flag, const char *manconf)
315 {
316 	sqlite3 *db = NULL;
317 	sqlite3_stmt *stmt;
318 	struct stat sb;
319 	int rc;
320 	int create_db_flag = 0;
321 
322 	char *dbpath = get_dbpath(manconf);
323 	if (dbpath == NULL)
324 		errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
325 
326 	if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
327 		/* Database does not exist, check if DB_CREATE was specified,
328 		 * and set flag to create the database schema
329 		 */
330 		if (db_flag != (MANDB_CREATE)) {
331 			warnx("Missing apropos database. "
332 			      "Please run makemandb to create it.");
333 			return NULL;
334 		}
335 		create_db_flag = 1;
336 	} else {
337 		/*
338 		 * Database exists. Check if we have the permissions
339 		 * to read/write the files
340 		 */
341 		int access_mode = R_OK;
342 		switch (db_flag) {
343 		case MANDB_CREATE:
344 		case MANDB_WRITE:
345 			access_mode |= W_OK;
346 			break;
347 		default:
348 			break;
349 		}
350 		if ((access(dbpath, access_mode)) != 0) {
351 			warnx("Unable to access the database, please check"
352 			    " permissions for `%s'", dbpath);
353 			return NULL;
354 		}
355 	}
356 
357 	sqlite3_initialize();
358 	rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
359 
360 	if (rc != SQLITE_OK) {
361 		warnx("%s", sqlite3_errmsg(db));
362 		goto error;
363 	}
364 
365 	if (create_db_flag && create_db(db) < 0) {
366 		warnx("%s", "Unable to create database schema");
367 		goto error;
368 	}
369 
370 	rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
371 	if (rc != SQLITE_OK) {
372 		warnx("Unable to query schema version: %s",
373 		    sqlite3_errmsg(db));
374 		goto error;
375 	}
376 	if (sqlite3_step(stmt) != SQLITE_ROW) {
377 		sqlite3_finalize(stmt);
378 		warnx("Unable to query schema version: %s",
379 		    sqlite3_errmsg(db));
380 		goto error;
381 	}
382 	if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
383 		sqlite3_finalize(stmt);
384 		warnx("Incorrect schema version found. "
385 		      "Please run makemandb -f.");
386 		goto error;
387 	}
388 	sqlite3_finalize(stmt);
389 
390 	sqlite3_extended_result_codes(db, 1);
391 
392 	/* Register the zip and unzip functions for FTS compression */
393 	rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip,
394 	    NULL, NULL);
395 	if (rc != SQLITE_OK) {
396 		warnx("Unable to register function: compress: %s",
397 		    sqlite3_errmsg(db));
398 		goto error;
399 	}
400 
401 	rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
402                                  unzip, NULL, NULL);
403 	if (rc != SQLITE_OK) {
404 		warnx("Unable to register function: uncompress: %s",
405 		    sqlite3_errmsg(db));
406 		goto error;
407 	}
408 	return db;
409 
410 error:
411 	close_db(db);
412 	return NULL;
413 }
414 
415 /*
416  * rank_func --
417  *  Sqlite user defined function for ranking the documents.
418  *  For each phrase of the query, it computes the tf and idf and adds them over.
419  *  It computes the final rank, by multiplying tf and idf together.
420  *  Weight of term t for document d = (term frequency of t in d *
421  *                                      inverse document frequency of t)
422  *
423  *  Term Frequency of term t in document d = Number of times t occurs in d /
424  *	Number of times t appears in all documents
425  *
426  *  Inverse document frequency of t = log(Total number of documents /
427  *										Number of documents in which t occurs)
428  */
429 static void
430 rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
431 {
432 	inverse_document_frequency *idf = sqlite3_user_data(pctx);
433 	double tf = 0.0;
434 	const unsigned int *matchinfo;
435 	int ncol;
436 	int nphrase;
437 	int iphrase;
438 	int ndoc;
439 	int doclen = 0;
440 	const double k = 3.75;
441 	/*
442 	 * Check that the number of arguments passed to this
443 	 * function is correct.
444 	 */
445 	assert(nval == 1);
446 
447 	matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
448 	nphrase = matchinfo[0];
449 	ncol = matchinfo[1];
450 	ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
451 	for (iphrase = 0; iphrase < nphrase; iphrase++) {
452 		int icol;
453 		const unsigned int *phraseinfo =
454 		    &matchinfo[2 + ncol + iphrase * ncol * 3];
455 		for(icol = 1; icol < ncol; icol++) {
456 
457 			/* nhitcount: number of times the current phrase occurs
458 			 * 	in the current column in the current document.
459 			 * nglobalhitcount: number of times current phrase
460 			 *	occurs in the current column in all documents.
461 			 * ndocshitcount: number of documents in which the
462 			 *	current phrase occurs in the current column at
463 			 *	least once.
464 			 */
465   			int nhitcount = phraseinfo[3 * icol];
466 			int nglobalhitcount = phraseinfo[3 * icol + 1];
467 			int ndocshitcount = phraseinfo[3 * icol + 2];
468 			doclen = matchinfo[2 + icol ];
469 			double weight = col_weights[icol - 1];
470 			if (idf->status == 0 && ndocshitcount)
471 				idf->value +=
472 				    log(((double)ndoc / ndocshitcount))* weight;
473 
474 			/*
475 			 * Dividing the tf by document length to normalize
476 			 * the effect of longer documents.
477 			 */
478 			if (nglobalhitcount > 0 && nhitcount)
479 				tf += (((double)nhitcount  * weight)
480 				    / (nglobalhitcount * doclen));
481 		}
482 	}
483 	idf->status = 1;
484 
485 	/*
486 	 * Final score: Dividing by k + tf further normalizes the weight
487 	 * leading to better results. The value of k is experimental
488 	 */
489 	double score = (tf * idf->value) / (k + tf);
490 	sqlite3_result_double(pctx, score);
491 	return;
492 }
493 
494 /*
495  * generates sql query for matching the user entered query
496  */
497 static char *
498 generate_search_query(query_args *args, const char *snippet_args[3])
499 {
500 	const char *default_snippet_args[3];
501 	char *section_clause = NULL;
502 	char *limit_clause = NULL;
503 	char *machine_clause = NULL;
504 	char *query;
505 
506 	if (args->machine)
507 		easprintf(&machine_clause, "AND machine = \'%s\' ",
508 		    args->machine);
509 
510 
511 	/* We want to build a query of the form: "select x,y,z from mandb where
512 	 * mandb match :query [AND (section LIKE '1' OR section LIKE '2' OR...)]
513 	 * ORDER BY rank DESC..."
514 	 * NOTES:
515 	 *   1. The portion in square brackets is optional, it will be there
516 	 *      only if the user has specified an option on the command line
517 	 *      to search in one or more specific sections.
518 	 */
519 	char *sections_str = args->sec_nums;
520 	char *temp;
521 	if (sections_str) {
522 		while (*sections_str) {
523 			size_t len = strcspn(sections_str, " ");
524 			char *sec = sections_str;
525 			if (sections_str[len] == 0) {
526 				sections_str += len;
527 			} else {
528 				sections_str[len] = 0;
529 				sections_str += len + 1;
530 			}
531 			easprintf(&temp, "\'%s\',", sec);
532 
533 			if (section_clause) {
534 				concat(&section_clause, temp);
535 				free(temp);
536 			} else {
537 				section_clause = temp;
538 			}
539 		}
540 		if (section_clause) {
541 			/*
542 			 * At least one section requested, add glue for query.
543 			 * Before doing that, remove the comma at the end of
544 			 * section_clause
545 			 */
546 			size_t section_clause_len = strlen(section_clause);
547 			if (section_clause[section_clause_len - 1] == ',')
548 				section_clause[section_clause_len - 1] = 0;
549 			temp = section_clause;
550 			easprintf(&section_clause, " AND section IN (%s)", temp);
551 			free(temp);
552 		}
553 	}
554 
555 	if (args->nrec >= 0) {
556 		/* Use the provided number of records and offset */
557 		easprintf(&limit_clause, " LIMIT %d OFFSET %d",
558 		    args->nrec, args->offset);
559 	}
560 
561 	if (snippet_args == NULL) {
562 		default_snippet_args[0] = "";
563 		default_snippet_args[1] = "";
564 		default_snippet_args[2] = "...";
565 		snippet_args = default_snippet_args;
566 	}
567 
568 	if (args->legacy) {
569 	    char *wild;
570 	    easprintf(&wild, "%%%s%%", args->search_str);
571 	    query = sqlite3_mprintf("SELECT section, name, name_desc, machine"
572 		" FROM mandb"
573 		" WHERE name LIKE %Q OR name_desc LIKE %Q "
574 		"%s"
575 		"%s",
576 		wild, wild,
577 		section_clause ? section_clause : "",
578 		limit_clause ? limit_clause : "");
579 		free(wild);
580 	} else {
581 	    query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
582 		" snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
583 		" rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
584 		" FROM mandb"
585 		" WHERE mandb MATCH %Q %s "
586 		"%s"
587 		" ORDER BY rank DESC"
588 		"%s",
589 		snippet_args[0], snippet_args[1], snippet_args[2],
590 		args->search_str, machine_clause ? machine_clause : "",
591 		section_clause ? section_clause : "",
592 		limit_clause ? limit_clause : "");
593 	}
594 
595 	free(machine_clause);
596 	free(section_clause);
597 	free(limit_clause);
598 	return query;
599 }
600 
601 /*
602  * Execute the full text search query and return the number of results
603  * obtained.
604  */
605 static unsigned int
606 execute_search_query(sqlite3 *db, char *query, query_args *args)
607 {
608 	sqlite3_stmt *stmt;
609 	const char *section;
610 	char *name;
611 	char *slash_ptr;
612 	const char *name_desc;
613 	const char *machine;
614 	const char *snippet = "";
615 	const char *name_temp;
616 	char *m = NULL;
617 	int rc;
618 	inverse_document_frequency idf = {0, 0};
619 
620 	if (!args->legacy) {
621 		/* Register the rank function */
622 		rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY,
623 		    (void *) &idf, rank_func, NULL, NULL);
624 		if (rc != SQLITE_OK) {
625 			warnx("Unable to register the ranking function: %s",
626 			    sqlite3_errmsg(db));
627 			sqlite3_close(db);
628 			sqlite3_shutdown();
629 			exit(EXIT_FAILURE);
630 		}
631 	}
632 
633 	rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
634 	if (rc == SQLITE_IOERR) {
635 		warnx("Corrupt database. Please rerun makemandb");
636 		return -1;
637 	} else if (rc != SQLITE_OK) {
638 		warnx("%s", sqlite3_errmsg(db));
639 		return -1;
640 	}
641 
642 	unsigned int nresults = 0;
643 	while (sqlite3_step(stmt) == SQLITE_ROW) {
644 		nresults++;
645 		section = (const char *) sqlite3_column_text(stmt, 0);
646 		name_temp = (const char *) sqlite3_column_text(stmt, 1);
647 		name_desc = (const char *) sqlite3_column_text(stmt, 2);
648 		machine = (const char *) sqlite3_column_text(stmt, 3);
649 		if (!args->legacy)
650 			snippet = (const char *) sqlite3_column_text(stmt, 4);
651 		if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
652 			name_temp = slash_ptr + 1;
653 		if (machine && machine[0]) {
654 			m = estrdup(machine);
655 			easprintf(&name, "%s/%s", lower(m), name_temp);
656 			free(m);
657 		} else {
658 			name = estrdup((const char *)
659 			    sqlite3_column_text(stmt, 1));
660 		}
661 
662 		(args->callback)(args->callback_data, section, name,
663 		    name_desc, snippet, args->legacy? 0: strlen(snippet));
664 		free(name);
665 	}
666 	sqlite3_finalize(stmt);
667 	return nresults;
668 }
669 
670 
671 /*
672  *  run_query_internal --
673  *  Performs the searches for the keywords entered by the user.
674  *  The 2nd param: snippet_args is an array of strings providing values for the
675  *  last three parameters to the snippet function of sqlite. (Look at the docs).
676  *  The 3rd param: args contains rest of the search parameters. Look at
677  *  arpopos-utils.h for the description of individual fields.
678  *
679  */
680 static int
681 run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args)
682 {
683 	char *query;
684 	query = generate_search_query(args, snippet_args);
685 	if (query == NULL) {
686 		*args->errmsg = estrdup("malloc failed");
687 		return -1;
688 	}
689 
690 	execute_search_query(db, query, args);
691 	sqlite3_free(query);
692 	return *(args->errmsg) == NULL ? 0 : -1;
693 }
694 
/*
 * get_escaped_html_string --
 *  Returns a newly allocated copy of the NUL terminated string src that
 *  is safe to embed in HTML:
 *	<    becomes  &lt;
 *	>    becomes  &gt;
 *	"    becomes  &quot;
 *	&    becomes  &amp;  (unless preceded by a backslash, i.e. when it
 *			      is part of an mdoc escape sequence like \&)
 *	\002 becomes  <b>    (start of match marker)
 *	\003 becomes  </b>   (end of match marker)
 *
 *  On return *slen holds the length of the result, excluding the NUL.
 *  The incoming value of *slen is not relied upon; the size of the buffer
 *  is derived from src itself.  The caller frees the result.
 */
static char *
get_escaped_html_string(const char *src, size_t *slen)
{
	static const char trouble[] = "<>\"&\002\003";
	const char *p;
	char *dst, *d;
	size_t count = 0;

	/*
	 * Count the characters that need replacing.  The scan stops exactly
	 * on the terminating NUL, so p - src is the length of src afterwards.
	 */
	p = src;
	for (;;) {
		p += strcspn(p, trouble);
		if (*p == '\0')
			break;
		count++;
		p++;
	}

	/* The longest replacement (&quot;) adds 5 bytes to the original. */
	dst = d = emalloc((size_t)(p - src) + count * 5 + 1);

	for (p = src; *p != '\0'; p++) {
		const char *rep;
		size_t replen;

		switch (*p) {
		case '<':
			rep = "&lt;";
			break;
		case '>':
			rep = "&gt;";
			break;
		case '\"':
			rep = "&quot;";
			break;
		case '&':
			/*
			 * Don't perform the quoting if this & is part of
			 * an mdoc escape sequence, e.g. \&
			 */
			if (p != src && p[-1] == '\\')
				rep = "&";
			else
				rep = "&amp;";
			break;
		case '\002':
			rep = "<b>";
			break;
		case '\003':
			rep = "</b>";
			break;
		default:
			*d++ = *p;
			continue;
		}
		replen = strlen(rep);
		memcpy(d, rep, replen);
		d += replen;
	}
	*d = '\0';
	*slen = (size_t)(d - dst);
	return dst;
}
762 
763 
764 /*
765  * callback_html --
766  *  Callback function for run_query_html. It builds the html output and then
767  *  calls the actual user supplied callback function.
768  */
769 static int
770 callback_html(void *data, const char *section, const char *name,
771     const char *name_desc, const char *snippet, size_t snippet_length)
772 {
773 	struct orig_callback_data *orig_data = data;
774 	int (*callback)(void *, const char *, const char *, const char *,
775 	    const char *, size_t) = orig_data->callback;
776 	size_t length = snippet_length;
777 	size_t name_description_length = strlen(name_desc);
778 	char *qsnippet = get_escaped_html_string(snippet, &length);
779 	char *qname_description = get_escaped_html_string(name_desc,
780 	    &name_description_length);
781 
782 	(*callback)(orig_data->data, section, name, qname_description,
783 	    qsnippet, length);
784 	free(qsnippet);
785 	free(qname_description);
786 	return 0;
787 }
788 
789 /*
790  * run_query_html --
791  *  Utility function to output query result in HTML format.
792  *  It internally calls run_query only, but it first passes the output to its
793  *  own custom callback function, which preprocess the snippet for quoting
794  *  inline HTML fragments.
795  *  After that it delegates the call the actual user supplied callback function.
796  */
797 static int
798 run_query_html(sqlite3 *db, query_args *args)
799 {
800 	struct orig_callback_data orig_data;
801 	orig_data.callback = args->callback;
802 	orig_data.data = args->callback_data;
803 	const char *snippet_args[] = {"\002", "\003", "..."};
804 	args->callback = &callback_html;
805 	args->callback_data = (void *) &orig_data;
806 	return run_query_internal(db, snippet_args, args);
807 }
808 
/*
 * ul_pager --
 *  Returns a newly allocated copy of s.  When ul is non-zero every byte
 *  'a' of s is written as "_\ba" (underscore, backspace, byte), which is
 *  the overstrike form of underlining; otherwise s is copied unchanged.
 *  The caller frees the result.
 */
static char *
ul_pager(int ul, const char *s)
{
	size_t n, i;
	char *out, *w;

	if (!ul)
		return estrdup(s);

	n = strlen(s);
	/* three bytes per input byte plus the terminating NUL */
	out = emalloc(n * 3 + 1);
	w = out;
	for (i = 0; i < n; i++) {
		w[0] = '_';
		w[1] = '\b';
		w[2] = s[i];
		w += 3;
	}
	*w = '\0';
	return out;
}
833 
834 /*
835  * callback_pager --
836  *  A callback similar to callback_html. It overstrikes the matching text in
837  *  the snippet so that it appears emboldened when viewed using a pager like
838  *  more or less.
839  */
840 static int
841 callback_pager(void *data, const char *section, const char *name,
842 	const char *name_desc, const char *snippet, size_t snippet_length)
843 {
844 	struct orig_callback_data *orig_data = data;
845 	char *psnippet;
846 	const char *temp = snippet;
847 	int count = 0;
848 	int i = 0, did;
849 	size_t sz = 0;
850 	size_t psnippet_length;
851 
852 	/* Count the number of bytes of matching text. For each of these
853 	 * bytes we will use 2 extra bytes to overstrike it so that it
854 	 * appears bold when viewed using a pager.
855 	 */
856 	while (*temp) {
857 		sz = strcspn(temp, "\002\003");
858 		temp += sz;
859 		if (*temp == '\003') {
860 			count += 2 * (sz);
861 		}
862 		temp++;
863 	}
864 
865 	psnippet_length = snippet_length + count;
866 	psnippet = emalloc(psnippet_length + 1);
867 
868 	/* Copy the bytes from snippet to psnippet:
869 	 * 1. Copy the bytes before \002 as it is.
870 	 * 2. The bytes after \002 need to be overstriked till we
871 	 *    encounter \003.
872 	 * 3. To overstrike a byte 'A' we need to write 'A\bA'
873 	 */
874 	did = 0;
875 	while (*snippet) {
876 		sz = strcspn(snippet, "\002");
877 		memcpy(&psnippet[i], snippet, sz);
878 		snippet += sz;
879 		i += sz;
880 
881 		/* Don't change this. Advancing the pointer without reading the byte
882 		 * is causing strange behavior.
883 		 */
884 		if (*snippet == '\002')
885 			snippet++;
886 		while (*snippet && *snippet != '\003') {
887 			did = 1;
888 			psnippet[i++] = *snippet;
889 			psnippet[i++] = '\b';
890 			psnippet[i++] = *snippet++;
891 		}
892 		if (*snippet)
893 			snippet++;
894 	}
895 
896 	psnippet[i] = 0;
897 	char *ul_section = ul_pager(did, section);
898 	char *ul_name = ul_pager(did, name);
899 	char *ul_name_desc = ul_pager(did, name_desc);
900 	(orig_data->callback)(orig_data->data, ul_section, ul_name,
901 	    ul_name_desc, psnippet, psnippet_length);
902 	free(ul_section);
903 	free(ul_name);
904 	free(ul_name_desc);
905 	free(psnippet);
906 	return 0;
907 }
908 
/*
 * Callback data for callback_term: the saved user callback plus the two
 * strings that ul_term() puts around a piece of text.
 */
struct term_args {
	struct orig_callback_data *orig_data;	/* user callback to forward to */
	const char *smul;	/* written before the text */
	const char *rmul;	/* written after the text */
};
914 
915 /*
916  * underline a string, pager style.
917  */
918 static char *
919 ul_term(const char *s, const struct term_args *ta)
920 {
921 	char *dst;
922 
923 	easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
924 	return dst;
925 }
926 
927 /*
928  * callback_term --
929  *  A callback similar to callback_html. It overstrikes the matching text in
930  *  the snippet so that it appears emboldened when viewed using a pager like
931  *  more or less.
932  */
933 static int
934 callback_term(void *data, const char *section, const char *name,
935 	const char *name_desc, const char *snippet, size_t snippet_length)
936 {
937 	struct term_args *ta = data;
938 	struct orig_callback_data *orig_data = ta->orig_data;
939 
940 	char *ul_section = ul_term(section, ta);
941 	char *ul_name = ul_term(name, ta);
942 	char *ul_name_desc = ul_term(name_desc, ta);
943 	(orig_data->callback)(orig_data->data, ul_section, ul_name,
944 	    ul_name_desc, snippet, snippet_length);
945 	free(ul_section);
946 	free(ul_name);
947 	free(ul_name_desc);
948 	return 0;
949 }
950 
951 /*
952  * run_query_pager --
953  *  Utility function similar to run_query_html. This function tries to
954  *  pre-process the result assuming it will be piped to a pager.
955  *  For this purpose it first calls its own callback function callback_pager
956  *  which then delegates the call to the user supplied callback.
957  */
958 static int
959 run_query_pager(sqlite3 *db, query_args *args)
960 {
961 	struct orig_callback_data orig_data;
962 	orig_data.callback = args->callback;
963 	orig_data.data = args->callback_data;
964 	const char *snippet_args[3] = { "\002", "\003", "..." };
965 	args->callback = &callback_pager;
966 	args->callback_data = (void *) &orig_data;
967 	return run_query_internal(db, snippet_args, args);
968 }
969 
/*
 * Output cursor used by term_putc(): a buffer and the offset of the next
 * byte to be written into it.
 */
struct nv {
	char *s;	/* destination buffer */
	size_t l;	/* number of bytes written so far */
};
974 
975 static int
976 term_putc(int c, void *p)
977 {
978 	struct nv *nv = p;
979 	nv->s[nv->l++] = c;
980 	return 0;
981 }
982 
983 static char *
984 term_fix_seq(TERMINAL *ti, const char *seq)
985 {
986 	char *res = estrdup(seq);
987 	struct nv nv;
988 
989 	if (ti == NULL)
990 	    return res;
991 
992 	nv.s = res;
993 	nv.l = 0;
994 	ti_puts(ti, seq, 1, term_putc, &nv);
995 	nv.s[nv.l] = '\0';
996 
997 	return res;
998 }
999 
1000 static void
1001 term_init(int fd, const char *sa[5])
1002 {
1003 	TERMINAL *ti;
1004 	int error;
1005 	const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
1006 
1007 	if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
1008 		bold = sgr0 = NULL;
1009 		smso = rmso = smul = rmul = "";
1010 		ti = NULL;
1011 	} else {
1012 		bold = ti_getstr(ti, "bold");
1013 		sgr0 = ti_getstr(ti, "sgr0");
1014 		if (bold == NULL || sgr0 == NULL) {
1015 			smso = ti_getstr(ti, "smso");
1016 
1017 			if (smso == NULL ||
1018 			    (rmso = ti_getstr(ti, "rmso")) == NULL)
1019 				smso = rmso = "";
1020 			bold = sgr0 = NULL;
1021 		} else
1022 			smso = rmso = "";
1023 
1024 		smul = ti_getstr(ti, "smul");
1025 		if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
1026 			smul = rmul = "";
1027 	}
1028 
1029 	sa[0] = term_fix_seq(ti, bold ? bold : smso);
1030 	sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso);
1031 	sa[2] = estrdup("...");
1032 	sa[3] = term_fix_seq(ti, smul);
1033 	sa[4] = term_fix_seq(ti, rmul);
1034 
1035 	if (ti)
1036 		del_curterm(ti);
1037 }
1038 
1039 /*
1040  * run_query_term --
1041  *  Utility function similar to run_query_html. This function tries to
1042  *  pre-process the result assuming it will be displayed on a terminal
1043  *  For this purpose it first calls its own callback function callback_pager
1044  *  which then delegates the call to the user supplied callback.
1045  */
1046 static int
1047 run_query_term(sqlite3 *db, query_args *args)
1048 {
1049 	struct orig_callback_data orig_data;
1050 	struct term_args ta;
1051 	orig_data.callback = args->callback;
1052 	orig_data.data = args->callback_data;
1053 	const char *snippet_args[5];
1054 
1055 	term_init(STDOUT_FILENO, snippet_args);
1056 	ta.smul = snippet_args[3];
1057 	ta.rmul = snippet_args[4];
1058 	ta.orig_data = (void *) &orig_data;
1059 
1060 	args->callback = &callback_term;
1061 	args->callback_data = &ta;
1062 	return run_query_internal(db, snippet_args, args);
1063 }
1064 
1065 static int
1066 run_query_none(sqlite3 *db, query_args *args)
1067 {
1068 	struct orig_callback_data orig_data;
1069 	orig_data.callback = args->callback;
1070 	orig_data.data = args->callback_data;
1071 	const char *snippet_args[3] = { "", "", "..." };
1072 	args->callback = &callback_pager;
1073 	args->callback_data = (void *) &orig_data;
1074 	return run_query_internal(db, snippet_args, args);
1075 }
1076 
1077 int
1078 run_query(sqlite3 *db, query_format fmt, query_args *args)
1079 {
1080 	switch (fmt) {
1081 	case APROPOS_NONE:
1082 		return run_query_none(db, args);
1083 	case APROPOS_HTML:
1084 		return run_query_html(db, args);
1085 	case APROPOS_TERM:
1086 		return run_query_term(db, args);
1087 	case APROPOS_PAGER:
1088 		return run_query_pager(db, args);
1089 	default:
1090 		warnx("Unknown query format %d", (int)fmt);
1091 		return -1;
1092 	}
1093 }
1094