xref: /netbsd-src/usr.sbin/makemandb/apropos-utils.c (revision a737f1efc9c9888357efb9c2c91a608e40ca64a1)
1*a737f1efSrin /*	$NetBSD: apropos-utils.c,v 1.51 2023/08/03 07:49:23 rin Exp $	*/
2410d0f43Sjoerg /*-
3410d0f43Sjoerg  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
4410d0f43Sjoerg  * All rights reserved.
5410d0f43Sjoerg  *
6410d0f43Sjoerg  * This code was developed as part of Google's Summer of Code 2011 program.
7410d0f43Sjoerg  *
8410d0f43Sjoerg  * Redistribution and use in source and binary forms, with or without
9410d0f43Sjoerg  * modification, are permitted provided that the following conditions
10410d0f43Sjoerg  * are met:
11410d0f43Sjoerg  *
12410d0f43Sjoerg  * 1. Redistributions of source code must retain the above copyright
13410d0f43Sjoerg  *    notice, this list of conditions and the following disclaimer.
14410d0f43Sjoerg  * 2. Redistributions in binary form must reproduce the above copyright
15410d0f43Sjoerg  *    notice, this list of conditions and the following disclaimer in
16410d0f43Sjoerg  *    the documentation and/or other materials provided with the
17410d0f43Sjoerg  *    distribution.
18410d0f43Sjoerg  *
19410d0f43Sjoerg  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20410d0f43Sjoerg  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21410d0f43Sjoerg  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22410d0f43Sjoerg  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23410d0f43Sjoerg  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24410d0f43Sjoerg  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25410d0f43Sjoerg  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26410d0f43Sjoerg  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27410d0f43Sjoerg  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28410d0f43Sjoerg  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29410d0f43Sjoerg  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30410d0f43Sjoerg  * SUCH DAMAGE.
31410d0f43Sjoerg  */
32410d0f43Sjoerg 
33410d0f43Sjoerg #include <sys/cdefs.h>
34*a737f1efSrin __RCSID("$NetBSD: apropos-utils.c,v 1.51 2023/08/03 07:49:23 rin Exp $");
35410d0f43Sjoerg 
36b1203a98Swiz #include <sys/queue.h>
37410d0f43Sjoerg #include <sys/stat.h>
38410d0f43Sjoerg 
39410d0f43Sjoerg #include <assert.h>
40410d0f43Sjoerg #include <ctype.h>
41410d0f43Sjoerg #include <err.h>
42410d0f43Sjoerg #include <math.h>
43410d0f43Sjoerg #include <stdio.h>
44410d0f43Sjoerg #include <stdlib.h>
45410d0f43Sjoerg #include <string.h>
46410d0f43Sjoerg #include <util.h>
47410d0f43Sjoerg #include <zlib.h>
486265ee0dSchristos #include <term.h>
492fe964caSkamil #include <unistd.h>
506265ee0dSchristos #undef tab	// XXX: manconf.h
51410d0f43Sjoerg 
52410d0f43Sjoerg #include "apropos-utils.h"
53188f922dSabhinav #include "custom_apropos_tokenizer.h"
54b1203a98Swiz #include "manconf.h"
55188f922dSabhinav #include "fts3_tokenizer.h"
56410d0f43Sjoerg 
/*
 * orig_callback_data --
 *  Pairs a callback that takes a query_callback_args pointer with an
 *  opaque data pointer.
 *  NOTE(review): presumably used to carry a caller-supplied callback and
 *  its private data through a wrapping callback; no use is visible in
 *  this part of the file, so confirm against the code that fills it in.
 */
57410d0f43Sjoerg typedef struct orig_callback_data {
58410d0f43Sjoerg 	void *data;
59357f7b44Sabhinav 	int (*callback) (query_callback_args*);
60410d0f43Sjoerg } orig_callback_data;
61410d0f43Sjoerg 
/*
 * inverse_document_frequency --
 *  State shared between invocations of rank_func() through the SQLite
 *  user-data pointer.
 *   value:  accumulated (column-weighted) inverse document frequency.
 *   status: 0 until rank_func() has run once; rank_func() only adds to
 *           value while status is 0 and sets status to 1 when it returns.
 */
62410d0f43Sjoerg typedef struct inverse_document_frequency {
63410d0f43Sjoerg 	double value;
64410d0f43Sjoerg 	int status;
65410d0f43Sjoerg } inverse_document_frequency;
66410d0f43Sjoerg 
67410d0f43Sjoerg /* weights for individual columns */
/*
 * rank_func() reads this table as col_weights[icol - 1] with icol starting
 * at 1, so entry 0 belongs to the second column of the mandb table and the
 * first column (section) has no weight. The order of the entries must
 * therefore follow the column order used in the CREATE VIRTUAL TABLE
 * statement in create_db().
 */
68410d0f43Sjoerg static const double col_weights[] = {
69410d0f43Sjoerg 	2.0,	// NAME
70410d0f43Sjoerg 	2.00,	// Name-description
71410d0f43Sjoerg 	0.55,	// DESCRIPTION
72410d0f43Sjoerg 	0.10,	// LIBRARY
73410d0f43Sjoerg 	0.001,	//RETURN VALUES
74410d0f43Sjoerg 	0.20,	//ENVIRONMENT
75410d0f43Sjoerg 	0.01,	//FILES
76410d0f43Sjoerg 	0.001,	//EXIT STATUS
77410d0f43Sjoerg 	2.00,	//DIAGNOSTICS
78410d0f43Sjoerg 	0.05,	//ERRORS
79410d0f43Sjoerg 	0.00,	//md5_hash
80410d0f43Sjoerg 	1.00	//machine
81410d0f43Sjoerg };
82410d0f43Sjoerg 
83f56c3723Sabhinav #ifndef APROPOS_DEBUG
84188f922dSabhinav static int
register_tokenizer(sqlite3 * db)85188f922dSabhinav register_tokenizer(sqlite3 *db)
86188f922dSabhinav {
87188f922dSabhinav 	int rc;
88188f922dSabhinav 	sqlite3_stmt *stmt;
89188f922dSabhinav 	const sqlite3_tokenizer_module *p;
90188f922dSabhinav 	const char *name = "custom_apropos_tokenizer";
91188f922dSabhinav 	get_custom_apropos_tokenizer(&p);
92188f922dSabhinav 	const char *sql = "SELECT fts3_tokenizer(?, ?)";
93188f922dSabhinav 
94188f922dSabhinav 	sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);
95188f922dSabhinav 	rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
96188f922dSabhinav 	if (rc != SQLITE_OK)
97188f922dSabhinav 		return rc;
98188f922dSabhinav 
99188f922dSabhinav 	sqlite3_bind_text(stmt, 1, name, -1, SQLITE_STATIC);
100188f922dSabhinav 	sqlite3_bind_blob(stmt, 2, &p, sizeof(p), SQLITE_STATIC);
101188f922dSabhinav 	sqlite3_step(stmt);
102188f922dSabhinav 
103188f922dSabhinav 	return sqlite3_finalize(stmt);
104188f922dSabhinav }
105f56c3723Sabhinav #endif
106188f922dSabhinav 
/*
 * lower --
 *  Converts the NUL-terminated string str to lower case in place and
 *  returns str. str must not be NULL.
 */
char *
lower(char *str)
{
	assert(str);

	/* Cast to unsigned char: tolower() is undefined for negative values. */
	for (char *p = str; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);

	return str;
}
121410d0f43Sjoerg 
/*
 * concat --
 *  Utility function. Appends a space character followed by the
 *  NUL-terminated string src to *dst, i.e. *dst becomes
 *  *dst + " " + src. The work is done by concat2() with the full
 *  length of src.
 */
void
concat(char **dst, const char *src)
{
	const size_t srclen = strlen(src);

	concat2(dst, src, srclen);
}
132410d0f43Sjoerg 
/*
 * concat2 --
 *  Appends a space character and the first srclen bytes of src to the
 *  string *dst, growing the buffer with erealloc() and storing the
 *  (possibly moved) buffer back into *dst.
 *  If *dst is NULL, *dst is simply set to estrndup(src, srclen) and no
 *  separator is added.
 *  src must not be NULL.
 */
void
concat2(char **dst, const char *src, size_t srclen)
{
	char *buf = *dst;
	size_t oldlen;

	assert(src != NULL);

	if (buf != NULL) {
		oldlen = strlen(buf);

		/* Old contents + separator space + new bytes + NUL byte. */
		buf = erealloc(buf, oldlen + srclen + 2);

		buf[oldlen] = ' ';
		memcpy(buf + oldlen + 1, src, srclen);
		buf[oldlen + 1 + srclen] = '\0';
	} else {
		/* Nothing to append to: start the string with a copy of src. */
		buf = estrndup(src, srclen);
	}

	*dst = buf;
}
166410d0f43Sjoerg 
167410d0f43Sjoerg void
close_db(sqlite3 * db)168410d0f43Sjoerg close_db(sqlite3 *db)
169410d0f43Sjoerg {
170410d0f43Sjoerg 	sqlite3_close(db);
171410d0f43Sjoerg 	sqlite3_shutdown();
172410d0f43Sjoerg }
173410d0f43Sjoerg 
174410d0f43Sjoerg /*
175410d0f43Sjoerg  * create_db --
176410d0f43Sjoerg  *  Creates the database schema.
177410d0f43Sjoerg  */
178410d0f43Sjoerg static int
create_db(sqlite3 * db)179410d0f43Sjoerg create_db(sqlite3 *db)
180410d0f43Sjoerg {
181410d0f43Sjoerg 	const char *sqlstr = NULL;
182410d0f43Sjoerg 	char *schemasql;
183410d0f43Sjoerg 	char *errmsg = NULL;
184410d0f43Sjoerg 
185410d0f43Sjoerg /*------------------------ Create the tables------------------------------*/
186410d0f43Sjoerg 
187410d0f43Sjoerg #if NOTYET
188410d0f43Sjoerg 	sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
189410d0f43Sjoerg #else
190410d0f43Sjoerg 	sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
191410d0f43Sjoerg #endif
192410d0f43Sjoerg 
193410d0f43Sjoerg 	schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
194410d0f43Sjoerg 	    APROPOS_SCHEMA_VERSION);
195410d0f43Sjoerg 	sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
196410d0f43Sjoerg 	if (errmsg != NULL)
197410d0f43Sjoerg 		goto out;
198410d0f43Sjoerg 	sqlite3_free(schemasql);
199410d0f43Sjoerg 
2005e64704aSchristos 	sqlstr =
2015e64704aSchristos 	    //mandb
2025e64704aSchristos 	    "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
203410d0f43Sjoerg 		"name_desc, desc, lib, return_vals, env, files, "
204410d0f43Sjoerg 		"exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
205f56c3723Sabhinav #ifndef APROPOS_DEBUG
206f56c3723Sabhinav 		"compress=zip, uncompress=unzip, tokenize=custom_apropos_tokenizer, "
207f56c3723Sabhinav #else
208f56c3723Sabhinav 		"tokenize=porter, "
209231f71fbSabhinav #endif
210f56c3723Sabhinav 		"notindexed=section, notindexed=md5_hash); "
2115e64704aSchristos 	    //mandb_meta
212410d0f43Sjoerg 	    "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
213410d0f43Sjoerg 		"file UNIQUE, md5_hash UNIQUE, id  INTEGER PRIMARY KEY); "
2145e64704aSchristos 	    //mandb_links
215e70b83fcSabhinav 	    "CREATE TABLE IF NOT EXISTS mandb_links(link COLLATE NOCASE, target, section, "
216933b5da2Sabhinav 		"machine, md5_hash, name_desc); ";
217410d0f43Sjoerg 
218410d0f43Sjoerg 	sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
219410d0f43Sjoerg 	if (errmsg != NULL)
220410d0f43Sjoerg 		goto out;
221410d0f43Sjoerg 
2225e64704aSchristos 	sqlstr =
2235e64704aSchristos 	    "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
224410d0f43Sjoerg 		"(link); "
225410d0f43Sjoerg 	    "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
226d099c692Swiz 		"(device, inode); "
227d099c692Swiz 	    "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
228d099c692Swiz 		"(md5_hash);";
229410d0f43Sjoerg 	sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
230410d0f43Sjoerg 	if (errmsg != NULL)
231410d0f43Sjoerg 		goto out;
232410d0f43Sjoerg 	return 0;
233410d0f43Sjoerg 
234410d0f43Sjoerg out:
235410d0f43Sjoerg 	warnx("%s", errmsg);
236410d0f43Sjoerg 	free(errmsg);
237410d0f43Sjoerg 	sqlite3_close(db);
238410d0f43Sjoerg 	sqlite3_shutdown();
239410d0f43Sjoerg 	return -1;
240410d0f43Sjoerg }
241410d0f43Sjoerg 
242410d0f43Sjoerg /*
243410d0f43Sjoerg  * zip --
2446f0eae44Sgutteridge  *  User defined SQLite function to compress the FTS table
245410d0f43Sjoerg  */
246410d0f43Sjoerg static void
zip(sqlite3_context * pctx,int nval,sqlite3_value ** apval)247410d0f43Sjoerg zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
248410d0f43Sjoerg {
249410d0f43Sjoerg 	int nin;
250410d0f43Sjoerg 	long int nout;
251410d0f43Sjoerg 	const unsigned char * inbuf;
252410d0f43Sjoerg 	unsigned char *outbuf;
253410d0f43Sjoerg 
254410d0f43Sjoerg 	assert(nval == 1);
255410d0f43Sjoerg 	nin = sqlite3_value_bytes(apval[0]);
256410d0f43Sjoerg 	inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
257410d0f43Sjoerg 	nout = nin + 13 + (nin + 999) / 1000;
258410d0f43Sjoerg 	outbuf = emalloc(nout);
259410d0f43Sjoerg 	compress(outbuf, (unsigned long *) &nout, inbuf, nin);
260410d0f43Sjoerg 	sqlite3_result_blob(pctx, outbuf, nout, free);
261410d0f43Sjoerg }
262410d0f43Sjoerg 
263410d0f43Sjoerg /*
264410d0f43Sjoerg  * unzip --
2656f0eae44Sgutteridge  *  User defined SQLite function to uncompress the FTS table.
266410d0f43Sjoerg  */
267410d0f43Sjoerg static void
unzip(sqlite3_context * pctx,int nval,sqlite3_value ** apval)268410d0f43Sjoerg unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
269410d0f43Sjoerg {
270410d0f43Sjoerg 	unsigned int rc;
271410d0f43Sjoerg 	unsigned char *outbuf;
272410d0f43Sjoerg 	z_stream stream;
273e6b2ce53Schristos 	long total_out;
274410d0f43Sjoerg 
275410d0f43Sjoerg 	assert(nval == 1);
276e6b2ce53Schristos 	memset(&stream, 0, sizeof(stream));
277410d0f43Sjoerg 	stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
278410d0f43Sjoerg 	stream.avail_in = sqlite3_value_bytes(apval[0]);
279410d0f43Sjoerg 	stream.zalloc = NULL;
280410d0f43Sjoerg 	stream.zfree = NULL;
281410d0f43Sjoerg 
282410d0f43Sjoerg 	if (inflateInit(&stream) != Z_OK) {
283410d0f43Sjoerg 		return;
284410d0f43Sjoerg 	}
285410d0f43Sjoerg 
286e6b2ce53Schristos 	total_out = stream.avail_out = stream.avail_in * 2 + 100;
287e6b2ce53Schristos 	stream.next_out = outbuf = emalloc(stream.avail_out);
288410d0f43Sjoerg 	while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
289410d0f43Sjoerg 		if (rc != Z_OK ||
290410d0f43Sjoerg 		    (stream.avail_out != 0 && stream.avail_in == 0)) {
291410d0f43Sjoerg 			free(outbuf);
292410d0f43Sjoerg 			return;
293410d0f43Sjoerg 		}
294e6b2ce53Schristos 		total_out <<= 1;
295e6b2ce53Schristos 		outbuf = erealloc(outbuf, total_out);
296410d0f43Sjoerg 		stream.next_out = outbuf + stream.total_out;
297e6b2ce53Schristos 		stream.avail_out = total_out - stream.total_out;
298410d0f43Sjoerg 	}
299410d0f43Sjoerg 	if (inflateEnd(&stream) != Z_OK) {
300410d0f43Sjoerg 		free(outbuf);
301410d0f43Sjoerg 		return;
302410d0f43Sjoerg 	}
303e6b2ce53Schristos 	if (stream.total_out == 0) {
304e6b2ce53Schristos 		free(outbuf);
305e6b2ce53Schristos 		return;
306e6b2ce53Schristos 	}
307410d0f43Sjoerg 	outbuf = erealloc(outbuf, stream.total_out);
308410d0f43Sjoerg 	sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free);
309410d0f43Sjoerg }
310410d0f43Sjoerg 
311b1203a98Swiz /*
312b1203a98Swiz  * get_dbpath --
313b1203a98Swiz  *   Read the path of the database from man.conf and return.
314b1203a98Swiz  */
315b1203a98Swiz char *
get_dbpath(const char * manconf)316b1203a98Swiz get_dbpath(const char *manconf)
317b1203a98Swiz {
318b1203a98Swiz 	TAG *tp;
319b1203a98Swiz 	char *dbpath;
320b1203a98Swiz 
321b1203a98Swiz 	config(manconf);
322b1203a98Swiz 	tp = gettag("_mandb", 1);
323b1203a98Swiz 	if (!tp)
324b1203a98Swiz 		return NULL;
325b1203a98Swiz 
326b1203a98Swiz 	if (TAILQ_EMPTY(&tp->entrylist))
327b1203a98Swiz 		return NULL;
328b1203a98Swiz 
329b1203a98Swiz 	dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
330b1203a98Swiz 	return dbpath;
331b1203a98Swiz }
332b1203a98Swiz 
333410d0f43Sjoerg /* init_db --
334410d0f43Sjoerg  *   Prepare the database. Register the compress/uncompress functions and the
335410d0f43Sjoerg  *   stopword tokenizer.
336410d0f43Sjoerg  *	 db_flag specifies the mode in which to open the database. 3 options are
337410d0f43Sjoerg  *   available:
338410d0f43Sjoerg  *   	1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
339410d0f43Sjoerg  *  	2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
340410d0f43Sjoerg  *  	3. DB_CREATE: Open in read-write mode. It will try to create the db if
341410d0f43Sjoerg  *			it does not exist already.
342410d0f43Sjoerg  *  RETURN VALUES:
3435e64704aSchristos  *		The function will return NULL in case the db does not exist
3445e64704aSchristos  *		and DB_CREATE
345410d0f43Sjoerg  *  	was not specified. And in case DB_CREATE was specified and yet NULL is
346410d0f43Sjoerg  *  	returned, then there was some other error.
347410d0f43Sjoerg  *  	In normal cases the function should return a handle to the db.
348410d0f43Sjoerg  */
349410d0f43Sjoerg sqlite3 *
init_db(mandb_access_mode db_flag,const char * manconf)35090f8d04eSchristos init_db(mandb_access_mode db_flag, const char *manconf)
351410d0f43Sjoerg {
352410d0f43Sjoerg 	sqlite3 *db = NULL;
353410d0f43Sjoerg 	sqlite3_stmt *stmt;
354410d0f43Sjoerg 	struct stat sb;
355410d0f43Sjoerg 	int rc;
356410d0f43Sjoerg 	int create_db_flag = 0;
357410d0f43Sjoerg 
358b1203a98Swiz 	char *dbpath = get_dbpath(manconf);
359b1203a98Swiz 	if (dbpath == NULL)
360b1203a98Swiz 		errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
36190f8d04eSchristos 
362b1203a98Swiz 	if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
36390f8d04eSchristos 		/* Database does not exist, check if DB_CREATE was specified,
36490f8d04eSchristos 		 * and set flag to create the database schema
365410d0f43Sjoerg 		 */
366410d0f43Sjoerg 		if (db_flag != (MANDB_CREATE)) {
367410d0f43Sjoerg 			warnx("Missing apropos database. "
368410d0f43Sjoerg 			      "Please run makemandb to create it.");
369410d0f43Sjoerg 			return NULL;
370410d0f43Sjoerg 		}
371410d0f43Sjoerg 		create_db_flag = 1;
37290f8d04eSchristos 	} else {
37390f8d04eSchristos 		/*
37490f8d04eSchristos 		 * Database exists. Check if we have the permissions
37590f8d04eSchristos 		 * to read/write the files
37690f8d04eSchristos 		 */
37790f8d04eSchristos 		int access_mode = R_OK;
3782c6689d2Schristos 		switch (db_flag) {
37990f8d04eSchristos 		case MANDB_CREATE:
38090f8d04eSchristos 		case MANDB_WRITE:
38190f8d04eSchristos 			access_mode |= W_OK;
38290f8d04eSchristos 			break;
38390f8d04eSchristos 		default:
38490f8d04eSchristos 			break;
38590f8d04eSchristos 		}
38690f8d04eSchristos 		if ((access(dbpath, access_mode)) != 0) {
38790f8d04eSchristos 			warnx("Unable to access the database, please check"
38890f8d04eSchristos 			    " permissions for `%s'", dbpath);
38990f8d04eSchristos 			return NULL;
39090f8d04eSchristos 		}
391410d0f43Sjoerg 	}
392410d0f43Sjoerg 
393410d0f43Sjoerg 	sqlite3_initialize();
394b1203a98Swiz 	rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
395410d0f43Sjoerg 
396410d0f43Sjoerg 	if (rc != SQLITE_OK) {
397410d0f43Sjoerg 		warnx("%s", sqlite3_errmsg(db));
39890f8d04eSchristos 		goto error;
399410d0f43Sjoerg 	}
400410d0f43Sjoerg 
401188f922dSabhinav 	sqlite3_extended_result_codes(db, 1);
402188f922dSabhinav 
403f56c3723Sabhinav #ifndef APROPOS_DEBUG
404188f922dSabhinav 	rc = register_tokenizer(db);
405188f922dSabhinav 	if (rc != SQLITE_OK) {
406188f922dSabhinav 		warnx("Unable to register custom tokenizer: %s", sqlite3_errmsg(db));
407188f922dSabhinav 		goto error;
408188f922dSabhinav 	}
409f56c3723Sabhinav #endif
410188f922dSabhinav 
411410d0f43Sjoerg 	if (create_db_flag && create_db(db) < 0) {
412410d0f43Sjoerg 		warnx("%s", "Unable to create database schema");
413410d0f43Sjoerg 		goto error;
414410d0f43Sjoerg 	}
415410d0f43Sjoerg 
416410d0f43Sjoerg 	rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
417410d0f43Sjoerg 	if (rc != SQLITE_OK) {
418d0663c21Sapb 		warnx("Unable to query schema version: %s",
419d0663c21Sapb 		    sqlite3_errmsg(db));
420410d0f43Sjoerg 		goto error;
421410d0f43Sjoerg 	}
422410d0f43Sjoerg 	if (sqlite3_step(stmt) != SQLITE_ROW) {
423410d0f43Sjoerg 		sqlite3_finalize(stmt);
424d0663c21Sapb 		warnx("Unable to query schema version: %s",
425d0663c21Sapb 		    sqlite3_errmsg(db));
426410d0f43Sjoerg 		goto error;
427410d0f43Sjoerg 	}
428410d0f43Sjoerg 	if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
429410d0f43Sjoerg 		sqlite3_finalize(stmt);
430410d0f43Sjoerg 		warnx("Incorrect schema version found. "
431410d0f43Sjoerg 		      "Please run makemandb -f.");
432410d0f43Sjoerg 		goto error;
433410d0f43Sjoerg 	}
434410d0f43Sjoerg 	sqlite3_finalize(stmt);
435410d0f43Sjoerg 
436410d0f43Sjoerg 
437410d0f43Sjoerg 	/* Register the zip and unzip functions for FTS compression */
4385e64704aSchristos 	rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip,
4395e64704aSchristos 	    NULL, NULL);
440410d0f43Sjoerg 	if (rc != SQLITE_OK) {
441d0663c21Sapb 		warnx("Unable to register function: compress: %s",
442d0663c21Sapb 		    sqlite3_errmsg(db));
443410d0f43Sjoerg 		goto error;
444410d0f43Sjoerg 	}
445410d0f43Sjoerg 
446410d0f43Sjoerg 	rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
447410d0f43Sjoerg                                  unzip, NULL, NULL);
448410d0f43Sjoerg 	if (rc != SQLITE_OK) {
449d0663c21Sapb 		warnx("Unable to register function: uncompress: %s",
450d0663c21Sapb 		    sqlite3_errmsg(db));
451410d0f43Sjoerg 		goto error;
452410d0f43Sjoerg 	}
453410d0f43Sjoerg 	return db;
454b1203a98Swiz 
455410d0f43Sjoerg error:
45690f8d04eSchristos 	close_db(db);
457410d0f43Sjoerg 	return NULL;
458410d0f43Sjoerg }
459410d0f43Sjoerg 
460410d0f43Sjoerg /*
461410d0f43Sjoerg  * rank_func --
4626f0eae44Sgutteridge  *  SQLite user defined function for ranking the documents.
463410d0f43Sjoerg  *  For each phrase of the query, it computes the tf and idf and adds them over.
464410d0f43Sjoerg  *  It computes the final rank, by multiplying tf and idf together.
465410d0f43Sjoerg  *  Weight of term t for document d = (term frequency of t in d *
466410d0f43Sjoerg  *                                      inverse document frequency of t)
467410d0f43Sjoerg  *
468410d0f43Sjoerg  *  Term Frequency of term t in document d = Number of times t occurs in d /
4695e64704aSchristos  *	Number of times t appears in all documents
470410d0f43Sjoerg  *
471410d0f43Sjoerg  *  Inverse document frequency of t = log(Total number of documents /
472410d0f43Sjoerg  *										Number of documents in which t occurs)
473410d0f43Sjoerg  */
474410d0f43Sjoerg static void
rank_func(sqlite3_context * pctx,int nval,sqlite3_value ** apval)475410d0f43Sjoerg rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
476410d0f43Sjoerg {
477410d0f43Sjoerg 	inverse_document_frequency *idf = sqlite3_user_data(pctx);
478410d0f43Sjoerg 	double tf = 0.0;
479410d0f43Sjoerg 	const unsigned int *matchinfo;
480410d0f43Sjoerg 	int ncol;
481410d0f43Sjoerg 	int nphrase;
482410d0f43Sjoerg 	int iphrase;
483410d0f43Sjoerg 	int ndoc;
484410d0f43Sjoerg 	int doclen = 0;
485410d0f43Sjoerg 	const double k = 3.75;
4865e64704aSchristos 	/*
4875e64704aSchristos 	 * Check that the number of arguments passed to this
4885e64704aSchristos 	 * function is correct.
4895e64704aSchristos 	 */
490410d0f43Sjoerg 	assert(nval == 1);
491410d0f43Sjoerg 
492410d0f43Sjoerg 	matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
493410d0f43Sjoerg 	nphrase = matchinfo[0];
494410d0f43Sjoerg 	ncol = matchinfo[1];
495410d0f43Sjoerg 	ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
496410d0f43Sjoerg 	for (iphrase = 0; iphrase < nphrase; iphrase++) {
497410d0f43Sjoerg 		int icol;
4985e64704aSchristos 		const unsigned int *phraseinfo =
4995e64704aSchristos 		    &matchinfo[2 + ncol + iphrase * ncol * 3];
500410d0f43Sjoerg 		for(icol = 1; icol < ncol; icol++) {
501410d0f43Sjoerg 
5025e64704aSchristos 			/* nhitcount: number of times the current phrase occurs
5035e64704aSchristos 			 * 	in the current column in the current document.
5045e64704aSchristos 			 * nglobalhitcount: number of times current phrase
5055e64704aSchristos 			 *	occurs in the current column in all documents.
5065e64704aSchristos 			 * ndocshitcount: number of documents in which the
5075e64704aSchristos 			 *	current phrase occurs in the current column at
5085e64704aSchristos 			 *	least once.
509410d0f43Sjoerg 			 */
510410d0f43Sjoerg   			int nhitcount = phraseinfo[3 * icol];
511410d0f43Sjoerg 			int nglobalhitcount = phraseinfo[3 * icol + 1];
512410d0f43Sjoerg 			int ndocshitcount = phraseinfo[3 * icol + 2];
513410d0f43Sjoerg 			doclen = matchinfo[2 + icol ];
514410d0f43Sjoerg 			double weight = col_weights[icol - 1];
515410d0f43Sjoerg 			if (idf->status == 0 && ndocshitcount)
5165e64704aSchristos 				idf->value +=
5175e64704aSchristos 				    log(((double)ndoc / ndocshitcount))* weight;
518410d0f43Sjoerg 
5195e64704aSchristos 			/*
5205e64704aSchristos 			 * Dividing the tf by document length to normalize
5215e64704aSchristos 			 * the effect of longer documents.
522410d0f43Sjoerg 			 */
523410d0f43Sjoerg 			if (nglobalhitcount > 0 && nhitcount)
5245e64704aSchristos 				tf += (((double)nhitcount  * weight)
5255e64704aSchristos 				    / (nglobalhitcount * doclen));
526410d0f43Sjoerg 		}
527410d0f43Sjoerg 	}
528410d0f43Sjoerg 	idf->status = 1;
529410d0f43Sjoerg 
5305e64704aSchristos 	/*
5315e64704aSchristos 	 * Final score: Dividing by k + tf further normalizes the weight
5325e64704aSchristos 	 * leading to better results. The value of k is experimental
533410d0f43Sjoerg 	 */
5345e64704aSchristos 	double score = (tf * idf->value) / (k + tf);
535410d0f43Sjoerg 	sqlite3_result_double(pctx, score);
536410d0f43Sjoerg 	return;
537410d0f43Sjoerg }
538410d0f43Sjoerg 
539410d0f43Sjoerg /*
5404647c1ecSabhinav  * generates sql query for matching the user entered query
541410d0f43Sjoerg  */
5424647c1ecSabhinav static char *
generate_search_query(query_args * args,const char * snippet_args[3])5434647c1ecSabhinav generate_search_query(query_args *args, const char *snippet_args[3])
544410d0f43Sjoerg {
545410d0f43Sjoerg 	const char *default_snippet_args[3];
546410d0f43Sjoerg 	char *section_clause = NULL;
547410d0f43Sjoerg 	char *limit_clause = NULL;
548410d0f43Sjoerg 	char *machine_clause = NULL;
5493c013439Sabhinav 	char *query = NULL;
550410d0f43Sjoerg 
5513c013439Sabhinav 	if (args->machine) {
5523c013439Sabhinav 		machine_clause = sqlite3_mprintf("AND mandb.machine=%Q", args->machine);
5533c013439Sabhinav 		if (machine_clause == NULL)
5543c013439Sabhinav 			goto RETURN;
5553c013439Sabhinav 	}
556410d0f43Sjoerg 
5573c013439Sabhinav 	if (args->nrec >= 0) {
5583c013439Sabhinav 		/* Use the provided number of records and offset */
5593c013439Sabhinav 		limit_clause = sqlite3_mprintf(" LIMIT %d OFFSET %d",
5603c013439Sabhinav 		    args->nrec, args->offset);
5613c013439Sabhinav 		if (limit_clause == NULL)
5623c013439Sabhinav 			goto RETURN;
5633c013439Sabhinav 	}
564410d0f43Sjoerg 
565410d0f43Sjoerg 	/* We want to build a query of the form: "select x,y,z from mandb where
566ba948c91Sabhinav 	 * mandb match :query [AND (section IN ('1', '2')]
567ba948c91Sabhinav 	 * ORDER BY rank DESC [LIMIT 10 OFFSET 0]"
5685e64704aSchristos 	 * NOTES:
569ba948c91Sabhinav 	 *   1. The portion in first pair of square brackets is optional.
570ba948c91Sabhinav 	 *      It will be there only if the user has specified an option
5715e64704aSchristos 	 *      to search in one or more specific sections.
572ba948c91Sabhinav 	 *   2. The LIMIT portion will be there if the user has specified
573ba948c91Sabhinav 	 *      a limit using the -n option.
574410d0f43Sjoerg 	 */
5751373f782Sabhinav 	if (args->sections && args->sections[0]) {
5761373f782Sabhinav 		concat(&section_clause, " AND mandb.section IN (");
5771373f782Sabhinav 		for (size_t i = 0; args->sections[i]; i++) {
578410d0f43Sjoerg 			char *temp;
5791373f782Sabhinav 			char c = args->sections[i + 1]? ',': ')';
5801373f782Sabhinav 			if ((temp = sqlite3_mprintf("%Q%c", args->sections[i], c)) == NULL)
5811373f782Sabhinav 				goto RETURN;
582410d0f43Sjoerg 			concat(&section_clause, temp);
58369479387Sabhinav 			sqlite3_free(temp);
584410d0f43Sjoerg 		}
585410d0f43Sjoerg 	}
5864647c1ecSabhinav 
587410d0f43Sjoerg 	if (snippet_args == NULL) {
588410d0f43Sjoerg 		default_snippet_args[0] = "";
589410d0f43Sjoerg 		default_snippet_args[1] = "";
590410d0f43Sjoerg 		default_snippet_args[2] = "...";
591410d0f43Sjoerg 		snippet_args = default_snippet_args;
592410d0f43Sjoerg 	}
5934647c1ecSabhinav 
594cb0641ebSchristos 	if (args->legacy) {
5952b42c8b2Schristos 	    char *wild;
5962b42c8b2Schristos 	    easprintf(&wild, "%%%s%%", args->search_str);
5974647c1ecSabhinav 	    query = sqlite3_mprintf("SELECT section, name, name_desc, machine"
598cb0641ebSchristos 		" FROM mandb"
5992b42c8b2Schristos 		" WHERE name LIKE %Q OR name_desc LIKE %Q "
600cb0641ebSchristos 		"%s"
601cb0641ebSchristos 		"%s",
602533b5973Schristos 		wild, wild,
603cb0641ebSchristos 		section_clause ? section_clause : "",
604cb0641ebSchristos 		limit_clause ? limit_clause : "");
6052b42c8b2Schristos 		free(wild);
606e70b83fcSabhinav 	} else if (strchr(args->search_str, ' ') == NULL) {
607e70b83fcSabhinav 		/*
608e70b83fcSabhinav 		 * If it's a single word query, we want to search in the
609e70b83fcSabhinav 		 * links table as well. If the link table contains an entry
610e70b83fcSabhinav 		 * for the queried keyword, we want to use that as the name of
611e70b83fcSabhinav 		 * the man page.
612e70b83fcSabhinav 		 * For example, for `apropos realloc` the output should be
613e70b83fcSabhinav 		 * realloc(3) and not malloc(3).
614e70b83fcSabhinav 		 */
615e70b83fcSabhinav 		query = sqlite3_mprintf(
616e70b83fcSabhinav 		    "SELECT section, name, name_desc, machine,"
617e70b83fcSabhinav 		    " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
618e70b83fcSabhinav 		    " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
619e70b83fcSabhinav 		    " FROM mandb WHERE name NOT IN ("
620e70b83fcSabhinav 		    " SELECT target FROM mandb_links WHERE link=%Q AND"
621e70b83fcSabhinav 		    " mandb_links.section=mandb.section) AND mandb MATCH %Q %s %s"
622e70b83fcSabhinav 		    " UNION"
623e70b83fcSabhinav 		    " SELECT mandb.section, mandb_links.link AS name, mandb.name_desc,"
624e70b83fcSabhinav 		    " mandb.machine, '' AS snippet, 100.00 AS rank"
625e70b83fcSabhinav 		    " FROM mandb JOIN mandb_links ON mandb.name=mandb_links.target and"
626e70b83fcSabhinav 		    " mandb.section=mandb_links.section WHERE mandb_links.link=%Q"
627e70b83fcSabhinav 		    " %s %s"
628e70b83fcSabhinav 		    " ORDER BY rank DESC %s",
629e70b83fcSabhinav 		    snippet_args[0], snippet_args[1], snippet_args[2],
630e70b83fcSabhinav 		    args->search_str, args->search_str, section_clause ? section_clause : "",
631e70b83fcSabhinav 		    machine_clause ? machine_clause : "", args->search_str,
632e70b83fcSabhinav 		    machine_clause ? machine_clause : "",
633e70b83fcSabhinav 		    section_clause ? section_clause : "",
634e70b83fcSabhinav 		    limit_clause ? limit_clause : "");
635cb0641ebSchristos 	} else {
636410d0f43Sjoerg 	    query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
637410d0f43Sjoerg 		" snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
638410d0f43Sjoerg 		" rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
639410d0f43Sjoerg 		" FROM mandb"
640410d0f43Sjoerg 		" WHERE mandb MATCH %Q %s "
641410d0f43Sjoerg 		"%s"
642410d0f43Sjoerg 		" ORDER BY rank DESC"
643410d0f43Sjoerg 		"%s",
644cb0641ebSchristos 		snippet_args[0], snippet_args[1], snippet_args[2],
645cb0641ebSchristos 		args->search_str, machine_clause ? machine_clause : "",
646410d0f43Sjoerg 		section_clause ? section_clause : "",
647410d0f43Sjoerg 		limit_clause ? limit_clause : "");
648cb0641ebSchristos 	}
649410d0f43Sjoerg 
6503c013439Sabhinav RETURN:
65169479387Sabhinav 	sqlite3_free(machine_clause);
652a5fb0c00Sleot 	free(section_clause);
65369479387Sabhinav 	sqlite3_free(limit_clause);
6544647c1ecSabhinav 	return query;
655410d0f43Sjoerg }
6564647c1ecSabhinav 
6572d0aa66bSchristos static const char *
get_stmt_col_text(sqlite3_stmt * stmt,int col)6582d0aa66bSchristos get_stmt_col_text(sqlite3_stmt *stmt, int col)
6592d0aa66bSchristos {
6602d0aa66bSchristos 	const char *t = (const char *) sqlite3_column_text(stmt, col);
6612d0aa66bSchristos 	return t == NULL ? "*?*" : t;
6622d0aa66bSchristos }
6632d0aa66bSchristos 
6644647c1ecSabhinav /*
6654647c1ecSabhinav  * Execute the full text search query and return the number of results
6664647c1ecSabhinav  * obtained.
6674647c1ecSabhinav  */
668521bd3eaSgutteridge static int
execute_search_query(sqlite3 * db,char * query,query_args * args)6694647c1ecSabhinav execute_search_query(sqlite3 *db, char *query, query_args *args)
6704647c1ecSabhinav {
6714647c1ecSabhinav 	sqlite3_stmt *stmt;
6724647c1ecSabhinav 	char *name;
6734647c1ecSabhinav 	char *slash_ptr;
6744647c1ecSabhinav 	const char *name_temp;
6754647c1ecSabhinav 	char *m = NULL;
6764647c1ecSabhinav 	int rc;
677357f7b44Sabhinav 	query_callback_args callback_args;
6784647c1ecSabhinav 	inverse_document_frequency idf = {0, 0};
6794647c1ecSabhinav 
6804647c1ecSabhinav 	if (!args->legacy) {
6814647c1ecSabhinav 		/* Register the rank function */
6824647c1ecSabhinav 		rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY,
6834647c1ecSabhinav 		    (void *) &idf, rank_func, NULL, NULL);
6844647c1ecSabhinav 		if (rc != SQLITE_OK) {
6854647c1ecSabhinav 			warnx("Unable to register the ranking function: %s",
6864647c1ecSabhinav 			    sqlite3_errmsg(db));
6874647c1ecSabhinav 			sqlite3_close(db);
6884647c1ecSabhinav 			sqlite3_shutdown();
6894647c1ecSabhinav 			exit(EXIT_FAILURE);
6904647c1ecSabhinav 		}
6914647c1ecSabhinav 	}
6924647c1ecSabhinav 
693410d0f43Sjoerg 	rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
694410d0f43Sjoerg 	if (rc == SQLITE_IOERR) {
695410d0f43Sjoerg 		warnx("Corrupt database. Please rerun makemandb");
696410d0f43Sjoerg 		return -1;
697410d0f43Sjoerg 	} else if (rc != SQLITE_OK) {
698410d0f43Sjoerg 		warnx("%s", sqlite3_errmsg(db));
699410d0f43Sjoerg 		return -1;
700410d0f43Sjoerg 	}
701410d0f43Sjoerg 
702521bd3eaSgutteridge 	int nresults = rc = 0;
703521bd3eaSgutteridge 	while (rc == 0 && sqlite3_step(stmt) == SQLITE_ROW) {
7044647c1ecSabhinav 		nresults++;
7052d0aa66bSchristos 		callback_args.section = get_stmt_col_text(stmt, 0);
7062d0aa66bSchristos 		name_temp = get_stmt_col_text(stmt, 1);
7072d0aa66bSchristos 		callback_args.name_desc = get_stmt_col_text(stmt, 2);
708b0ca50fbSabhinav 		callback_args.machine = (const char *) sqlite3_column_text(stmt, 3);
709496b8ce3Sabhinav 		if (!args->legacy) {
7102d0aa66bSchristos 			callback_args.snippet = get_stmt_col_text(stmt, 4);
7112d0aa66bSchristos 			callback_args.snippet_length =
7122d0aa66bSchristos 			    strlen(callback_args.snippet);
713496b8ce3Sabhinav 		} else {
714357f7b44Sabhinav 			callback_args.snippet = "";
715496b8ce3Sabhinav 			callback_args.snippet_length = 1;
716496b8ce3Sabhinav 		}
717f41e473dSwiz 		if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
718f41e473dSwiz 			name_temp = slash_ptr + 1;
719357f7b44Sabhinav 		if (callback_args.machine && callback_args.machine[0]) {
720357f7b44Sabhinav 			m = estrdup(callback_args.machine);
7215e64704aSchristos 			easprintf(&name, "%s/%s", lower(m), name_temp);
722410d0f43Sjoerg 			free(m);
723410d0f43Sjoerg 		} else {
7242d0aa66bSchristos 			name = estrdup(get_stmt_col_text(stmt, 1));
725410d0f43Sjoerg 		}
726357f7b44Sabhinav 		callback_args.name = name;
727357f7b44Sabhinav 		callback_args.other_data = args->callback_data;
728521bd3eaSgutteridge 		rc = (args->callback)(&callback_args);
729410d0f43Sjoerg 		free(name);
730410d0f43Sjoerg 	}
731410d0f43Sjoerg 	sqlite3_finalize(stmt);
732521bd3eaSgutteridge 	return (rc < 0) ? rc : nresults;
7334647c1ecSabhinav }
7344647c1ecSabhinav 
7354647c1ecSabhinav 
7364647c1ecSabhinav /*
7374647c1ecSabhinav  *  run_query_internal --
7384647c1ecSabhinav  *  Performs the searches for the keywords entered by the user.
7394647c1ecSabhinav  *  The 2nd param: snippet_args is an array of strings providing values for the
7404647c1ecSabhinav  *  last three parameters to the snippet function of sqlite. (Look at the docs).
7414647c1ecSabhinav  *  The 3rd param: args contains rest of the search parameters. Look at
7424647c1ecSabhinav  *  arpopos-utils.h for the description of individual fields.
7434647c1ecSabhinav  *
7444647c1ecSabhinav  */
7454647c1ecSabhinav static int
run_query_internal(sqlite3 * db,const char * snippet_args[3],query_args * args)7464647c1ecSabhinav run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args)
7474647c1ecSabhinav {
7484647c1ecSabhinav 	char *query;
7494647c1ecSabhinav 	query = generate_search_query(args, snippet_args);
7504647c1ecSabhinav 	if (query == NULL) {
7514647c1ecSabhinav 		*args->errmsg = estrdup("malloc failed");
7524647c1ecSabhinav 		return -1;
7534647c1ecSabhinav 	}
7544647c1ecSabhinav 
755521bd3eaSgutteridge 	int rc = execute_search_query(db, query, args);
756410d0f43Sjoerg 	sqlite3_free(query);
757521bd3eaSgutteridge 	return (rc < 0 || *(args->errmsg) != NULL) ? -1 : 0;
758410d0f43Sjoerg }
759410d0f43Sjoerg 
/*
 * get_escaped_html_string --
 *  Return a newly allocated copy of `src' prepared for embedding in HTML:
 *  '<', '>' and '"' become &lt;, &gt; and &quot;, '&' becomes &amp;, and the
 *  marker bytes \002 and \003 become <b> and </b>.  An '&' that directly
 *  follows a backslash is copied unchanged, since it is part of an mdoc
 *  escape sequence such as \&.
 *
 *  On return *slen holds the length of the result, excluding the
 *  terminating NUL.  The value of *slen on entry is not consulted; the
 *  buffer is sized from src itself.  The caller releases the result with
 *  free().
 */
static char *
get_escaped_html_string(const char *src, size_t *slen)
{
	static const char trouble[] = "<>\"&\002\003";
	size_t srclen = 0;
	size_t ntrouble = 0;
	char *dst, *ddst;

	/*
	 * First pass: measure src and count the special bytes.  The scan
	 * stops on the NUL itself; stepping past the position strcspn()
	 * returns is only done when that position holds a special byte.
	 */
	for (const char *p = src;;) {
		size_t run = strcspn(p, trouble);

		srclen += run;
		p += run;
		if (*p == '\0')
			break;
		ntrouble++;
		srclen++;
		p++;
	}

	/*
	 * The longest replacement, &quot;, is 6 bytes for 1 byte of input,
	 * so 5 extra bytes per special byte is always enough.
	 */
	ddst = dst = emalloc(srclen + ntrouble * 5 + 1);

	for (const char *p = src; *p != '\0'; p++) {
		const char *rep = NULL;

		switch (*p) {
		case '<':
			rep = "&lt;";
			break;
		case '>':
			rep = "&gt;";
			break;
		case '\"':
			rep = "&quot;";
			break;
		case '&':
			/*
			 * Quote unless the previous byte is a backslash.  At
			 * the very start of src there is no previous byte, so
			 * the '&' is quoted.
			 */
			if (p == src || p[-1] != '\\')
				rep = "&amp;";
			break;
		case '\002':
			rep = "<b>";
			break;
		case '\003':
			rep = "</b>";
			break;
		default:
			break;
		}

		if (rep == NULL) {
			*dst++ = *p;
		} else {
			size_t replen = strlen(rep);

			memcpy(dst, rep, replen);
			dst += replen;
		}
	}
	*dst = '\0';
	*slen = (size_t)(dst - ddst);
	return ddst;
}
827751d5fc6Schristos 
828751d5fc6Schristos 
829410d0f43Sjoerg /*
830410d0f43Sjoerg  * callback_html --
831410d0f43Sjoerg  *  Callback function for run_query_html. It builds the html output and then
832410d0f43Sjoerg  *  calls the actual user supplied callback function.
833410d0f43Sjoerg  */
834410d0f43Sjoerg static int
callback_html(query_callback_args * callback_args)835357f7b44Sabhinav callback_html(query_callback_args *callback_args)
836410d0f43Sjoerg {
837357f7b44Sabhinav 	struct orig_callback_data *orig_data = callback_args->other_data;
838357f7b44Sabhinav 	int (*callback)(query_callback_args*) = orig_data->callback;
839357f7b44Sabhinav 	size_t length = callback_args->snippet_length;
840357f7b44Sabhinav 	size_t name_description_length = strlen(callback_args->name_desc);
841357f7b44Sabhinav 	char *qsnippet = get_escaped_html_string(callback_args->snippet, &length);
842357f7b44Sabhinav 	char *qname_description = get_escaped_html_string(callback_args->name_desc,
843751d5fc6Schristos 	    &name_description_length);
844357f7b44Sabhinav 	callback_args->name_desc = qname_description;
845357f7b44Sabhinav 	callback_args->snippet = qsnippet;
846357f7b44Sabhinav 	callback_args->snippet_length = length;
847357f7b44Sabhinav 	callback_args->other_data = orig_data->data;
848521bd3eaSgutteridge 	int rc = (*callback)(callback_args);
849410d0f43Sjoerg 	free(qsnippet);
850751d5fc6Schristos 	free(qname_description);
851521bd3eaSgutteridge 	return rc;
852410d0f43Sjoerg }
853410d0f43Sjoerg 
854410d0f43Sjoerg /*
855410d0f43Sjoerg  * run_query_html --
856410d0f43Sjoerg  *  Utility function to output query result in HTML format.
857f0a7346dSsnj  *  It internally calls run_query only, but it first passes the output to its
858410d0f43Sjoerg  *  own custom callback function, which preprocess the snippet for quoting
859410d0f43Sjoerg  *  inline HTML fragments.
860410d0f43Sjoerg  *  After that it delegates the call the actual user supplied callback function.
861410d0f43Sjoerg  */
862910ecac4Schristos static int
run_query_html(sqlite3 * db,query_args * args)863410d0f43Sjoerg run_query_html(sqlite3 *db, query_args *args)
864410d0f43Sjoerg {
865410d0f43Sjoerg 	struct orig_callback_data orig_data;
866410d0f43Sjoerg 	orig_data.callback = args->callback;
867410d0f43Sjoerg 	orig_data.data = args->callback_data;
868410d0f43Sjoerg 	const char *snippet_args[] = {"\002", "\003", "..."};
869410d0f43Sjoerg 	args->callback = &callback_html;
870410d0f43Sjoerg 	args->callback_data = (void *) &orig_data;
871910ecac4Schristos 	return run_query_internal(db, snippet_args, args);
872410d0f43Sjoerg }
873410d0f43Sjoerg 
/*
 * ul_pager --
 *  Return a newly allocated copy of `s'.  When `ul' is non-zero every byte
 *  c of s is expanded to the three bytes '_', '\b', c (underline, pager
 *  style); when `ul' is zero the copy is verbatim.
 */
static char *
ul_pager(int ul, const char *s)
{
	if (ul == 0)
		return estrdup(s);

	/* a -> _\ba, so three output bytes per input byte plus the NUL */
	const size_t n = strlen(s);
	char *out = emalloc(n * 3 + 1);
	size_t o = 0;

	for (size_t k = 0; k < n; k++) {
		out[o++] = '_';
		out[o++] = '\b';
		out[o++] = s[k];
	}
	out[o] = '\0';
	return out;
}
8986265ee0dSchristos 
8996265ee0dSchristos /*
900410d0f43Sjoerg  * callback_pager --
901410d0f43Sjoerg  *  A callback similar to callback_html. It overstrikes the matching text in
902410d0f43Sjoerg  *  the snippet so that it appears emboldened when viewed using a pager like
903410d0f43Sjoerg  *  more or less.
904410d0f43Sjoerg  */
905410d0f43Sjoerg static int
callback_pager(query_callback_args * callback_args)906357f7b44Sabhinav callback_pager(query_callback_args *callback_args)
907410d0f43Sjoerg {
908357f7b44Sabhinav 	struct orig_callback_data *orig_data = callback_args->other_data;
909410d0f43Sjoerg 	char *psnippet;
910357f7b44Sabhinav 	const char *temp = callback_args->snippet;
911410d0f43Sjoerg 	int count = 0;
91282fc5158Schristos 	int i = 0, did;
913410d0f43Sjoerg 	size_t sz = 0;
914410d0f43Sjoerg 	size_t psnippet_length;
915410d0f43Sjoerg 
9165e64704aSchristos 	/* Count the number of bytes of matching text. For each of these
9175e64704aSchristos 	 * bytes we will use 2 extra bytes to overstrike it so that it
9185e64704aSchristos 	 * appears bold when viewed using a pager.
919410d0f43Sjoerg 	 */
920410d0f43Sjoerg 	while (*temp) {
921410d0f43Sjoerg 		sz = strcspn(temp, "\002\003");
922410d0f43Sjoerg 		temp += sz;
923410d0f43Sjoerg 		if (*temp == '\003') {
924410d0f43Sjoerg 			count += 2 * (sz);
925410d0f43Sjoerg 		}
926410d0f43Sjoerg 		temp++;
927410d0f43Sjoerg 	}
928410d0f43Sjoerg 
929357f7b44Sabhinav 	psnippet_length = callback_args->snippet_length + count;
930410d0f43Sjoerg 	psnippet = emalloc(psnippet_length + 1);
931410d0f43Sjoerg 
932410d0f43Sjoerg 	/* Copy the bytes from snippet to psnippet:
933410d0f43Sjoerg 	 * 1. Copy the bytes before \002 as it is.
9345e64704aSchristos 	 * 2. The bytes after \002 need to be overstriked till we
9355e64704aSchristos 	 *    encounter \003.
936410d0f43Sjoerg 	 * 3. To overstrike a byte 'A' we need to write 'A\bA'
937410d0f43Sjoerg 	 */
93882fc5158Schristos 	did = 0;
939357f7b44Sabhinav 	const char *snippet = callback_args->snippet;
940410d0f43Sjoerg 	while (*snippet) {
941410d0f43Sjoerg 		sz = strcspn(snippet, "\002");
942410d0f43Sjoerg 		memcpy(&psnippet[i], snippet, sz);
943410d0f43Sjoerg 		snippet += sz;
944410d0f43Sjoerg 		i += sz;
945410d0f43Sjoerg 
946410d0f43Sjoerg 		/* Don't change this. Advancing the pointer without reading the byte
947410d0f43Sjoerg 		 * is causing strange behavior.
948410d0f43Sjoerg 		 */
949410d0f43Sjoerg 		if (*snippet == '\002')
950410d0f43Sjoerg 			snippet++;
951410d0f43Sjoerg 		while (*snippet && *snippet != '\003') {
95282fc5158Schristos 			did = 1;
953410d0f43Sjoerg 			psnippet[i++] = *snippet;
954410d0f43Sjoerg 			psnippet[i++] = '\b';
955410d0f43Sjoerg 			psnippet[i++] = *snippet++;
956410d0f43Sjoerg 		}
957410d0f43Sjoerg 		if (*snippet)
958410d0f43Sjoerg 			snippet++;
959410d0f43Sjoerg 	}
960410d0f43Sjoerg 
961410d0f43Sjoerg 	psnippet[i] = 0;
962357f7b44Sabhinav 	char *ul_section = ul_pager(did, callback_args->section);
963357f7b44Sabhinav 	char *ul_name = ul_pager(did, callback_args->name);
964357f7b44Sabhinav 	char *ul_name_desc = ul_pager(did, callback_args->name_desc);
965357f7b44Sabhinav 	callback_args->section = ul_section;
966357f7b44Sabhinav 	callback_args->name = ul_name;
967357f7b44Sabhinav 	callback_args->name_desc = ul_name_desc;
968357f7b44Sabhinav 	callback_args->snippet = psnippet;
969357f7b44Sabhinav 	callback_args->snippet_length = psnippet_length;
970357f7b44Sabhinav 	callback_args->other_data = orig_data->data;
971521bd3eaSgutteridge 	int rc = (orig_data->callback)(callback_args);
9726265ee0dSchristos 	free(ul_section);
9736265ee0dSchristos 	free(ul_name);
9746265ee0dSchristos 	free(ul_name_desc);
975410d0f43Sjoerg 	free(psnippet);
976521bd3eaSgutteridge 	return rc;
977410d0f43Sjoerg }
978410d0f43Sjoerg 
/*
 * Callback data for callback_term.
 */
struct term_args {
	/* the user's original callback and callback data */
	struct orig_callback_data *orig_data;
	/* string placed in front of every field decorated by ul_term() */
	const char *smul;
	/* string placed after every field decorated by ul_term() */
	const char *rmul;
};
9846265ee0dSchristos 
9856265ee0dSchristos /*
9866265ee0dSchristos  * underline a string, pager style.
9876265ee0dSchristos  */
9886265ee0dSchristos static char *
ul_term(const char * s,const struct term_args * ta)9896265ee0dSchristos ul_term(const char *s, const struct term_args *ta)
9906265ee0dSchristos {
9916265ee0dSchristos 	char *dst;
9926265ee0dSchristos 
9936265ee0dSchristos 	easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
9946265ee0dSchristos 	return dst;
9956265ee0dSchristos }
9966265ee0dSchristos 
9976265ee0dSchristos /*
9986265ee0dSchristos  * callback_term --
9996265ee0dSchristos  *  A callback similar to callback_html. It overstrikes the matching text in
10006265ee0dSchristos  *  the snippet so that it appears emboldened when viewed using a pager like
10016265ee0dSchristos  *  more or less.
10026265ee0dSchristos  */
10036265ee0dSchristos static int
callback_term(query_callback_args * callback_args)1004357f7b44Sabhinav callback_term(query_callback_args *callback_args)
10056265ee0dSchristos {
1006357f7b44Sabhinav 	struct term_args *ta = callback_args->other_data;
10076265ee0dSchristos 	struct orig_callback_data *orig_data = ta->orig_data;
10086265ee0dSchristos 
1009357f7b44Sabhinav 	char *ul_section = ul_term(callback_args->section, ta);
1010357f7b44Sabhinav 	char *ul_name = ul_term(callback_args->name, ta);
1011357f7b44Sabhinav 	char *ul_name_desc = ul_term(callback_args->name_desc, ta);
1012357f7b44Sabhinav 	callback_args->section = ul_section;
1013357f7b44Sabhinav 	callback_args->name = ul_name;
1014357f7b44Sabhinav 	callback_args->name_desc = ul_name_desc;
1015357f7b44Sabhinav 	callback_args->other_data = orig_data->data;
1016521bd3eaSgutteridge 	int rc = (orig_data->callback)(callback_args);
10176265ee0dSchristos 	free(ul_section);
10186265ee0dSchristos 	free(ul_name);
10196265ee0dSchristos 	free(ul_name_desc);
1020521bd3eaSgutteridge 	return rc;
10216265ee0dSchristos }
10226265ee0dSchristos 
1023410d0f43Sjoerg /*
1024410d0f43Sjoerg  * run_query_pager --
1025410d0f43Sjoerg  *  Utility function similar to run_query_html. This function tries to
1026410d0f43Sjoerg  *  pre-process the result assuming it will be piped to a pager.
1027f0a7346dSsnj  *  For this purpose it first calls its own callback function callback_pager
1028410d0f43Sjoerg  *  which then delegates the call to the user supplied callback.
1029410d0f43Sjoerg  */
1030910ecac4Schristos static int
run_query_pager(sqlite3 * db,query_args * args)103152222de3Sjoerg run_query_pager(sqlite3 *db, query_args *args)
1032410d0f43Sjoerg {
1033410d0f43Sjoerg 	struct orig_callback_data orig_data;
1034410d0f43Sjoerg 	orig_data.callback = args->callback;
1035410d0f43Sjoerg 	orig_data.data = args->callback_data;
1036910ecac4Schristos 	const char *snippet_args[3] = { "\002", "\003", "..." };
1037410d0f43Sjoerg 	args->callback = &callback_pager;
1038410d0f43Sjoerg 	args->callback_data = (void *) &orig_data;
1039910ecac4Schristos 	return run_query_internal(db, snippet_args, args);
1040410d0f43Sjoerg }
10416265ee0dSchristos 
104262025e09Schristos struct nv {
104362025e09Schristos 	char *s;
104462025e09Schristos 	size_t l;
104562025e09Schristos };
104662025e09Schristos 
104762025e09Schristos static int
term_putc(int c,void * p)104862025e09Schristos term_putc(int c, void *p)
104962025e09Schristos {
105062025e09Schristos 	struct nv *nv = p;
105162025e09Schristos 	nv->s[nv->l++] = c;
105262025e09Schristos 	return 0;
105362025e09Schristos }
105462025e09Schristos 
105562025e09Schristos static char *
term_fix_seq(TERMINAL * ti,const char * seq)105662025e09Schristos term_fix_seq(TERMINAL *ti, const char *seq)
105762025e09Schristos {
105862025e09Schristos 	char *res = estrdup(seq);
105962025e09Schristos 	struct nv nv;
106062025e09Schristos 
106148e922c8Schristos 	if (ti == NULL)
106248e922c8Schristos 	    return res;
106348e922c8Schristos 
106462025e09Schristos 	nv.s = res;
106562025e09Schristos 	nv.l = 0;
106662025e09Schristos 	ti_puts(ti, seq, 1, term_putc, &nv);
106762025e09Schristos 	nv.s[nv.l] = '\0';
106862025e09Schristos 
106962025e09Schristos 	return res;
107062025e09Schristos }
107162025e09Schristos 
10726265ee0dSchristos static void
term_init(int fd,const char * sa[5])10736265ee0dSchristos term_init(int fd, const char *sa[5])
10746265ee0dSchristos {
10756265ee0dSchristos 	TERMINAL *ti;
10766265ee0dSchristos 	int error;
10776265ee0dSchristos 	const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
10786265ee0dSchristos 
10796265ee0dSchristos 	if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
10806265ee0dSchristos 		bold = sgr0 = NULL;
10816265ee0dSchristos 		smso = rmso = smul = rmul = "";
10826265ee0dSchristos 		ti = NULL;
10836265ee0dSchristos 	} else {
10846265ee0dSchristos 		bold = ti_getstr(ti, "bold");
10856265ee0dSchristos 		sgr0 = ti_getstr(ti, "sgr0");
10866265ee0dSchristos 		if (bold == NULL || sgr0 == NULL) {
10876265ee0dSchristos 			smso = ti_getstr(ti, "smso");
10886265ee0dSchristos 
10896265ee0dSchristos 			if (smso == NULL ||
10906265ee0dSchristos 			    (rmso = ti_getstr(ti, "rmso")) == NULL)
10916265ee0dSchristos 				smso = rmso = "";
10926265ee0dSchristos 			bold = sgr0 = NULL;
10936265ee0dSchristos 		} else
10946265ee0dSchristos 			smso = rmso = "";
10956265ee0dSchristos 
10966265ee0dSchristos 		smul = ti_getstr(ti, "smul");
10976265ee0dSchristos 		if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
10986265ee0dSchristos 			smul = rmul = "";
10996265ee0dSchristos 	}
11006265ee0dSchristos 
110162025e09Schristos 	sa[0] = term_fix_seq(ti, bold ? bold : smso);
110262025e09Schristos 	sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso);
11036265ee0dSchristos 	sa[2] = estrdup("...");
110462025e09Schristos 	sa[3] = term_fix_seq(ti, smul);
110562025e09Schristos 	sa[4] = term_fix_seq(ti, rmul);
110662025e09Schristos 
11076265ee0dSchristos 	if (ti)
11086265ee0dSchristos 		del_curterm(ti);
11096265ee0dSchristos }
11106265ee0dSchristos 
11116265ee0dSchristos /*
11126265ee0dSchristos  * run_query_term --
11136265ee0dSchristos  *  Utility function similar to run_query_html. This function tries to
11146265ee0dSchristos  *  pre-process the result assuming it will be displayed on a terminal
1115f0a7346dSsnj  *  For this purpose it first calls its own callback function callback_pager
11166265ee0dSchristos  *  which then delegates the call to the user supplied callback.
11176265ee0dSchristos  */
1118910ecac4Schristos static int
run_query_term(sqlite3 * db,query_args * args)11196265ee0dSchristos run_query_term(sqlite3 *db, query_args *args)
11206265ee0dSchristos {
11216265ee0dSchristos 	struct orig_callback_data orig_data;
11226265ee0dSchristos 	struct term_args ta;
11236265ee0dSchristos 	orig_data.callback = args->callback;
11246265ee0dSchristos 	orig_data.data = args->callback_data;
11256265ee0dSchristos 	const char *snippet_args[5];
1126910ecac4Schristos 
11276265ee0dSchristos 	term_init(STDOUT_FILENO, snippet_args);
11286265ee0dSchristos 	ta.smul = snippet_args[3];
11296265ee0dSchristos 	ta.rmul = snippet_args[4];
11306265ee0dSchristos 	ta.orig_data = (void *) &orig_data;
11316265ee0dSchristos 
11326265ee0dSchristos 	args->callback = &callback_term;
11336265ee0dSchristos 	args->callback_data = &ta;
1134910ecac4Schristos 	return run_query_internal(db, snippet_args, args);
1135910ecac4Schristos }
1136910ecac4Schristos 
1137910ecac4Schristos static int
run_query_none(sqlite3 * db,query_args * args)1138910ecac4Schristos run_query_none(sqlite3 *db, query_args *args)
1139910ecac4Schristos {
1140910ecac4Schristos 	struct orig_callback_data orig_data;
1141910ecac4Schristos 	orig_data.callback = args->callback;
1142910ecac4Schristos 	orig_data.data = args->callback_data;
1143910ecac4Schristos 	const char *snippet_args[3] = { "", "", "..." };
1144910ecac4Schristos 	args->callback = &callback_pager;
1145910ecac4Schristos 	args->callback_data = (void *) &orig_data;
1146910ecac4Schristos 	return run_query_internal(db, snippet_args, args);
1147910ecac4Schristos }
1148910ecac4Schristos 
1149910ecac4Schristos int
run_query(sqlite3 * db,query_format fmt,query_args * args)1150910ecac4Schristos run_query(sqlite3 *db, query_format fmt, query_args *args)
1151910ecac4Schristos {
1152910ecac4Schristos 	switch (fmt) {
1153910ecac4Schristos 	case APROPOS_NONE:
1154910ecac4Schristos 		return run_query_none(db, args);
1155910ecac4Schristos 	case APROPOS_HTML:
1156910ecac4Schristos 		return run_query_html(db, args);
1157910ecac4Schristos 	case APROPOS_TERM:
1158910ecac4Schristos 		return run_query_term(db, args);
1159910ecac4Schristos 	case APROPOS_PAGER:
1160910ecac4Schristos 		return run_query_pager(db, args);
1161910ecac4Schristos 	default:
1162910ecac4Schristos 		warnx("Unknown query format %d", (int)fmt);
1163910ecac4Schristos 		return -1;
1164910ecac4Schristos 	}
11656265ee0dSchristos }
1166