/*	$NetBSD: apropos-utils.c,v 1.51 2023/08/03 07:49:23 rin Exp $	*/
/*-
 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
 * All rights reserved.
 *
 * This code was developed as part of Google's Summer of Code 2011 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32410d0f43Sjoerg
33410d0f43Sjoerg #include <sys/cdefs.h>
34*a737f1efSrin __RCSID("$NetBSD: apropos-utils.c,v 1.51 2023/08/03 07:49:23 rin Exp $");
35410d0f43Sjoerg
36b1203a98Swiz #include <sys/queue.h>
37410d0f43Sjoerg #include <sys/stat.h>
38410d0f43Sjoerg
39410d0f43Sjoerg #include <assert.h>
40410d0f43Sjoerg #include <ctype.h>
41410d0f43Sjoerg #include <err.h>
42410d0f43Sjoerg #include <math.h>
43410d0f43Sjoerg #include <stdio.h>
44410d0f43Sjoerg #include <stdlib.h>
45410d0f43Sjoerg #include <string.h>
46410d0f43Sjoerg #include <util.h>
47410d0f43Sjoerg #include <zlib.h>
486265ee0dSchristos #include <term.h>
492fe964caSkamil #include <unistd.h>
506265ee0dSchristos #undef tab // XXX: manconf.h
51410d0f43Sjoerg
52410d0f43Sjoerg #include "apropos-utils.h"
53188f922dSabhinav #include "custom_apropos_tokenizer.h"
54b1203a98Swiz #include "manconf.h"
55188f922dSabhinav #include "fts3_tokenizer.h"
56410d0f43Sjoerg
57410d0f43Sjoerg typedef struct orig_callback_data {
58410d0f43Sjoerg void *data;
59357f7b44Sabhinav int (*callback) (query_callback_args*);
60410d0f43Sjoerg } orig_callback_data;
61410d0f43Sjoerg
/*
 * State handed to rank_func() as SQLite user data by
 * execute_search_query().
 */
typedef struct inverse_document_frequency {
	double	value;	/* sum of the weighted log(ndoc / ndocshitcount) terms */
	int	status;	/* 0 until rank_func() has run once, then 1 */
} inverse_document_frequency;
66410d0f43Sjoerg
/*
 * Weights for individual columns.  rank_func() indexes this table with
 * (column number - 1), i.e. the first column of the FTS table has no
 * entry here.
 */
static const double col_weights[] = {
	2.0,	/* NAME */
	2.00,	/* Name-description */
	0.55,	/* DESCRIPTION */
	0.10,	/* LIBRARY */
	0.001,	/* RETURN VALUES */
	0.20,	/* ENVIRONMENT */
	0.01,	/* FILES */
	0.001,	/* EXIT STATUS */
	2.00,	/* DIAGNOSTICS */
	0.05,	/* ERRORS */
	0.00,	/* md5_hash */
	1.00	/* machine */
};
82410d0f43Sjoerg
83f56c3723Sabhinav #ifndef APROPOS_DEBUG
84188f922dSabhinav static int
register_tokenizer(sqlite3 * db)85188f922dSabhinav register_tokenizer(sqlite3 *db)
86188f922dSabhinav {
87188f922dSabhinav int rc;
88188f922dSabhinav sqlite3_stmt *stmt;
89188f922dSabhinav const sqlite3_tokenizer_module *p;
90188f922dSabhinav const char *name = "custom_apropos_tokenizer";
91188f922dSabhinav get_custom_apropos_tokenizer(&p);
92188f922dSabhinav const char *sql = "SELECT fts3_tokenizer(?, ?)";
93188f922dSabhinav
94188f922dSabhinav sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0);
95188f922dSabhinav rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
96188f922dSabhinav if (rc != SQLITE_OK)
97188f922dSabhinav return rc;
98188f922dSabhinav
99188f922dSabhinav sqlite3_bind_text(stmt, 1, name, -1, SQLITE_STATIC);
100188f922dSabhinav sqlite3_bind_blob(stmt, 2, &p, sizeof(p), SQLITE_STATIC);
101188f922dSabhinav sqlite3_step(stmt);
102188f922dSabhinav
103188f922dSabhinav return sqlite3_finalize(stmt);
104188f922dSabhinav }
105f56c3723Sabhinav #endif
106188f922dSabhinav
/*
 * lower --
 *  Converts the string str to lower case in place and returns str.
 *  str must not be NULL.
 */
char *
lower(char *str)
{
	assert(str);

	for (char *p = str; *p != '\0'; p++) {
		/* tolower() needs a value representable as unsigned char */
		*p = tolower((unsigned char)*p);
	}
	return str;
}
121410d0f43Sjoerg
/*
 * concat --
 *  Utility function.  Concatenates together: dst, a space character and
 *  src, i.e. *dst becomes dst + " " + src.
 *  src must be a NUL terminated string; its length is measured here and
 *  the actual work is done by concat2().
 */
void
concat(char **dst, const char *src)
{
	size_t srclen = strlen(src);

	concat2(dst, src, srclen);
}
132410d0f43Sjoerg
/*
 * concat2 --
 *  Same as concat(), but the number of bytes to take from src is given
 *  by srclen.
 *  If *dst is NULL it is set to the result of estrndup(src, srclen) and
 *  no space is added.  Otherwise the buffer is resized with erealloc()
 *  and *dst is updated to point at the resized buffer.
 */
void
concat2(char **dst, const char *src, size_t srclen)
{
	char *buf = *dst;
	size_t oldlen;

	assert(src != NULL);

	/* Nothing to append to yet: the result is just a copy of src. */
	if (buf == NULL) {
		*dst = estrndup(src, srclen);
		return;
	}

	oldlen = strlen(buf);

	/* Old text + separator space + src + terminating NUL. */
	buf = erealloc(buf, oldlen + srclen + 2);

	buf[oldlen] = ' ';
	memcpy(buf + oldlen + 1, src, srclen);
	buf[oldlen + 1 + srclen] = '\0';

	*dst = buf;
}
166410d0f43Sjoerg
167410d0f43Sjoerg void
close_db(sqlite3 * db)168410d0f43Sjoerg close_db(sqlite3 *db)
169410d0f43Sjoerg {
170410d0f43Sjoerg sqlite3_close(db);
171410d0f43Sjoerg sqlite3_shutdown();
172410d0f43Sjoerg }
173410d0f43Sjoerg
174410d0f43Sjoerg /*
175410d0f43Sjoerg * create_db --
176410d0f43Sjoerg * Creates the database schema.
177410d0f43Sjoerg */
178410d0f43Sjoerg static int
create_db(sqlite3 * db)179410d0f43Sjoerg create_db(sqlite3 *db)
180410d0f43Sjoerg {
181410d0f43Sjoerg const char *sqlstr = NULL;
182410d0f43Sjoerg char *schemasql;
183410d0f43Sjoerg char *errmsg = NULL;
184410d0f43Sjoerg
185410d0f43Sjoerg /*------------------------ Create the tables------------------------------*/
186410d0f43Sjoerg
187410d0f43Sjoerg #if NOTYET
188410d0f43Sjoerg sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
189410d0f43Sjoerg #else
190410d0f43Sjoerg sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
191410d0f43Sjoerg #endif
192410d0f43Sjoerg
193410d0f43Sjoerg schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
194410d0f43Sjoerg APROPOS_SCHEMA_VERSION);
195410d0f43Sjoerg sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
196410d0f43Sjoerg if (errmsg != NULL)
197410d0f43Sjoerg goto out;
198410d0f43Sjoerg sqlite3_free(schemasql);
199410d0f43Sjoerg
2005e64704aSchristos sqlstr =
2015e64704aSchristos //mandb
2025e64704aSchristos "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
203410d0f43Sjoerg "name_desc, desc, lib, return_vals, env, files, "
204410d0f43Sjoerg "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
205f56c3723Sabhinav #ifndef APROPOS_DEBUG
206f56c3723Sabhinav "compress=zip, uncompress=unzip, tokenize=custom_apropos_tokenizer, "
207f56c3723Sabhinav #else
208f56c3723Sabhinav "tokenize=porter, "
209231f71fbSabhinav #endif
210f56c3723Sabhinav "notindexed=section, notindexed=md5_hash); "
2115e64704aSchristos //mandb_meta
212410d0f43Sjoerg "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
213410d0f43Sjoerg "file UNIQUE, md5_hash UNIQUE, id INTEGER PRIMARY KEY); "
2145e64704aSchristos //mandb_links
215e70b83fcSabhinav "CREATE TABLE IF NOT EXISTS mandb_links(link COLLATE NOCASE, target, section, "
216933b5da2Sabhinav "machine, md5_hash, name_desc); ";
217410d0f43Sjoerg
218410d0f43Sjoerg sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
219410d0f43Sjoerg if (errmsg != NULL)
220410d0f43Sjoerg goto out;
221410d0f43Sjoerg
2225e64704aSchristos sqlstr =
2235e64704aSchristos "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
224410d0f43Sjoerg "(link); "
225410d0f43Sjoerg "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
226d099c692Swiz "(device, inode); "
227d099c692Swiz "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
228d099c692Swiz "(md5_hash);";
229410d0f43Sjoerg sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
230410d0f43Sjoerg if (errmsg != NULL)
231410d0f43Sjoerg goto out;
232410d0f43Sjoerg return 0;
233410d0f43Sjoerg
234410d0f43Sjoerg out:
235410d0f43Sjoerg warnx("%s", errmsg);
236410d0f43Sjoerg free(errmsg);
237410d0f43Sjoerg sqlite3_close(db);
238410d0f43Sjoerg sqlite3_shutdown();
239410d0f43Sjoerg return -1;
240410d0f43Sjoerg }
241410d0f43Sjoerg
242410d0f43Sjoerg /*
243410d0f43Sjoerg * zip --
2446f0eae44Sgutteridge * User defined SQLite function to compress the FTS table
245410d0f43Sjoerg */
246410d0f43Sjoerg static void
zip(sqlite3_context * pctx,int nval,sqlite3_value ** apval)247410d0f43Sjoerg zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
248410d0f43Sjoerg {
249410d0f43Sjoerg int nin;
250410d0f43Sjoerg long int nout;
251410d0f43Sjoerg const unsigned char * inbuf;
252410d0f43Sjoerg unsigned char *outbuf;
253410d0f43Sjoerg
254410d0f43Sjoerg assert(nval == 1);
255410d0f43Sjoerg nin = sqlite3_value_bytes(apval[0]);
256410d0f43Sjoerg inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
257410d0f43Sjoerg nout = nin + 13 + (nin + 999) / 1000;
258410d0f43Sjoerg outbuf = emalloc(nout);
259410d0f43Sjoerg compress(outbuf, (unsigned long *) &nout, inbuf, nin);
260410d0f43Sjoerg sqlite3_result_blob(pctx, outbuf, nout, free);
261410d0f43Sjoerg }
262410d0f43Sjoerg
263410d0f43Sjoerg /*
264410d0f43Sjoerg * unzip --
2656f0eae44Sgutteridge * User defined SQLite function to uncompress the FTS table.
266410d0f43Sjoerg */
267410d0f43Sjoerg static void
unzip(sqlite3_context * pctx,int nval,sqlite3_value ** apval)268410d0f43Sjoerg unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
269410d0f43Sjoerg {
270410d0f43Sjoerg unsigned int rc;
271410d0f43Sjoerg unsigned char *outbuf;
272410d0f43Sjoerg z_stream stream;
273e6b2ce53Schristos long total_out;
274410d0f43Sjoerg
275410d0f43Sjoerg assert(nval == 1);
276e6b2ce53Schristos memset(&stream, 0, sizeof(stream));
277410d0f43Sjoerg stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
278410d0f43Sjoerg stream.avail_in = sqlite3_value_bytes(apval[0]);
279410d0f43Sjoerg stream.zalloc = NULL;
280410d0f43Sjoerg stream.zfree = NULL;
281410d0f43Sjoerg
282410d0f43Sjoerg if (inflateInit(&stream) != Z_OK) {
283410d0f43Sjoerg return;
284410d0f43Sjoerg }
285410d0f43Sjoerg
286e6b2ce53Schristos total_out = stream.avail_out = stream.avail_in * 2 + 100;
287e6b2ce53Schristos stream.next_out = outbuf = emalloc(stream.avail_out);
288410d0f43Sjoerg while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
289410d0f43Sjoerg if (rc != Z_OK ||
290410d0f43Sjoerg (stream.avail_out != 0 && stream.avail_in == 0)) {
291410d0f43Sjoerg free(outbuf);
292410d0f43Sjoerg return;
293410d0f43Sjoerg }
294e6b2ce53Schristos total_out <<= 1;
295e6b2ce53Schristos outbuf = erealloc(outbuf, total_out);
296410d0f43Sjoerg stream.next_out = outbuf + stream.total_out;
297e6b2ce53Schristos stream.avail_out = total_out - stream.total_out;
298410d0f43Sjoerg }
299410d0f43Sjoerg if (inflateEnd(&stream) != Z_OK) {
300410d0f43Sjoerg free(outbuf);
301410d0f43Sjoerg return;
302410d0f43Sjoerg }
303e6b2ce53Schristos if (stream.total_out == 0) {
304e6b2ce53Schristos free(outbuf);
305e6b2ce53Schristos return;
306e6b2ce53Schristos }
307410d0f43Sjoerg outbuf = erealloc(outbuf, stream.total_out);
308410d0f43Sjoerg sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free);
309410d0f43Sjoerg }
310410d0f43Sjoerg
311b1203a98Swiz /*
312b1203a98Swiz * get_dbpath --
313b1203a98Swiz * Read the path of the database from man.conf and return.
314b1203a98Swiz */
315b1203a98Swiz char *
get_dbpath(const char * manconf)316b1203a98Swiz get_dbpath(const char *manconf)
317b1203a98Swiz {
318b1203a98Swiz TAG *tp;
319b1203a98Swiz char *dbpath;
320b1203a98Swiz
321b1203a98Swiz config(manconf);
322b1203a98Swiz tp = gettag("_mandb", 1);
323b1203a98Swiz if (!tp)
324b1203a98Swiz return NULL;
325b1203a98Swiz
326b1203a98Swiz if (TAILQ_EMPTY(&tp->entrylist))
327b1203a98Swiz return NULL;
328b1203a98Swiz
329b1203a98Swiz dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
330b1203a98Swiz return dbpath;
331b1203a98Swiz }
332b1203a98Swiz
333410d0f43Sjoerg /* init_db --
334410d0f43Sjoerg * Prepare the database. Register the compress/uncompress functions and the
335410d0f43Sjoerg * stopword tokenizer.
336410d0f43Sjoerg * db_flag specifies the mode in which to open the database. 3 options are
337410d0f43Sjoerg * available:
338410d0f43Sjoerg * 1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
339410d0f43Sjoerg * 2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
340410d0f43Sjoerg * 3. DB_CREATE: Open in read-write mode. It will try to create the db if
341410d0f43Sjoerg * it does not exist already.
342410d0f43Sjoerg * RETURN VALUES:
3435e64704aSchristos * The function will return NULL in case the db does not exist
3445e64704aSchristos * and DB_CREATE
345410d0f43Sjoerg * was not specified. And in case DB_CREATE was specified and yet NULL is
346410d0f43Sjoerg * returned, then there was some other error.
347410d0f43Sjoerg * In normal cases the function should return a handle to the db.
348410d0f43Sjoerg */
349410d0f43Sjoerg sqlite3 *
init_db(mandb_access_mode db_flag,const char * manconf)35090f8d04eSchristos init_db(mandb_access_mode db_flag, const char *manconf)
351410d0f43Sjoerg {
352410d0f43Sjoerg sqlite3 *db = NULL;
353410d0f43Sjoerg sqlite3_stmt *stmt;
354410d0f43Sjoerg struct stat sb;
355410d0f43Sjoerg int rc;
356410d0f43Sjoerg int create_db_flag = 0;
357410d0f43Sjoerg
358b1203a98Swiz char *dbpath = get_dbpath(manconf);
359b1203a98Swiz if (dbpath == NULL)
360b1203a98Swiz errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
36190f8d04eSchristos
362b1203a98Swiz if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
36390f8d04eSchristos /* Database does not exist, check if DB_CREATE was specified,
36490f8d04eSchristos * and set flag to create the database schema
365410d0f43Sjoerg */
366410d0f43Sjoerg if (db_flag != (MANDB_CREATE)) {
367410d0f43Sjoerg warnx("Missing apropos database. "
368410d0f43Sjoerg "Please run makemandb to create it.");
369410d0f43Sjoerg return NULL;
370410d0f43Sjoerg }
371410d0f43Sjoerg create_db_flag = 1;
37290f8d04eSchristos } else {
37390f8d04eSchristos /*
37490f8d04eSchristos * Database exists. Check if we have the permissions
37590f8d04eSchristos * to read/write the files
37690f8d04eSchristos */
37790f8d04eSchristos int access_mode = R_OK;
3782c6689d2Schristos switch (db_flag) {
37990f8d04eSchristos case MANDB_CREATE:
38090f8d04eSchristos case MANDB_WRITE:
38190f8d04eSchristos access_mode |= W_OK;
38290f8d04eSchristos break;
38390f8d04eSchristos default:
38490f8d04eSchristos break;
38590f8d04eSchristos }
38690f8d04eSchristos if ((access(dbpath, access_mode)) != 0) {
38790f8d04eSchristos warnx("Unable to access the database, please check"
38890f8d04eSchristos " permissions for `%s'", dbpath);
38990f8d04eSchristos return NULL;
39090f8d04eSchristos }
391410d0f43Sjoerg }
392410d0f43Sjoerg
393410d0f43Sjoerg sqlite3_initialize();
394b1203a98Swiz rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
395410d0f43Sjoerg
396410d0f43Sjoerg if (rc != SQLITE_OK) {
397410d0f43Sjoerg warnx("%s", sqlite3_errmsg(db));
39890f8d04eSchristos goto error;
399410d0f43Sjoerg }
400410d0f43Sjoerg
401188f922dSabhinav sqlite3_extended_result_codes(db, 1);
402188f922dSabhinav
403f56c3723Sabhinav #ifndef APROPOS_DEBUG
404188f922dSabhinav rc = register_tokenizer(db);
405188f922dSabhinav if (rc != SQLITE_OK) {
406188f922dSabhinav warnx("Unable to register custom tokenizer: %s", sqlite3_errmsg(db));
407188f922dSabhinav goto error;
408188f922dSabhinav }
409f56c3723Sabhinav #endif
410188f922dSabhinav
411410d0f43Sjoerg if (create_db_flag && create_db(db) < 0) {
412410d0f43Sjoerg warnx("%s", "Unable to create database schema");
413410d0f43Sjoerg goto error;
414410d0f43Sjoerg }
415410d0f43Sjoerg
416410d0f43Sjoerg rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
417410d0f43Sjoerg if (rc != SQLITE_OK) {
418d0663c21Sapb warnx("Unable to query schema version: %s",
419d0663c21Sapb sqlite3_errmsg(db));
420410d0f43Sjoerg goto error;
421410d0f43Sjoerg }
422410d0f43Sjoerg if (sqlite3_step(stmt) != SQLITE_ROW) {
423410d0f43Sjoerg sqlite3_finalize(stmt);
424d0663c21Sapb warnx("Unable to query schema version: %s",
425d0663c21Sapb sqlite3_errmsg(db));
426410d0f43Sjoerg goto error;
427410d0f43Sjoerg }
428410d0f43Sjoerg if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
429410d0f43Sjoerg sqlite3_finalize(stmt);
430410d0f43Sjoerg warnx("Incorrect schema version found. "
431410d0f43Sjoerg "Please run makemandb -f.");
432410d0f43Sjoerg goto error;
433410d0f43Sjoerg }
434410d0f43Sjoerg sqlite3_finalize(stmt);
435410d0f43Sjoerg
436410d0f43Sjoerg
437410d0f43Sjoerg /* Register the zip and unzip functions for FTS compression */
4385e64704aSchristos rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip,
4395e64704aSchristos NULL, NULL);
440410d0f43Sjoerg if (rc != SQLITE_OK) {
441d0663c21Sapb warnx("Unable to register function: compress: %s",
442d0663c21Sapb sqlite3_errmsg(db));
443410d0f43Sjoerg goto error;
444410d0f43Sjoerg }
445410d0f43Sjoerg
446410d0f43Sjoerg rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
447410d0f43Sjoerg unzip, NULL, NULL);
448410d0f43Sjoerg if (rc != SQLITE_OK) {
449d0663c21Sapb warnx("Unable to register function: uncompress: %s",
450d0663c21Sapb sqlite3_errmsg(db));
451410d0f43Sjoerg goto error;
452410d0f43Sjoerg }
453410d0f43Sjoerg return db;
454b1203a98Swiz
455410d0f43Sjoerg error:
45690f8d04eSchristos close_db(db);
457410d0f43Sjoerg return NULL;
458410d0f43Sjoerg }
459410d0f43Sjoerg
460410d0f43Sjoerg /*
461410d0f43Sjoerg * rank_func --
4626f0eae44Sgutteridge * SQLite user defined function for ranking the documents.
463410d0f43Sjoerg * For each phrase of the query, it computes the tf and idf and adds them over.
464410d0f43Sjoerg * It computes the final rank, by multiplying tf and idf together.
465410d0f43Sjoerg * Weight of term t for document d = (term frequency of t in d *
466410d0f43Sjoerg * inverse document frequency of t)
467410d0f43Sjoerg *
468410d0f43Sjoerg * Term Frequency of term t in document d = Number of times t occurs in d /
4695e64704aSchristos * Number of times t appears in all documents
470410d0f43Sjoerg *
471410d0f43Sjoerg * Inverse document frequency of t = log(Total number of documents /
472410d0f43Sjoerg * Number of documents in which t occurs)
473410d0f43Sjoerg */
474410d0f43Sjoerg static void
rank_func(sqlite3_context * pctx,int nval,sqlite3_value ** apval)475410d0f43Sjoerg rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
476410d0f43Sjoerg {
477410d0f43Sjoerg inverse_document_frequency *idf = sqlite3_user_data(pctx);
478410d0f43Sjoerg double tf = 0.0;
479410d0f43Sjoerg const unsigned int *matchinfo;
480410d0f43Sjoerg int ncol;
481410d0f43Sjoerg int nphrase;
482410d0f43Sjoerg int iphrase;
483410d0f43Sjoerg int ndoc;
484410d0f43Sjoerg int doclen = 0;
485410d0f43Sjoerg const double k = 3.75;
4865e64704aSchristos /*
4875e64704aSchristos * Check that the number of arguments passed to this
4885e64704aSchristos * function is correct.
4895e64704aSchristos */
490410d0f43Sjoerg assert(nval == 1);
491410d0f43Sjoerg
492410d0f43Sjoerg matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
493410d0f43Sjoerg nphrase = matchinfo[0];
494410d0f43Sjoerg ncol = matchinfo[1];
495410d0f43Sjoerg ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
496410d0f43Sjoerg for (iphrase = 0; iphrase < nphrase; iphrase++) {
497410d0f43Sjoerg int icol;
4985e64704aSchristos const unsigned int *phraseinfo =
4995e64704aSchristos &matchinfo[2 + ncol + iphrase * ncol * 3];
500410d0f43Sjoerg for(icol = 1; icol < ncol; icol++) {
501410d0f43Sjoerg
5025e64704aSchristos /* nhitcount: number of times the current phrase occurs
5035e64704aSchristos * in the current column in the current document.
5045e64704aSchristos * nglobalhitcount: number of times current phrase
5055e64704aSchristos * occurs in the current column in all documents.
5065e64704aSchristos * ndocshitcount: number of documents in which the
5075e64704aSchristos * current phrase occurs in the current column at
5085e64704aSchristos * least once.
509410d0f43Sjoerg */
510410d0f43Sjoerg int nhitcount = phraseinfo[3 * icol];
511410d0f43Sjoerg int nglobalhitcount = phraseinfo[3 * icol + 1];
512410d0f43Sjoerg int ndocshitcount = phraseinfo[3 * icol + 2];
513410d0f43Sjoerg doclen = matchinfo[2 + icol ];
514410d0f43Sjoerg double weight = col_weights[icol - 1];
515410d0f43Sjoerg if (idf->status == 0 && ndocshitcount)
5165e64704aSchristos idf->value +=
5175e64704aSchristos log(((double)ndoc / ndocshitcount))* weight;
518410d0f43Sjoerg
5195e64704aSchristos /*
5205e64704aSchristos * Dividing the tf by document length to normalize
5215e64704aSchristos * the effect of longer documents.
522410d0f43Sjoerg */
523410d0f43Sjoerg if (nglobalhitcount > 0 && nhitcount)
5245e64704aSchristos tf += (((double)nhitcount * weight)
5255e64704aSchristos / (nglobalhitcount * doclen));
526410d0f43Sjoerg }
527410d0f43Sjoerg }
528410d0f43Sjoerg idf->status = 1;
529410d0f43Sjoerg
5305e64704aSchristos /*
5315e64704aSchristos * Final score: Dividing by k + tf further normalizes the weight
5325e64704aSchristos * leading to better results. The value of k is experimental
533410d0f43Sjoerg */
5345e64704aSchristos double score = (tf * idf->value) / (k + tf);
535410d0f43Sjoerg sqlite3_result_double(pctx, score);
536410d0f43Sjoerg return;
537410d0f43Sjoerg }
538410d0f43Sjoerg
539410d0f43Sjoerg /*
5404647c1ecSabhinav * generates sql query for matching the user entered query
541410d0f43Sjoerg */
5424647c1ecSabhinav static char *
generate_search_query(query_args * args,const char * snippet_args[3])5434647c1ecSabhinav generate_search_query(query_args *args, const char *snippet_args[3])
544410d0f43Sjoerg {
545410d0f43Sjoerg const char *default_snippet_args[3];
546410d0f43Sjoerg char *section_clause = NULL;
547410d0f43Sjoerg char *limit_clause = NULL;
548410d0f43Sjoerg char *machine_clause = NULL;
5493c013439Sabhinav char *query = NULL;
550410d0f43Sjoerg
5513c013439Sabhinav if (args->machine) {
5523c013439Sabhinav machine_clause = sqlite3_mprintf("AND mandb.machine=%Q", args->machine);
5533c013439Sabhinav if (machine_clause == NULL)
5543c013439Sabhinav goto RETURN;
5553c013439Sabhinav }
556410d0f43Sjoerg
5573c013439Sabhinav if (args->nrec >= 0) {
5583c013439Sabhinav /* Use the provided number of records and offset */
5593c013439Sabhinav limit_clause = sqlite3_mprintf(" LIMIT %d OFFSET %d",
5603c013439Sabhinav args->nrec, args->offset);
5613c013439Sabhinav if (limit_clause == NULL)
5623c013439Sabhinav goto RETURN;
5633c013439Sabhinav }
564410d0f43Sjoerg
565410d0f43Sjoerg /* We want to build a query of the form: "select x,y,z from mandb where
566ba948c91Sabhinav * mandb match :query [AND (section IN ('1', '2')]
567ba948c91Sabhinav * ORDER BY rank DESC [LIMIT 10 OFFSET 0]"
5685e64704aSchristos * NOTES:
569ba948c91Sabhinav * 1. The portion in first pair of square brackets is optional.
570ba948c91Sabhinav * It will be there only if the user has specified an option
5715e64704aSchristos * to search in one or more specific sections.
572ba948c91Sabhinav * 2. The LIMIT portion will be there if the user has specified
573ba948c91Sabhinav * a limit using the -n option.
574410d0f43Sjoerg */
5751373f782Sabhinav if (args->sections && args->sections[0]) {
5761373f782Sabhinav concat(§ion_clause, " AND mandb.section IN (");
5771373f782Sabhinav for (size_t i = 0; args->sections[i]; i++) {
578410d0f43Sjoerg char *temp;
5791373f782Sabhinav char c = args->sections[i + 1]? ',': ')';
5801373f782Sabhinav if ((temp = sqlite3_mprintf("%Q%c", args->sections[i], c)) == NULL)
5811373f782Sabhinav goto RETURN;
582410d0f43Sjoerg concat(§ion_clause, temp);
58369479387Sabhinav sqlite3_free(temp);
584410d0f43Sjoerg }
585410d0f43Sjoerg }
5864647c1ecSabhinav
587410d0f43Sjoerg if (snippet_args == NULL) {
588410d0f43Sjoerg default_snippet_args[0] = "";
589410d0f43Sjoerg default_snippet_args[1] = "";
590410d0f43Sjoerg default_snippet_args[2] = "...";
591410d0f43Sjoerg snippet_args = default_snippet_args;
592410d0f43Sjoerg }
5934647c1ecSabhinav
594cb0641ebSchristos if (args->legacy) {
5952b42c8b2Schristos char *wild;
5962b42c8b2Schristos easprintf(&wild, "%%%s%%", args->search_str);
5974647c1ecSabhinav query = sqlite3_mprintf("SELECT section, name, name_desc, machine"
598cb0641ebSchristos " FROM mandb"
5992b42c8b2Schristos " WHERE name LIKE %Q OR name_desc LIKE %Q "
600cb0641ebSchristos "%s"
601cb0641ebSchristos "%s",
602533b5973Schristos wild, wild,
603cb0641ebSchristos section_clause ? section_clause : "",
604cb0641ebSchristos limit_clause ? limit_clause : "");
6052b42c8b2Schristos free(wild);
606e70b83fcSabhinav } else if (strchr(args->search_str, ' ') == NULL) {
607e70b83fcSabhinav /*
608e70b83fcSabhinav * If it's a single word query, we want to search in the
609e70b83fcSabhinav * links table as well. If the link table contains an entry
610e70b83fcSabhinav * for the queried keyword, we want to use that as the name of
611e70b83fcSabhinav * the man page.
612e70b83fcSabhinav * For example, for `apropos realloc` the output should be
613e70b83fcSabhinav * realloc(3) and not malloc(3).
614e70b83fcSabhinav */
615e70b83fcSabhinav query = sqlite3_mprintf(
616e70b83fcSabhinav "SELECT section, name, name_desc, machine,"
617e70b83fcSabhinav " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
618e70b83fcSabhinav " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
619e70b83fcSabhinav " FROM mandb WHERE name NOT IN ("
620e70b83fcSabhinav " SELECT target FROM mandb_links WHERE link=%Q AND"
621e70b83fcSabhinav " mandb_links.section=mandb.section) AND mandb MATCH %Q %s %s"
622e70b83fcSabhinav " UNION"
623e70b83fcSabhinav " SELECT mandb.section, mandb_links.link AS name, mandb.name_desc,"
624e70b83fcSabhinav " mandb.machine, '' AS snippet, 100.00 AS rank"
625e70b83fcSabhinav " FROM mandb JOIN mandb_links ON mandb.name=mandb_links.target and"
626e70b83fcSabhinav " mandb.section=mandb_links.section WHERE mandb_links.link=%Q"
627e70b83fcSabhinav " %s %s"
628e70b83fcSabhinav " ORDER BY rank DESC %s",
629e70b83fcSabhinav snippet_args[0], snippet_args[1], snippet_args[2],
630e70b83fcSabhinav args->search_str, args->search_str, section_clause ? section_clause : "",
631e70b83fcSabhinav machine_clause ? machine_clause : "", args->search_str,
632e70b83fcSabhinav machine_clause ? machine_clause : "",
633e70b83fcSabhinav section_clause ? section_clause : "",
634e70b83fcSabhinav limit_clause ? limit_clause : "");
635cb0641ebSchristos } else {
636410d0f43Sjoerg query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
637410d0f43Sjoerg " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
638410d0f43Sjoerg " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
639410d0f43Sjoerg " FROM mandb"
640410d0f43Sjoerg " WHERE mandb MATCH %Q %s "
641410d0f43Sjoerg "%s"
642410d0f43Sjoerg " ORDER BY rank DESC"
643410d0f43Sjoerg "%s",
644cb0641ebSchristos snippet_args[0], snippet_args[1], snippet_args[2],
645cb0641ebSchristos args->search_str, machine_clause ? machine_clause : "",
646410d0f43Sjoerg section_clause ? section_clause : "",
647410d0f43Sjoerg limit_clause ? limit_clause : "");
648cb0641ebSchristos }
649410d0f43Sjoerg
6503c013439Sabhinav RETURN:
65169479387Sabhinav sqlite3_free(machine_clause);
652a5fb0c00Sleot free(section_clause);
65369479387Sabhinav sqlite3_free(limit_clause);
6544647c1ecSabhinav return query;
655410d0f43Sjoerg }
6564647c1ecSabhinav
6572d0aa66bSchristos static const char *
get_stmt_col_text(sqlite3_stmt * stmt,int col)6582d0aa66bSchristos get_stmt_col_text(sqlite3_stmt *stmt, int col)
6592d0aa66bSchristos {
6602d0aa66bSchristos const char *t = (const char *) sqlite3_column_text(stmt, col);
6612d0aa66bSchristos return t == NULL ? "*?*" : t;
6622d0aa66bSchristos }
6632d0aa66bSchristos
6644647c1ecSabhinav /*
6654647c1ecSabhinav * Execute the full text search query and return the number of results
6664647c1ecSabhinav * obtained.
6674647c1ecSabhinav */
668521bd3eaSgutteridge static int
execute_search_query(sqlite3 * db,char * query,query_args * args)6694647c1ecSabhinav execute_search_query(sqlite3 *db, char *query, query_args *args)
6704647c1ecSabhinav {
6714647c1ecSabhinav sqlite3_stmt *stmt;
6724647c1ecSabhinav char *name;
6734647c1ecSabhinav char *slash_ptr;
6744647c1ecSabhinav const char *name_temp;
6754647c1ecSabhinav char *m = NULL;
6764647c1ecSabhinav int rc;
677357f7b44Sabhinav query_callback_args callback_args;
6784647c1ecSabhinav inverse_document_frequency idf = {0, 0};
6794647c1ecSabhinav
6804647c1ecSabhinav if (!args->legacy) {
6814647c1ecSabhinav /* Register the rank function */
6824647c1ecSabhinav rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY,
6834647c1ecSabhinav (void *) &idf, rank_func, NULL, NULL);
6844647c1ecSabhinav if (rc != SQLITE_OK) {
6854647c1ecSabhinav warnx("Unable to register the ranking function: %s",
6864647c1ecSabhinav sqlite3_errmsg(db));
6874647c1ecSabhinav sqlite3_close(db);
6884647c1ecSabhinav sqlite3_shutdown();
6894647c1ecSabhinav exit(EXIT_FAILURE);
6904647c1ecSabhinav }
6914647c1ecSabhinav }
6924647c1ecSabhinav
693410d0f43Sjoerg rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
694410d0f43Sjoerg if (rc == SQLITE_IOERR) {
695410d0f43Sjoerg warnx("Corrupt database. Please rerun makemandb");
696410d0f43Sjoerg return -1;
697410d0f43Sjoerg } else if (rc != SQLITE_OK) {
698410d0f43Sjoerg warnx("%s", sqlite3_errmsg(db));
699410d0f43Sjoerg return -1;
700410d0f43Sjoerg }
701410d0f43Sjoerg
702521bd3eaSgutteridge int nresults = rc = 0;
703521bd3eaSgutteridge while (rc == 0 && sqlite3_step(stmt) == SQLITE_ROW) {
7044647c1ecSabhinav nresults++;
7052d0aa66bSchristos callback_args.section = get_stmt_col_text(stmt, 0);
7062d0aa66bSchristos name_temp = get_stmt_col_text(stmt, 1);
7072d0aa66bSchristos callback_args.name_desc = get_stmt_col_text(stmt, 2);
708b0ca50fbSabhinav callback_args.machine = (const char *) sqlite3_column_text(stmt, 3);
709496b8ce3Sabhinav if (!args->legacy) {
7102d0aa66bSchristos callback_args.snippet = get_stmt_col_text(stmt, 4);
7112d0aa66bSchristos callback_args.snippet_length =
7122d0aa66bSchristos strlen(callback_args.snippet);
713496b8ce3Sabhinav } else {
714357f7b44Sabhinav callback_args.snippet = "";
715496b8ce3Sabhinav callback_args.snippet_length = 1;
716496b8ce3Sabhinav }
717f41e473dSwiz if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
718f41e473dSwiz name_temp = slash_ptr + 1;
719357f7b44Sabhinav if (callback_args.machine && callback_args.machine[0]) {
720357f7b44Sabhinav m = estrdup(callback_args.machine);
7215e64704aSchristos easprintf(&name, "%s/%s", lower(m), name_temp);
722410d0f43Sjoerg free(m);
723410d0f43Sjoerg } else {
7242d0aa66bSchristos name = estrdup(get_stmt_col_text(stmt, 1));
725410d0f43Sjoerg }
726357f7b44Sabhinav callback_args.name = name;
727357f7b44Sabhinav callback_args.other_data = args->callback_data;
728521bd3eaSgutteridge rc = (args->callback)(&callback_args);
729410d0f43Sjoerg free(name);
730410d0f43Sjoerg }
731410d0f43Sjoerg sqlite3_finalize(stmt);
732521bd3eaSgutteridge return (rc < 0) ? rc : nresults;
7334647c1ecSabhinav }
7344647c1ecSabhinav
7354647c1ecSabhinav
7364647c1ecSabhinav /*
7374647c1ecSabhinav * run_query_internal --
7384647c1ecSabhinav * Performs the searches for the keywords entered by the user.
7394647c1ecSabhinav * The 2nd param: snippet_args is an array of strings providing values for the
7404647c1ecSabhinav * last three parameters to the snippet function of sqlite. (Look at the docs).
7414647c1ecSabhinav * The 3rd param: args contains rest of the search parameters. Look at
7424647c1ecSabhinav * arpopos-utils.h for the description of individual fields.
7434647c1ecSabhinav *
7444647c1ecSabhinav */
7454647c1ecSabhinav static int
run_query_internal(sqlite3 * db,const char * snippet_args[3],query_args * args)7464647c1ecSabhinav run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args)
7474647c1ecSabhinav {
7484647c1ecSabhinav char *query;
7494647c1ecSabhinav query = generate_search_query(args, snippet_args);
7504647c1ecSabhinav if (query == NULL) {
7514647c1ecSabhinav *args->errmsg = estrdup("malloc failed");
7524647c1ecSabhinav return -1;
7534647c1ecSabhinav }
7544647c1ecSabhinav
755521bd3eaSgutteridge int rc = execute_search_query(db, query, args);
756410d0f43Sjoerg sqlite3_free(query);
757521bd3eaSgutteridge return (rc < 0 || *(args->errmsg) != NULL) ? -1 : 0;
758410d0f43Sjoerg }
759410d0f43Sjoerg
/*
 * get_escaped_html_string --
 *  Returns a newly allocated copy of src that can be embedded in HTML:
 *  '<', '>', '"' and '&' are replaced by &lt;, &gt;, &quot; and &amp;,
 *  and the snippet markers \002 and \003 are replaced by <b> and </b>.
 *  An '&' directly preceded by a backslash (the mdoc escape \&) is copied
 *  unchanged.
 *
 *  On return *slen holds the length of the escaped string, not counting
 *  the terminating NUL.  The value *slen has on entry is not used; the
 *  buffer is sized from src itself.  The caller frees the result.
 */
static char *
get_escaped_html_string(const char *src, size_t *slen)
{
	static const char trouble[] = "<>\"&\002\003";
	/* The longest replacement, "&quot;", adds five bytes to one byte. */
	enum { MAX_GROWTH = 5 };
	const char *p;
	char *dst, *ddst;
	size_t count = 0;
	size_t srclen;

	/*
	 * Count the characters that need replacing.  The scan stops on the
	 * terminating NUL itself, so it never steps past the end of src
	 * when the string does not end in a special character.
	 */
	p = src;
	for (;;) {
		p += strcspn(p, trouble);
		if (*p == '\0')
			break;
		count++;
		p++;
	}
	srclen = (size_t)(p - src);

	ddst = dst = emalloc(srclen + count * MAX_GROWTH + 1);
	for (p = src; *p != '\0'; p++) {
		const char *rep;
		size_t replen;

		switch (*p) {
		case '<':
			rep = "&lt;";
			break;
		case '>':
			rep = "&gt;";
			break;
		case '\"':
			rep = "&quot;";
			break;
		case '&':
			/*
			 * Don't perform the quoting if this & is part of
			 * an mdoc escape sequence, e.g. \&.  A leading &
			 * has no preceding backslash and is quoted.
			 */
			if (p == src || p[-1] != '\\')
				rep = "&amp;";
			else
				rep = "&";
			break;
		case '\002':
			rep = "<b>";
			break;
		case '\003':
			rep = "</b>";
			break;
		default:
			*dst++ = *p;
			continue;
		}
		replen = strlen(rep);
		memcpy(dst, rep, replen);
		dst += replen;
	}
	*dst = '\0';
	*slen = (size_t)(dst - ddst);
	return ddst;
}
827751d5fc6Schristos
828751d5fc6Schristos
829410d0f43Sjoerg /*
830410d0f43Sjoerg * callback_html --
831410d0f43Sjoerg * Callback function for run_query_html. It builds the html output and then
832410d0f43Sjoerg * calls the actual user supplied callback function.
833410d0f43Sjoerg */
834410d0f43Sjoerg static int
callback_html(query_callback_args * callback_args)835357f7b44Sabhinav callback_html(query_callback_args *callback_args)
836410d0f43Sjoerg {
837357f7b44Sabhinav struct orig_callback_data *orig_data = callback_args->other_data;
838357f7b44Sabhinav int (*callback)(query_callback_args*) = orig_data->callback;
839357f7b44Sabhinav size_t length = callback_args->snippet_length;
840357f7b44Sabhinav size_t name_description_length = strlen(callback_args->name_desc);
841357f7b44Sabhinav char *qsnippet = get_escaped_html_string(callback_args->snippet, &length);
842357f7b44Sabhinav char *qname_description = get_escaped_html_string(callback_args->name_desc,
843751d5fc6Schristos &name_description_length);
844357f7b44Sabhinav callback_args->name_desc = qname_description;
845357f7b44Sabhinav callback_args->snippet = qsnippet;
846357f7b44Sabhinav callback_args->snippet_length = length;
847357f7b44Sabhinav callback_args->other_data = orig_data->data;
848521bd3eaSgutteridge int rc = (*callback)(callback_args);
849410d0f43Sjoerg free(qsnippet);
850751d5fc6Schristos free(qname_description);
851521bd3eaSgutteridge return rc;
852410d0f43Sjoerg }
853410d0f43Sjoerg
854410d0f43Sjoerg /*
855410d0f43Sjoerg * run_query_html --
856410d0f43Sjoerg * Utility function to output query result in HTML format.
857f0a7346dSsnj * It internally calls run_query only, but it first passes the output to its
858410d0f43Sjoerg * own custom callback function, which preprocess the snippet for quoting
859410d0f43Sjoerg * inline HTML fragments.
860410d0f43Sjoerg * After that it delegates the call the actual user supplied callback function.
861410d0f43Sjoerg */
862910ecac4Schristos static int
run_query_html(sqlite3 * db,query_args * args)863410d0f43Sjoerg run_query_html(sqlite3 *db, query_args *args)
864410d0f43Sjoerg {
865410d0f43Sjoerg struct orig_callback_data orig_data;
866410d0f43Sjoerg orig_data.callback = args->callback;
867410d0f43Sjoerg orig_data.data = args->callback_data;
868410d0f43Sjoerg const char *snippet_args[] = {"\002", "\003", "..."};
869410d0f43Sjoerg args->callback = &callback_html;
870410d0f43Sjoerg args->callback_data = (void *) &orig_data;
871910ecac4Schristos return run_query_internal(db, snippet_args, args);
872410d0f43Sjoerg }
873410d0f43Sjoerg
/*
 * ul_pager --
 *  Returns a newly allocated copy of s.  If ul is zero the copy is
 *  verbatim; otherwise every byte c of s is written as the three bytes
 *  '_', '\b', c (pager-style underlining).  The caller frees the result.
 */
static char *
ul_pager(int ul, const char *s)
{
	if (!ul)
		return estrdup(s);

	/* Three output bytes per input byte, plus the terminating NUL. */
	size_t cap = strlen(s) * 3 + 1;
	char *out = emalloc(cap);
	char *w = out;

	for (const char *r = s; *r != '\0'; r++) {
		*w++ = '_';
		*w++ = '\b';
		*w++ = *r;
	}
	*w = '\0';
	return out;
}
8986265ee0dSchristos
8996265ee0dSchristos /*
900410d0f43Sjoerg * callback_pager --
901410d0f43Sjoerg * A callback similar to callback_html. It overstrikes the matching text in
902410d0f43Sjoerg * the snippet so that it appears emboldened when viewed using a pager like
903410d0f43Sjoerg * more or less.
904410d0f43Sjoerg */
905410d0f43Sjoerg static int
callback_pager(query_callback_args * callback_args)906357f7b44Sabhinav callback_pager(query_callback_args *callback_args)
907410d0f43Sjoerg {
908357f7b44Sabhinav struct orig_callback_data *orig_data = callback_args->other_data;
909410d0f43Sjoerg char *psnippet;
910357f7b44Sabhinav const char *temp = callback_args->snippet;
911410d0f43Sjoerg int count = 0;
91282fc5158Schristos int i = 0, did;
913410d0f43Sjoerg size_t sz = 0;
914410d0f43Sjoerg size_t psnippet_length;
915410d0f43Sjoerg
9165e64704aSchristos /* Count the number of bytes of matching text. For each of these
9175e64704aSchristos * bytes we will use 2 extra bytes to overstrike it so that it
9185e64704aSchristos * appears bold when viewed using a pager.
919410d0f43Sjoerg */
920410d0f43Sjoerg while (*temp) {
921410d0f43Sjoerg sz = strcspn(temp, "\002\003");
922410d0f43Sjoerg temp += sz;
923410d0f43Sjoerg if (*temp == '\003') {
924410d0f43Sjoerg count += 2 * (sz);
925410d0f43Sjoerg }
926410d0f43Sjoerg temp++;
927410d0f43Sjoerg }
928410d0f43Sjoerg
929357f7b44Sabhinav psnippet_length = callback_args->snippet_length + count;
930410d0f43Sjoerg psnippet = emalloc(psnippet_length + 1);
931410d0f43Sjoerg
932410d0f43Sjoerg /* Copy the bytes from snippet to psnippet:
933410d0f43Sjoerg * 1. Copy the bytes before \002 as it is.
9345e64704aSchristos * 2. The bytes after \002 need to be overstriked till we
9355e64704aSchristos * encounter \003.
936410d0f43Sjoerg * 3. To overstrike a byte 'A' we need to write 'A\bA'
937410d0f43Sjoerg */
93882fc5158Schristos did = 0;
939357f7b44Sabhinav const char *snippet = callback_args->snippet;
940410d0f43Sjoerg while (*snippet) {
941410d0f43Sjoerg sz = strcspn(snippet, "\002");
942410d0f43Sjoerg memcpy(&psnippet[i], snippet, sz);
943410d0f43Sjoerg snippet += sz;
944410d0f43Sjoerg i += sz;
945410d0f43Sjoerg
946410d0f43Sjoerg /* Don't change this. Advancing the pointer without reading the byte
947410d0f43Sjoerg * is causing strange behavior.
948410d0f43Sjoerg */
949410d0f43Sjoerg if (*snippet == '\002')
950410d0f43Sjoerg snippet++;
951410d0f43Sjoerg while (*snippet && *snippet != '\003') {
95282fc5158Schristos did = 1;
953410d0f43Sjoerg psnippet[i++] = *snippet;
954410d0f43Sjoerg psnippet[i++] = '\b';
955410d0f43Sjoerg psnippet[i++] = *snippet++;
956410d0f43Sjoerg }
957410d0f43Sjoerg if (*snippet)
958410d0f43Sjoerg snippet++;
959410d0f43Sjoerg }
960410d0f43Sjoerg
961410d0f43Sjoerg psnippet[i] = 0;
962357f7b44Sabhinav char *ul_section = ul_pager(did, callback_args->section);
963357f7b44Sabhinav char *ul_name = ul_pager(did, callback_args->name);
964357f7b44Sabhinav char *ul_name_desc = ul_pager(did, callback_args->name_desc);
965357f7b44Sabhinav callback_args->section = ul_section;
966357f7b44Sabhinav callback_args->name = ul_name;
967357f7b44Sabhinav callback_args->name_desc = ul_name_desc;
968357f7b44Sabhinav callback_args->snippet = psnippet;
969357f7b44Sabhinav callback_args->snippet_length = psnippet_length;
970357f7b44Sabhinav callback_args->other_data = orig_data->data;
971521bd3eaSgutteridge int rc = (orig_data->callback)(callback_args);
9726265ee0dSchristos free(ul_section);
9736265ee0dSchristos free(ul_name);
9746265ee0dSchristos free(ul_name_desc);
975410d0f43Sjoerg free(psnippet);
976521bd3eaSgutteridge return rc;
977410d0f43Sjoerg }
978410d0f43Sjoerg
/*
 * Callback data used by callback_term.
 */
struct term_args {
	/* The user's callback and callback data. */
	struct orig_callback_data *orig_data;
	/* Sequence written in front of each underlined field. */
	const char *smul;
	/* Sequence written after each underlined field. */
	const char *rmul;
};
9846265ee0dSchristos
9856265ee0dSchristos /*
9866265ee0dSchristos * underline a string, pager style.
9876265ee0dSchristos */
9886265ee0dSchristos static char *
ul_term(const char * s,const struct term_args * ta)9896265ee0dSchristos ul_term(const char *s, const struct term_args *ta)
9906265ee0dSchristos {
9916265ee0dSchristos char *dst;
9926265ee0dSchristos
9936265ee0dSchristos easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
9946265ee0dSchristos return dst;
9956265ee0dSchristos }
9966265ee0dSchristos
9976265ee0dSchristos /*
9986265ee0dSchristos * callback_term --
9996265ee0dSchristos * A callback similar to callback_html. It overstrikes the matching text in
10006265ee0dSchristos * the snippet so that it appears emboldened when viewed using a pager like
10016265ee0dSchristos * more or less.
10026265ee0dSchristos */
10036265ee0dSchristos static int
callback_term(query_callback_args * callback_args)1004357f7b44Sabhinav callback_term(query_callback_args *callback_args)
10056265ee0dSchristos {
1006357f7b44Sabhinav struct term_args *ta = callback_args->other_data;
10076265ee0dSchristos struct orig_callback_data *orig_data = ta->orig_data;
10086265ee0dSchristos
1009357f7b44Sabhinav char *ul_section = ul_term(callback_args->section, ta);
1010357f7b44Sabhinav char *ul_name = ul_term(callback_args->name, ta);
1011357f7b44Sabhinav char *ul_name_desc = ul_term(callback_args->name_desc, ta);
1012357f7b44Sabhinav callback_args->section = ul_section;
1013357f7b44Sabhinav callback_args->name = ul_name;
1014357f7b44Sabhinav callback_args->name_desc = ul_name_desc;
1015357f7b44Sabhinav callback_args->other_data = orig_data->data;
1016521bd3eaSgutteridge int rc = (orig_data->callback)(callback_args);
10176265ee0dSchristos free(ul_section);
10186265ee0dSchristos free(ul_name);
10196265ee0dSchristos free(ul_name_desc);
1020521bd3eaSgutteridge return rc;
10216265ee0dSchristos }
10226265ee0dSchristos
1023410d0f43Sjoerg /*
1024410d0f43Sjoerg * run_query_pager --
1025410d0f43Sjoerg * Utility function similar to run_query_html. This function tries to
1026410d0f43Sjoerg * pre-process the result assuming it will be piped to a pager.
1027f0a7346dSsnj * For this purpose it first calls its own callback function callback_pager
1028410d0f43Sjoerg * which then delegates the call to the user supplied callback.
1029410d0f43Sjoerg */
1030910ecac4Schristos static int
run_query_pager(sqlite3 * db,query_args * args)103152222de3Sjoerg run_query_pager(sqlite3 *db, query_args *args)
1032410d0f43Sjoerg {
1033410d0f43Sjoerg struct orig_callback_data orig_data;
1034410d0f43Sjoerg orig_data.callback = args->callback;
1035410d0f43Sjoerg orig_data.data = args->callback_data;
1036910ecac4Schristos const char *snippet_args[3] = { "\002", "\003", "..." };
1037410d0f43Sjoerg args->callback = &callback_pager;
1038410d0f43Sjoerg args->callback_data = (void *) &orig_data;
1039910ecac4Schristos return run_query_internal(db, snippet_args, args);
1040410d0f43Sjoerg }
10416265ee0dSchristos
104262025e09Schristos struct nv {
104362025e09Schristos char *s;
104462025e09Schristos size_t l;
104562025e09Schristos };
104662025e09Schristos
104762025e09Schristos static int
term_putc(int c,void * p)104862025e09Schristos term_putc(int c, void *p)
104962025e09Schristos {
105062025e09Schristos struct nv *nv = p;
105162025e09Schristos nv->s[nv->l++] = c;
105262025e09Schristos return 0;
105362025e09Schristos }
105462025e09Schristos
105562025e09Schristos static char *
term_fix_seq(TERMINAL * ti,const char * seq)105662025e09Schristos term_fix_seq(TERMINAL *ti, const char *seq)
105762025e09Schristos {
105862025e09Schristos char *res = estrdup(seq);
105962025e09Schristos struct nv nv;
106062025e09Schristos
106148e922c8Schristos if (ti == NULL)
106248e922c8Schristos return res;
106348e922c8Schristos
106462025e09Schristos nv.s = res;
106562025e09Schristos nv.l = 0;
106662025e09Schristos ti_puts(ti, seq, 1, term_putc, &nv);
106762025e09Schristos nv.s[nv.l] = '\0';
106862025e09Schristos
106962025e09Schristos return res;
107062025e09Schristos }
107162025e09Schristos
10726265ee0dSchristos static void
term_init(int fd,const char * sa[5])10736265ee0dSchristos term_init(int fd, const char *sa[5])
10746265ee0dSchristos {
10756265ee0dSchristos TERMINAL *ti;
10766265ee0dSchristos int error;
10776265ee0dSchristos const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
10786265ee0dSchristos
10796265ee0dSchristos if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
10806265ee0dSchristos bold = sgr0 = NULL;
10816265ee0dSchristos smso = rmso = smul = rmul = "";
10826265ee0dSchristos ti = NULL;
10836265ee0dSchristos } else {
10846265ee0dSchristos bold = ti_getstr(ti, "bold");
10856265ee0dSchristos sgr0 = ti_getstr(ti, "sgr0");
10866265ee0dSchristos if (bold == NULL || sgr0 == NULL) {
10876265ee0dSchristos smso = ti_getstr(ti, "smso");
10886265ee0dSchristos
10896265ee0dSchristos if (smso == NULL ||
10906265ee0dSchristos (rmso = ti_getstr(ti, "rmso")) == NULL)
10916265ee0dSchristos smso = rmso = "";
10926265ee0dSchristos bold = sgr0 = NULL;
10936265ee0dSchristos } else
10946265ee0dSchristos smso = rmso = "";
10956265ee0dSchristos
10966265ee0dSchristos smul = ti_getstr(ti, "smul");
10976265ee0dSchristos if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
10986265ee0dSchristos smul = rmul = "";
10996265ee0dSchristos }
11006265ee0dSchristos
110162025e09Schristos sa[0] = term_fix_seq(ti, bold ? bold : smso);
110262025e09Schristos sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso);
11036265ee0dSchristos sa[2] = estrdup("...");
110462025e09Schristos sa[3] = term_fix_seq(ti, smul);
110562025e09Schristos sa[4] = term_fix_seq(ti, rmul);
110662025e09Schristos
11076265ee0dSchristos if (ti)
11086265ee0dSchristos del_curterm(ti);
11096265ee0dSchristos }
11106265ee0dSchristos
11116265ee0dSchristos /*
11126265ee0dSchristos * run_query_term --
11136265ee0dSchristos * Utility function similar to run_query_html. This function tries to
11146265ee0dSchristos * pre-process the result assuming it will be displayed on a terminal
1115f0a7346dSsnj * For this purpose it first calls its own callback function callback_pager
11166265ee0dSchristos * which then delegates the call to the user supplied callback.
11176265ee0dSchristos */
1118910ecac4Schristos static int
run_query_term(sqlite3 * db,query_args * args)11196265ee0dSchristos run_query_term(sqlite3 *db, query_args *args)
11206265ee0dSchristos {
11216265ee0dSchristos struct orig_callback_data orig_data;
11226265ee0dSchristos struct term_args ta;
11236265ee0dSchristos orig_data.callback = args->callback;
11246265ee0dSchristos orig_data.data = args->callback_data;
11256265ee0dSchristos const char *snippet_args[5];
1126910ecac4Schristos
11276265ee0dSchristos term_init(STDOUT_FILENO, snippet_args);
11286265ee0dSchristos ta.smul = snippet_args[3];
11296265ee0dSchristos ta.rmul = snippet_args[4];
11306265ee0dSchristos ta.orig_data = (void *) &orig_data;
11316265ee0dSchristos
11326265ee0dSchristos args->callback = &callback_term;
11336265ee0dSchristos args->callback_data = &ta;
1134910ecac4Schristos return run_query_internal(db, snippet_args, args);
1135910ecac4Schristos }
1136910ecac4Schristos
1137910ecac4Schristos static int
run_query_none(sqlite3 * db,query_args * args)1138910ecac4Schristos run_query_none(sqlite3 *db, query_args *args)
1139910ecac4Schristos {
1140910ecac4Schristos struct orig_callback_data orig_data;
1141910ecac4Schristos orig_data.callback = args->callback;
1142910ecac4Schristos orig_data.data = args->callback_data;
1143910ecac4Schristos const char *snippet_args[3] = { "", "", "..." };
1144910ecac4Schristos args->callback = &callback_pager;
1145910ecac4Schristos args->callback_data = (void *) &orig_data;
1146910ecac4Schristos return run_query_internal(db, snippet_args, args);
1147910ecac4Schristos }
1148910ecac4Schristos
1149910ecac4Schristos int
run_query(sqlite3 * db,query_format fmt,query_args * args)1150910ecac4Schristos run_query(sqlite3 *db, query_format fmt, query_args *args)
1151910ecac4Schristos {
1152910ecac4Schristos switch (fmt) {
1153910ecac4Schristos case APROPOS_NONE:
1154910ecac4Schristos return run_query_none(db, args);
1155910ecac4Schristos case APROPOS_HTML:
1156910ecac4Schristos return run_query_html(db, args);
1157910ecac4Schristos case APROPOS_TERM:
1158910ecac4Schristos return run_query_term(db, args);
1159910ecac4Schristos case APROPOS_PAGER:
1160910ecac4Schristos return run_query_pager(db, args);
1161910ecac4Schristos default:
1162910ecac4Schristos warnx("Unknown query format %d", (int)fmt);
1163910ecac4Schristos return -1;
1164910ecac4Schristos }
11656265ee0dSchristos }
1166