/*	$NetBSD: apropos-utils.c,v 1.40 2017/11/25 14:29:38 abhinav Exp $	*/
/*-
 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
 * All rights reserved.
 *
 * This code was developed as part of Google's Summer of Code 2011 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
31 */ 32 33 #include <sys/cdefs.h> 34 __RCSID("$NetBSD: apropos-utils.c,v 1.40 2017/11/25 14:29:38 abhinav Exp $"); 35 36 #include <sys/queue.h> 37 #include <sys/stat.h> 38 39 #include <assert.h> 40 #include <ctype.h> 41 #include <err.h> 42 #include <math.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <util.h> 47 #include <zlib.h> 48 #include <term.h> 49 #include <unistd.h> 50 #undef tab // XXX: manconf.h 51 52 #include "apropos-utils.h" 53 #include "custom_apropos_tokenizer.h" 54 #include "manconf.h" 55 #include "fts3_tokenizer.h" 56 57 typedef struct orig_callback_data { 58 void *data; 59 int (*callback) (query_callback_args*); 60 } orig_callback_data; 61 62 typedef struct inverse_document_frequency { 63 double value; 64 int status; 65 } inverse_document_frequency; 66 67 /* weights for individual columns */ 68 static const double col_weights[] = { 69 2.0, // NAME 70 2.00, // Name-description 71 0.55, // DESCRIPTION 72 0.10, // LIBRARY 73 0.001, //RETURN VALUES 74 0.20, //ENVIRONMENT 75 0.01, //FILES 76 0.001, //EXIT STATUS 77 2.00, //DIAGNOSTICS 78 0.05, //ERRORS 79 0.00, //md5_hash 80 1.00 //machine 81 }; 82 83 #ifndef APROPOS_DEBUG 84 static int 85 register_tokenizer(sqlite3 *db) 86 { 87 int rc; 88 sqlite3_stmt *stmt; 89 const sqlite3_tokenizer_module *p; 90 const char *name = "custom_apropos_tokenizer"; 91 get_custom_apropos_tokenizer(&p); 92 const char *sql = "SELECT fts3_tokenizer(?, ?)"; 93 94 sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); 95 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0); 96 if (rc != SQLITE_OK) 97 return rc; 98 99 sqlite3_bind_text(stmt, 1, name, -1, SQLITE_STATIC); 100 sqlite3_bind_blob(stmt, 2, &p, sizeof(p), SQLITE_STATIC); 101 sqlite3_step(stmt); 102 103 return sqlite3_finalize(stmt); 104 } 105 #endif 106 107 /* 108 * lower -- 109 * Converts the string str to lower case 110 */ 111 char * 112 lower(char *str) 113 { 114 assert(str); 115 int i = 0; 116 char c; 117 while ((c = 
str[i]) != '\0') 118 str[i++] = tolower((unsigned char) c); 119 return str; 120 } 121 122 /* 123 * concat-- 124 * Utility function. Concatenates together: dst, a space character and src. 125 * dst + " " + src 126 */ 127 void 128 concat(char **dst, const char *src) 129 { 130 concat2(dst, src, strlen(src)); 131 } 132 133 void 134 concat2(char **dst, const char *src, size_t srclen) 135 { 136 size_t totallen, dstlen; 137 char *mydst = *dst; 138 assert(src != NULL); 139 140 /* 141 * If destination buffer dst is NULL, then simply 142 * strdup the source buffer 143 */ 144 if (mydst == NULL) { 145 mydst = estrndup(src, srclen); 146 *dst = mydst; 147 return; 148 } 149 150 dstlen = strlen(mydst); 151 /* 152 * NUL Byte and separator space 153 */ 154 totallen = dstlen + srclen + 2; 155 156 mydst = erealloc(mydst, totallen); 157 158 /* Append a space at the end of dst */ 159 mydst[dstlen++] = ' '; 160 161 /* Now, copy src at the end of dst */ 162 memcpy(mydst + dstlen, src, srclen); 163 mydst[dstlen + srclen] = '\0'; 164 *dst = mydst; 165 } 166 167 void 168 close_db(sqlite3 *db) 169 { 170 sqlite3_close(db); 171 sqlite3_shutdown(); 172 } 173 174 /* 175 * create_db -- 176 * Creates the database schema. 
177 */ 178 static int 179 create_db(sqlite3 *db) 180 { 181 const char *sqlstr = NULL; 182 char *schemasql; 183 char *errmsg = NULL; 184 185 /*------------------------ Create the tables------------------------------*/ 186 187 #if NOTYET 188 sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL); 189 #else 190 sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL); 191 #endif 192 193 schemasql = sqlite3_mprintf("PRAGMA user_version = %d", 194 APROPOS_SCHEMA_VERSION); 195 sqlite3_exec(db, schemasql, NULL, NULL, &errmsg); 196 if (errmsg != NULL) 197 goto out; 198 sqlite3_free(schemasql); 199 200 sqlstr = 201 //mandb 202 "CREATE VIRTUAL TABLE mandb USING fts4(section, name, " 203 "name_desc, desc, lib, return_vals, env, files, " 204 "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, " 205 #ifndef APROPOS_DEBUG 206 "compress=zip, uncompress=unzip, tokenize=custom_apropos_tokenizer, " 207 #else 208 "tokenize=porter, " 209 #endif 210 "notindexed=section, notindexed=md5_hash); " 211 //mandb_meta 212 "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, " 213 "file UNIQUE, md5_hash UNIQUE, id INTEGER PRIMARY KEY); " 214 //mandb_links 215 "CREATE TABLE IF NOT EXISTS mandb_links(link COLLATE NOCASE, target, section, " 216 "machine, md5_hash); "; 217 218 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 219 if (errmsg != NULL) 220 goto out; 221 222 sqlstr = 223 "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links " 224 "(link); " 225 "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta " 226 "(device, inode); " 227 "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links " 228 "(md5_hash);"; 229 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 230 if (errmsg != NULL) 231 goto out; 232 return 0; 233 234 out: 235 warnx("%s", errmsg); 236 free(errmsg); 237 sqlite3_close(db); 238 sqlite3_shutdown(); 239 return -1; 240 } 241 242 /* 243 * zip -- 244 * User defined Sqlite function to compress the FTS table 245 */ 246 static 
void 247 zip(sqlite3_context *pctx, int nval, sqlite3_value **apval) 248 { 249 int nin; 250 long int nout; 251 const unsigned char * inbuf; 252 unsigned char *outbuf; 253 254 assert(nval == 1); 255 nin = sqlite3_value_bytes(apval[0]); 256 inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]); 257 nout = nin + 13 + (nin + 999) / 1000; 258 outbuf = emalloc(nout); 259 compress(outbuf, (unsigned long *) &nout, inbuf, nin); 260 sqlite3_result_blob(pctx, outbuf, nout, free); 261 } 262 263 /* 264 * unzip -- 265 * User defined Sqlite function to uncompress the FTS table. 266 */ 267 static void 268 unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval) 269 { 270 unsigned int rc; 271 unsigned char *outbuf; 272 z_stream stream; 273 274 assert(nval == 1); 275 stream.next_in = __UNCONST(sqlite3_value_blob(apval[0])); 276 stream.avail_in = sqlite3_value_bytes(apval[0]); 277 stream.avail_out = stream.avail_in * 2 + 100; 278 stream.next_out = outbuf = emalloc(stream.avail_out); 279 stream.zalloc = NULL; 280 stream.zfree = NULL; 281 282 if (inflateInit(&stream) != Z_OK) { 283 free(outbuf); 284 return; 285 } 286 287 while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) { 288 if (rc != Z_OK || 289 (stream.avail_out != 0 && stream.avail_in == 0)) { 290 free(outbuf); 291 return; 292 } 293 outbuf = erealloc(outbuf, stream.total_out * 2); 294 stream.next_out = outbuf + stream.total_out; 295 stream.avail_out = stream.total_out; 296 } 297 if (inflateEnd(&stream) != Z_OK) { 298 free(outbuf); 299 return; 300 } 301 outbuf = erealloc(outbuf, stream.total_out); 302 sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free); 303 } 304 305 /* 306 * get_dbpath -- 307 * Read the path of the database from man.conf and return. 
308 */ 309 char * 310 get_dbpath(const char *manconf) 311 { 312 TAG *tp; 313 char *dbpath; 314 315 config(manconf); 316 tp = gettag("_mandb", 1); 317 if (!tp) 318 return NULL; 319 320 if (TAILQ_EMPTY(&tp->entrylist)) 321 return NULL; 322 323 dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s; 324 return dbpath; 325 } 326 327 /* init_db -- 328 * Prepare the database. Register the compress/uncompress functions and the 329 * stopword tokenizer. 330 * db_flag specifies the mode in which to open the database. 3 options are 331 * available: 332 * 1. DB_READONLY: Open in READONLY mode. An error if db does not exist. 333 * 2. DB_READWRITE: Open in read-write mode. An error if db does not exist. 334 * 3. DB_CREATE: Open in read-write mode. It will try to create the db if 335 * it does not exist already. 336 * RETURN VALUES: 337 * The function will return NULL in case the db does not exist 338 * and DB_CREATE 339 * was not specified. And in case DB_CREATE was specified and yet NULL is 340 * returned, then there was some other error. 341 * In normal cases the function should return a handle to the db. 342 */ 343 sqlite3 * 344 init_db(mandb_access_mode db_flag, const char *manconf) 345 { 346 sqlite3 *db = NULL; 347 sqlite3_stmt *stmt; 348 struct stat sb; 349 int rc; 350 int create_db_flag = 0; 351 352 char *dbpath = get_dbpath(manconf); 353 if (dbpath == NULL) 354 errx(EXIT_FAILURE, "_mandb entry not found in man.conf"); 355 356 if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) { 357 /* Database does not exist, check if DB_CREATE was specified, 358 * and set flag to create the database schema 359 */ 360 if (db_flag != (MANDB_CREATE)) { 361 warnx("Missing apropos database. " 362 "Please run makemandb to create it."); 363 return NULL; 364 } 365 create_db_flag = 1; 366 } else { 367 /* 368 * Database exists. 
Check if we have the permissions 369 * to read/write the files 370 */ 371 int access_mode = R_OK; 372 switch (db_flag) { 373 case MANDB_CREATE: 374 case MANDB_WRITE: 375 access_mode |= W_OK; 376 break; 377 default: 378 break; 379 } 380 if ((access(dbpath, access_mode)) != 0) { 381 warnx("Unable to access the database, please check" 382 " permissions for `%s'", dbpath); 383 return NULL; 384 } 385 } 386 387 sqlite3_initialize(); 388 rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL); 389 390 if (rc != SQLITE_OK) { 391 warnx("%s", sqlite3_errmsg(db)); 392 goto error; 393 } 394 395 sqlite3_extended_result_codes(db, 1); 396 397 #ifndef APROPOS_DEBUG 398 rc = register_tokenizer(db); 399 if (rc != SQLITE_OK) { 400 warnx("Unable to register custom tokenizer: %s", sqlite3_errmsg(db)); 401 goto error; 402 } 403 #endif 404 405 if (create_db_flag && create_db(db) < 0) { 406 warnx("%s", "Unable to create database schema"); 407 goto error; 408 } 409 410 rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL); 411 if (rc != SQLITE_OK) { 412 warnx("Unable to query schema version: %s", 413 sqlite3_errmsg(db)); 414 goto error; 415 } 416 if (sqlite3_step(stmt) != SQLITE_ROW) { 417 sqlite3_finalize(stmt); 418 warnx("Unable to query schema version: %s", 419 sqlite3_errmsg(db)); 420 goto error; 421 } 422 if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) { 423 sqlite3_finalize(stmt); 424 warnx("Incorrect schema version found. 
" 425 "Please run makemandb -f."); 426 goto error; 427 } 428 sqlite3_finalize(stmt); 429 430 431 /* Register the zip and unzip functions for FTS compression */ 432 rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip, 433 NULL, NULL); 434 if (rc != SQLITE_OK) { 435 warnx("Unable to register function: compress: %s", 436 sqlite3_errmsg(db)); 437 goto error; 438 } 439 440 rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL, 441 unzip, NULL, NULL); 442 if (rc != SQLITE_OK) { 443 warnx("Unable to register function: uncompress: %s", 444 sqlite3_errmsg(db)); 445 goto error; 446 } 447 return db; 448 449 error: 450 close_db(db); 451 return NULL; 452 } 453 454 /* 455 * rank_func -- 456 * Sqlite user defined function for ranking the documents. 457 * For each phrase of the query, it computes the tf and idf and adds them over. 458 * It computes the final rank, by multiplying tf and idf together. 459 * Weight of term t for document d = (term frequency of t in d * 460 * inverse document frequency of t) 461 * 462 * Term Frequency of term t in document d = Number of times t occurs in d / 463 * Number of times t appears in all documents 464 * 465 * Inverse document frequency of t = log(Total number of documents / 466 * Number of documents in which t occurs) 467 */ 468 static void 469 rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval) 470 { 471 inverse_document_frequency *idf = sqlite3_user_data(pctx); 472 double tf = 0.0; 473 const unsigned int *matchinfo; 474 int ncol; 475 int nphrase; 476 int iphrase; 477 int ndoc; 478 int doclen = 0; 479 const double k = 3.75; 480 /* 481 * Check that the number of arguments passed to this 482 * function is correct. 
483 */ 484 assert(nval == 1); 485 486 matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]); 487 nphrase = matchinfo[0]; 488 ncol = matchinfo[1]; 489 ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol]; 490 for (iphrase = 0; iphrase < nphrase; iphrase++) { 491 int icol; 492 const unsigned int *phraseinfo = 493 &matchinfo[2 + ncol + iphrase * ncol * 3]; 494 for(icol = 1; icol < ncol; icol++) { 495 496 /* nhitcount: number of times the current phrase occurs 497 * in the current column in the current document. 498 * nglobalhitcount: number of times current phrase 499 * occurs in the current column in all documents. 500 * ndocshitcount: number of documents in which the 501 * current phrase occurs in the current column at 502 * least once. 503 */ 504 int nhitcount = phraseinfo[3 * icol]; 505 int nglobalhitcount = phraseinfo[3 * icol + 1]; 506 int ndocshitcount = phraseinfo[3 * icol + 2]; 507 doclen = matchinfo[2 + icol ]; 508 double weight = col_weights[icol - 1]; 509 if (idf->status == 0 && ndocshitcount) 510 idf->value += 511 log(((double)ndoc / ndocshitcount))* weight; 512 513 /* 514 * Dividing the tf by document length to normalize 515 * the effect of longer documents. 516 */ 517 if (nglobalhitcount > 0 && nhitcount) 518 tf += (((double)nhitcount * weight) 519 / (nglobalhitcount * doclen)); 520 } 521 } 522 idf->status = 1; 523 524 /* 525 * Final score: Dividing by k + tf further normalizes the weight 526 * leading to better results. 
The value of k is experimental 527 */ 528 double score = (tf * idf->value) / (k + tf); 529 sqlite3_result_double(pctx, score); 530 return; 531 } 532 533 /* 534 * generates sql query for matching the user entered query 535 */ 536 static char * 537 generate_search_query(query_args *args, const char *snippet_args[3]) 538 { 539 const char *default_snippet_args[3]; 540 char *section_clause = NULL; 541 char *limit_clause = NULL; 542 char *machine_clause = NULL; 543 char *query = NULL; 544 545 if (args->machine) { 546 machine_clause = sqlite3_mprintf("AND mandb.machine=%Q", args->machine); 547 if (machine_clause == NULL) 548 goto RETURN; 549 } 550 551 if (args->nrec >= 0) { 552 /* Use the provided number of records and offset */ 553 limit_clause = sqlite3_mprintf(" LIMIT %d OFFSET %d", 554 args->nrec, args->offset); 555 if (limit_clause == NULL) 556 goto RETURN; 557 } 558 559 /* We want to build a query of the form: "select x,y,z from mandb where 560 * mandb match :query [AND (section IN ('1', '2')] 561 * ORDER BY rank DESC [LIMIT 10 OFFSET 0]" 562 * NOTES: 563 * 1. The portion in first pair of square brackets is optional. 564 * It will be there only if the user has specified an option 565 * to search in one or more specific sections. 566 * 2. The LIMIT portion will be there if the user has specified 567 * a limit using the -n option. 568 */ 569 if (args->sections && args->sections[0]) { 570 concat(§ion_clause, " AND mandb.section IN ("); 571 for (size_t i = 0; args->sections[i]; i++) { 572 char *temp; 573 char c = args->sections[i + 1]? 
',': ')'; 574 if ((temp = sqlite3_mprintf("%Q%c", args->sections[i], c)) == NULL) 575 goto RETURN; 576 concat(§ion_clause, temp); 577 free(temp); 578 } 579 } 580 581 if (snippet_args == NULL) { 582 default_snippet_args[0] = ""; 583 default_snippet_args[1] = ""; 584 default_snippet_args[2] = "..."; 585 snippet_args = default_snippet_args; 586 } 587 588 if (args->legacy) { 589 char *wild; 590 easprintf(&wild, "%%%s%%", args->search_str); 591 query = sqlite3_mprintf("SELECT section, name, name_desc, machine" 592 " FROM mandb" 593 " WHERE name LIKE %Q OR name_desc LIKE %Q " 594 "%s" 595 "%s", 596 wild, wild, 597 section_clause ? section_clause : "", 598 limit_clause ? limit_clause : ""); 599 free(wild); 600 } else if (strchr(args->search_str, ' ') == NULL) { 601 /* 602 * If it's a single word query, we want to search in the 603 * links table as well. If the link table contains an entry 604 * for the queried keyword, we want to use that as the name of 605 * the man page. 606 * For example, for `apropos realloc` the output should be 607 * realloc(3) and not malloc(3). 608 */ 609 query = sqlite3_mprintf( 610 "SELECT section, name, name_desc, machine," 611 " snippet(mandb, %Q, %Q, %Q, -1, 40 )," 612 " rank_func(matchinfo(mandb, \"pclxn\")) AS rank" 613 " FROM mandb WHERE name NOT IN (" 614 " SELECT target FROM mandb_links WHERE link=%Q AND" 615 " mandb_links.section=mandb.section) AND mandb MATCH %Q %s %s" 616 " UNION" 617 " SELECT mandb.section, mandb_links.link AS name, mandb.name_desc," 618 " mandb.machine, '' AS snippet, 100.00 AS rank" 619 " FROM mandb JOIN mandb_links ON mandb.name=mandb_links.target and" 620 " mandb.section=mandb_links.section WHERE mandb_links.link=%Q" 621 " %s %s" 622 " ORDER BY rank DESC %s", 623 snippet_args[0], snippet_args[1], snippet_args[2], 624 args->search_str, args->search_str, section_clause ? section_clause : "", 625 machine_clause ? machine_clause : "", args->search_str, 626 machine_clause ? machine_clause : "", 627 section_clause ? 
section_clause : "", 628 limit_clause ? limit_clause : ""); 629 } else { 630 query = sqlite3_mprintf("SELECT section, name, name_desc, machine," 631 " snippet(mandb, %Q, %Q, %Q, -1, 40 )," 632 " rank_func(matchinfo(mandb, \"pclxn\")) AS rank" 633 " FROM mandb" 634 " WHERE mandb MATCH %Q %s " 635 "%s" 636 " ORDER BY rank DESC" 637 "%s", 638 snippet_args[0], snippet_args[1], snippet_args[2], 639 args->search_str, machine_clause ? machine_clause : "", 640 section_clause ? section_clause : "", 641 limit_clause ? limit_clause : ""); 642 } 643 644 RETURN: 645 free(machine_clause); 646 free(section_clause); 647 free(limit_clause); 648 return query; 649 } 650 651 /* 652 * Execute the full text search query and return the number of results 653 * obtained. 654 */ 655 static unsigned int 656 execute_search_query(sqlite3 *db, char *query, query_args *args) 657 { 658 sqlite3_stmt *stmt; 659 char *name; 660 char *slash_ptr; 661 const char *name_temp; 662 char *m = NULL; 663 int rc; 664 query_callback_args callback_args; 665 inverse_document_frequency idf = {0, 0}; 666 667 if (!args->legacy) { 668 /* Register the rank function */ 669 rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY, 670 (void *) &idf, rank_func, NULL, NULL); 671 if (rc != SQLITE_OK) { 672 warnx("Unable to register the ranking function: %s", 673 sqlite3_errmsg(db)); 674 sqlite3_close(db); 675 sqlite3_shutdown(); 676 exit(EXIT_FAILURE); 677 } 678 } 679 680 rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL); 681 if (rc == SQLITE_IOERR) { 682 warnx("Corrupt database. 
Please rerun makemandb"); 683 return -1; 684 } else if (rc != SQLITE_OK) { 685 warnx("%s", sqlite3_errmsg(db)); 686 return -1; 687 } 688 689 unsigned int nresults = 0; 690 while (sqlite3_step(stmt) == SQLITE_ROW) { 691 nresults++; 692 callback_args.section = (const char *) sqlite3_column_text(stmt, 0); 693 name_temp = (const char *) sqlite3_column_text(stmt, 1); 694 callback_args.name_desc = (const char *) sqlite3_column_text(stmt, 2); 695 callback_args.machine = (const char *) sqlite3_column_text(stmt, 3); 696 if (!args->legacy) 697 callback_args.snippet = (const char *) sqlite3_column_text(stmt, 4); 698 else 699 callback_args.snippet = ""; 700 if ((slash_ptr = strrchr(name_temp, '/')) != NULL) 701 name_temp = slash_ptr + 1; 702 if (callback_args.machine && callback_args.machine[0]) { 703 m = estrdup(callback_args.machine); 704 easprintf(&name, "%s/%s", lower(m), name_temp); 705 free(m); 706 } else { 707 name = estrdup((const char *) 708 sqlite3_column_text(stmt, 1)); 709 } 710 callback_args.name = name; 711 callback_args.other_data = args->callback_data; 712 (args->callback)(&callback_args); 713 free(name); 714 } 715 sqlite3_finalize(stmt); 716 return nresults; 717 } 718 719 720 /* 721 * run_query_internal -- 722 * Performs the searches for the keywords entered by the user. 723 * The 2nd param: snippet_args is an array of strings providing values for the 724 * last three parameters to the snippet function of sqlite. (Look at the docs). 725 * The 3rd param: args contains rest of the search parameters. Look at 726 * arpopos-utils.h for the description of individual fields. 727 * 728 */ 729 static int 730 run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args) 731 { 732 char *query; 733 query = generate_search_query(args, snippet_args); 734 if (query == NULL) { 735 *args->errmsg = estrdup("malloc failed"); 736 return -1; 737 } 738 739 execute_search_query(db, query, args); 740 sqlite3_free(query); 741 return *(args->errmsg) == NULL ? 
0 : -1; 742 } 743 744 static char * 745 get_escaped_html_string(const char *src, size_t *slen) 746 { 747 static const char trouble[] = "<>\"&\002\003"; 748 /* 749 * First scan the src to find out the number of occurrences 750 * of {'>', '<' '"', '&'}. Then allocate a new buffer with 751 * sufficient space to be able to store the quoted versions 752 * of the special characters {>, <, ", &}. 753 * Copy over the characters from the original src into 754 * this buffer while replacing the special characters with 755 * their quoted versions. 756 */ 757 char *dst, *ddst; 758 size_t count; 759 const char *ssrc; 760 761 for (count = 0, ssrc = src; *src; count++) { 762 size_t sz = strcspn(src, trouble); 763 src += sz + 1; 764 } 765 766 767 #define append(a) \ 768 do { \ 769 memcpy(dst, (a), sizeof(a) - 1); \ 770 dst += sizeof(a) - 1; \ 771 } while (/*CONSTCOND*/0) 772 773 774 ddst = dst = emalloc(*slen + count * 5 + 1); 775 for (src = ssrc; *src; src++) { 776 switch (*src) { 777 case '<': 778 append("<"); 779 break; 780 case '>': 781 append(">"); 782 break; 783 case '\"': 784 append("""); 785 break; 786 case '&': 787 /* 788 * Don't perform the quoting if this & is part of 789 * an mdoc escape sequence, e.g. \& 790 */ 791 if (src != ssrc && src[-1] != '\\') 792 append("&"); 793 else 794 append("&"); 795 break; 796 case '\002': 797 append("<b>"); 798 break; 799 case '\003': 800 append("</b>"); 801 break; 802 default: 803 *dst++ = *src; 804 break; 805 } 806 } 807 *dst = '\0'; 808 *slen = dst - ddst; 809 return ddst; 810 } 811 812 813 /* 814 * callback_html -- 815 * Callback function for run_query_html. It builds the html output and then 816 * calls the actual user supplied callback function. 
817 */ 818 static int 819 callback_html(query_callback_args *callback_args) 820 { 821 struct orig_callback_data *orig_data = callback_args->other_data; 822 int (*callback)(query_callback_args*) = orig_data->callback; 823 size_t length = callback_args->snippet_length; 824 size_t name_description_length = strlen(callback_args->name_desc); 825 char *qsnippet = get_escaped_html_string(callback_args->snippet, &length); 826 char *qname_description = get_escaped_html_string(callback_args->name_desc, 827 &name_description_length); 828 callback_args->name_desc = qname_description; 829 callback_args->snippet = qsnippet; 830 callback_args->snippet_length = length; 831 callback_args->other_data = orig_data->data; 832 (*callback)(callback_args); 833 free(qsnippet); 834 free(qname_description); 835 return 0; 836 } 837 838 /* 839 * run_query_html -- 840 * Utility function to output query result in HTML format. 841 * It internally calls run_query only, but it first passes the output to its 842 * own custom callback function, which preprocess the snippet for quoting 843 * inline HTML fragments. 844 * After that it delegates the call the actual user supplied callback function. 845 */ 846 static int 847 run_query_html(sqlite3 *db, query_args *args) 848 { 849 struct orig_callback_data orig_data; 850 orig_data.callback = args->callback; 851 orig_data.data = args->callback_data; 852 const char *snippet_args[] = {"\002", "\003", "..."}; 853 args->callback = &callback_html; 854 args->callback_data = (void *) &orig_data; 855 return run_query_internal(db, snippet_args, args); 856 } 857 858 /* 859 * underline a string, pager style. 
860 */ 861 static char * 862 ul_pager(int ul, const char *s) 863 { 864 size_t len; 865 char *dst, *d; 866 867 if (!ul) 868 return estrdup(s); 869 870 // a -> _\ba 871 len = strlen(s) * 3 + 1; 872 873 d = dst = emalloc(len); 874 while (*s) { 875 *d++ = '_'; 876 *d++ = '\b'; 877 *d++ = *s++; 878 } 879 *d = '\0'; 880 return dst; 881 } 882 883 /* 884 * callback_pager -- 885 * A callback similar to callback_html. It overstrikes the matching text in 886 * the snippet so that it appears emboldened when viewed using a pager like 887 * more or less. 888 */ 889 static int 890 callback_pager(query_callback_args *callback_args) 891 { 892 struct orig_callback_data *orig_data = callback_args->other_data; 893 char *psnippet; 894 const char *temp = callback_args->snippet; 895 int count = 0; 896 int i = 0, did; 897 size_t sz = 0; 898 size_t psnippet_length; 899 900 /* Count the number of bytes of matching text. For each of these 901 * bytes we will use 2 extra bytes to overstrike it so that it 902 * appears bold when viewed using a pager. 903 */ 904 while (*temp) { 905 sz = strcspn(temp, "\002\003"); 906 temp += sz; 907 if (*temp == '\003') { 908 count += 2 * (sz); 909 } 910 temp++; 911 } 912 913 psnippet_length = callback_args->snippet_length + count; 914 psnippet = emalloc(psnippet_length + 1); 915 916 /* Copy the bytes from snippet to psnippet: 917 * 1. Copy the bytes before \002 as it is. 918 * 2. The bytes after \002 need to be overstriked till we 919 * encounter \003. 920 * 3. To overstrike a byte 'A' we need to write 'A\bA' 921 */ 922 did = 0; 923 const char *snippet = callback_args->snippet; 924 while (*snippet) { 925 sz = strcspn(snippet, "\002"); 926 memcpy(&psnippet[i], snippet, sz); 927 snippet += sz; 928 i += sz; 929 930 /* Don't change this. Advancing the pointer without reading the byte 931 * is causing strange behavior. 
932 */ 933 if (*snippet == '\002') 934 snippet++; 935 while (*snippet && *snippet != '\003') { 936 did = 1; 937 psnippet[i++] = *snippet; 938 psnippet[i++] = '\b'; 939 psnippet[i++] = *snippet++; 940 } 941 if (*snippet) 942 snippet++; 943 } 944 945 psnippet[i] = 0; 946 char *ul_section = ul_pager(did, callback_args->section); 947 char *ul_name = ul_pager(did, callback_args->name); 948 char *ul_name_desc = ul_pager(did, callback_args->name_desc); 949 callback_args->section = ul_section; 950 callback_args->name = ul_name; 951 callback_args->name_desc = ul_name_desc; 952 callback_args->snippet = psnippet; 953 callback_args->snippet_length = psnippet_length; 954 callback_args->other_data = orig_data->data; 955 (orig_data->callback)(callback_args); 956 free(ul_section); 957 free(ul_name); 958 free(ul_name_desc); 959 free(psnippet); 960 return 0; 961 } 962 963 struct term_args { 964 struct orig_callback_data *orig_data; 965 const char *smul; 966 const char *rmul; 967 }; 968 969 /* 970 * underline a string, pager style. 971 */ 972 static char * 973 ul_term(const char *s, const struct term_args *ta) 974 { 975 char *dst; 976 977 easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul); 978 return dst; 979 } 980 981 /* 982 * callback_term -- 983 * A callback similar to callback_html. It overstrikes the matching text in 984 * the snippet so that it appears emboldened when viewed using a pager like 985 * more or less. 
986 */ 987 static int 988 callback_term(query_callback_args *callback_args) 989 { 990 struct term_args *ta = callback_args->other_data; 991 struct orig_callback_data *orig_data = ta->orig_data; 992 993 char *ul_section = ul_term(callback_args->section, ta); 994 char *ul_name = ul_term(callback_args->name, ta); 995 char *ul_name_desc = ul_term(callback_args->name_desc, ta); 996 callback_args->section = ul_section; 997 callback_args->name = ul_name; 998 callback_args->name_desc = ul_name_desc; 999 callback_args->other_data = orig_data->data; 1000 (orig_data->callback)(callback_args); 1001 free(ul_section); 1002 free(ul_name); 1003 free(ul_name_desc); 1004 return 0; 1005 } 1006 1007 /* 1008 * run_query_pager -- 1009 * Utility function similar to run_query_html. This function tries to 1010 * pre-process the result assuming it will be piped to a pager. 1011 * For this purpose it first calls its own callback function callback_pager 1012 * which then delegates the call to the user supplied callback. 1013 */ 1014 static int 1015 run_query_pager(sqlite3 *db, query_args *args) 1016 { 1017 struct orig_callback_data orig_data; 1018 orig_data.callback = args->callback; 1019 orig_data.data = args->callback_data; 1020 const char *snippet_args[3] = { "\002", "\003", "..." 
}; 1021 args->callback = &callback_pager; 1022 args->callback_data = (void *) &orig_data; 1023 return run_query_internal(db, snippet_args, args); 1024 } 1025 1026 struct nv { 1027 char *s; 1028 size_t l; 1029 }; 1030 1031 static int 1032 term_putc(int c, void *p) 1033 { 1034 struct nv *nv = p; 1035 nv->s[nv->l++] = c; 1036 return 0; 1037 } 1038 1039 static char * 1040 term_fix_seq(TERMINAL *ti, const char *seq) 1041 { 1042 char *res = estrdup(seq); 1043 struct nv nv; 1044 1045 if (ti == NULL) 1046 return res; 1047 1048 nv.s = res; 1049 nv.l = 0; 1050 ti_puts(ti, seq, 1, term_putc, &nv); 1051 nv.s[nv.l] = '\0'; 1052 1053 return res; 1054 } 1055 1056 static void 1057 term_init(int fd, const char *sa[5]) 1058 { 1059 TERMINAL *ti; 1060 int error; 1061 const char *bold, *sgr0, *smso, *rmso, *smul, *rmul; 1062 1063 if (ti_setupterm(&ti, NULL, fd, &error) == -1) { 1064 bold = sgr0 = NULL; 1065 smso = rmso = smul = rmul = ""; 1066 ti = NULL; 1067 } else { 1068 bold = ti_getstr(ti, "bold"); 1069 sgr0 = ti_getstr(ti, "sgr0"); 1070 if (bold == NULL || sgr0 == NULL) { 1071 smso = ti_getstr(ti, "smso"); 1072 1073 if (smso == NULL || 1074 (rmso = ti_getstr(ti, "rmso")) == NULL) 1075 smso = rmso = ""; 1076 bold = sgr0 = NULL; 1077 } else 1078 smso = rmso = ""; 1079 1080 smul = ti_getstr(ti, "smul"); 1081 if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL) 1082 smul = rmul = ""; 1083 } 1084 1085 sa[0] = term_fix_seq(ti, bold ? bold : smso); 1086 sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso); 1087 sa[2] = estrdup("..."); 1088 sa[3] = term_fix_seq(ti, smul); 1089 sa[4] = term_fix_seq(ti, rmul); 1090 1091 if (ti) 1092 del_curterm(ti); 1093 } 1094 1095 /* 1096 * run_query_term -- 1097 * Utility function similar to run_query_html. This function tries to 1098 * pre-process the result assuming it will be displayed on a terminal 1099 * For this purpose it first calls its own callback function callback_pager 1100 * which then delegates the call to the user supplied callback. 
1101 */ 1102 static int 1103 run_query_term(sqlite3 *db, query_args *args) 1104 { 1105 struct orig_callback_data orig_data; 1106 struct term_args ta; 1107 orig_data.callback = args->callback; 1108 orig_data.data = args->callback_data; 1109 const char *snippet_args[5]; 1110 1111 term_init(STDOUT_FILENO, snippet_args); 1112 ta.smul = snippet_args[3]; 1113 ta.rmul = snippet_args[4]; 1114 ta.orig_data = (void *) &orig_data; 1115 1116 args->callback = &callback_term; 1117 args->callback_data = &ta; 1118 return run_query_internal(db, snippet_args, args); 1119 } 1120 1121 static int 1122 run_query_none(sqlite3 *db, query_args *args) 1123 { 1124 struct orig_callback_data orig_data; 1125 orig_data.callback = args->callback; 1126 orig_data.data = args->callback_data; 1127 const char *snippet_args[3] = { "", "", "..." }; 1128 args->callback = &callback_pager; 1129 args->callback_data = (void *) &orig_data; 1130 return run_query_internal(db, snippet_args, args); 1131 } 1132 1133 int 1134 run_query(sqlite3 *db, query_format fmt, query_args *args) 1135 { 1136 switch (fmt) { 1137 case APROPOS_NONE: 1138 return run_query_none(db, args); 1139 case APROPOS_HTML: 1140 return run_query_html(db, args); 1141 case APROPOS_TERM: 1142 return run_query_term(db, args); 1143 case APROPOS_PAGER: 1144 return run_query_pager(db, args); 1145 default: 1146 warnx("Unknown query format %d", (int)fmt); 1147 return -1; 1148 } 1149 } 1150