1 /* $NetBSD: apropos-utils.c,v 1.29 2016/10/03 13:36:35 abhinav Exp $ */ 2 /*- 3 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com> 4 * All rights reserved. 5 * 6 * This code was developed as part of Google's Summer of Code 2011 program. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __RCSID("$NetBSD: apropos-utils.c,v 1.29 2016/10/03 13:36:35 abhinav Exp $"); 35 36 #include <sys/queue.h> 37 #include <sys/stat.h> 38 39 #include <assert.h> 40 #include <ctype.h> 41 #include <err.h> 42 #include <math.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <util.h> 47 #include <zlib.h> 48 #include <term.h> 49 #undef tab // XXX: manconf.h 50 51 #include "apropos-utils.h" 52 #include "manconf.h" 53 54 typedef struct orig_callback_data { 55 void *data; 56 int (*callback) (void *, const char *, const char *, const char *, 57 const char *, size_t); 58 } orig_callback_data; 59 60 typedef struct inverse_document_frequency { 61 double value; 62 int status; 63 } inverse_document_frequency; 64 65 /* weights for individual columns */ 66 static const double col_weights[] = { 67 2.0, // NAME 68 2.00, // Name-description 69 0.55, // DESCRIPTION 70 0.10, // LIBRARY 71 0.001, //RETURN VALUES 72 0.20, //ENVIRONMENT 73 0.01, //FILES 74 0.001, //EXIT STATUS 75 2.00, //DIAGNOSTICS 76 0.05, //ERRORS 77 0.00, //md5_hash 78 1.00 //machine 79 }; 80 81 /* 82 * lower -- 83 * Converts the string str to lower case 84 */ 85 char * 86 lower(char *str) 87 { 88 assert(str); 89 int i = 0; 90 char c; 91 while (str[i] != '\0') { 92 c = tolower((unsigned char) str[i]); 93 str[i++] = c; 94 } 95 return str; 96 } 97 98 /* 99 * concat-- 100 * Utility function. Concatenates together: dst, a space character and src. 101 * dst + " " + src 102 */ 103 void 104 concat(char **dst, const char *src) 105 { 106 concat2(dst, src, strlen(src)); 107 } 108 109 void 110 concat2(char **dst, const char *src, size_t srclen) 111 { 112 size_t totallen, dstlen; 113 assert(src != NULL); 114 115 /* 116 * If destination buffer dst is NULL, then simply 117 * strdup the source buffer 118 */ 119 if (*dst == NULL) { 120 *dst = estrndup(src, srclen); 121 return; 122 } 123 124 dstlen = strlen(*dst); 125 /* 126 * NUL Byte and separator space 127 */ 128 totallen = dstlen + srclen + 2; 129 130 *dst = erealloc(*dst, totallen); 131 132 /* Append a space at the end of dst */ 133 (*dst)[dstlen++] = ' '; 134 135 /* Now, copy src at the end of dst */ 136 memcpy(*dst + dstlen, src, srclen); 137 (*dst)[dstlen + srclen] = '\0'; 138 } 139 140 void 141 close_db(sqlite3 *db) 142 { 143 sqlite3_close(db); 144 sqlite3_shutdown(); 145 } 146 147 /* 148 * create_db -- 149 * Creates the database schema. 150 */ 151 static int 152 create_db(sqlite3 *db) 153 { 154 const char *sqlstr = NULL; 155 char *schemasql; 156 char *errmsg = NULL; 157 158 /*------------------------ Create the tables------------------------------*/ 159 160 #if NOTYET 161 sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL); 162 #else 163 sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL); 164 #endif 165 166 schemasql = sqlite3_mprintf("PRAGMA user_version = %d", 167 APROPOS_SCHEMA_VERSION); 168 sqlite3_exec(db, schemasql, NULL, NULL, &errmsg); 169 if (errmsg != NULL) 170 goto out; 171 sqlite3_free(schemasql); 172 173 sqlstr = 174 //mandb 175 "CREATE VIRTUAL TABLE mandb USING fts4(section, name, " 176 "name_desc, desc, lib, return_vals, env, files, " 177 "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, " 178 "compress=zip, uncompress=unzip, tokenize=porter, " 179 "notindexed=section, notindexed=md5_hash); " 180 //mandb_meta 181 "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, " 182 "file UNIQUE, md5_hash UNIQUE, id INTEGER PRIMARY KEY); " 183 //mandb_links 184 "CREATE TABLE IF NOT EXISTS mandb_links(link, target, section, " 185 "machine, md5_hash); "; 186 187 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 188 if (errmsg != NULL) 189 goto out; 190 191 sqlstr = 192 "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links " 193 "(link); " 194 "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta " 195 "(device, inode); " 196 "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links " 197 "(md5_hash);"; 198 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg); 199 if (errmsg != NULL) 200 goto out; 201 return 0; 202 203 out: 204 warnx("%s", errmsg); 205 free(errmsg); 206 sqlite3_close(db); 207 sqlite3_shutdown(); 208 return -1; 209 } 210 211 /* 212 * zip -- 213 * User defined Sqlite function to compress the FTS table 214 */ 215 static void 216 zip(sqlite3_context *pctx, int nval, sqlite3_value **apval) 217 { 218 int nin; 219 long int nout; 220 const unsigned char * inbuf; 221 unsigned char *outbuf; 222 223 assert(nval == 1); 224 nin = sqlite3_value_bytes(apval[0]); 225 inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]); 226 nout = nin + 13 + (nin + 999) / 1000; 227 outbuf = emalloc(nout); 228 compress(outbuf, (unsigned long *) &nout, inbuf, nin); 229 sqlite3_result_blob(pctx, outbuf, nout, free); 230 } 231 232 /* 233 * unzip -- 234 * User defined Sqlite function to uncompress the FTS table. 235 */ 236 static void 237 unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval) 238 { 239 unsigned int rc; 240 unsigned char *outbuf; 241 z_stream stream; 242 243 assert(nval == 1); 244 stream.next_in = __UNCONST(sqlite3_value_blob(apval[0])); 245 stream.avail_in = sqlite3_value_bytes(apval[0]); 246 stream.avail_out = stream.avail_in * 2 + 100; 247 stream.next_out = outbuf = emalloc(stream.avail_out); 248 stream.zalloc = NULL; 249 stream.zfree = NULL; 250 251 if (inflateInit(&stream) != Z_OK) { 252 free(outbuf); 253 return; 254 } 255 256 while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) { 257 if (rc != Z_OK || 258 (stream.avail_out != 0 && stream.avail_in == 0)) { 259 free(outbuf); 260 return; 261 } 262 outbuf = erealloc(outbuf, stream.total_out * 2); 263 stream.next_out = outbuf + stream.total_out; 264 stream.avail_out = stream.total_out; 265 } 266 if (inflateEnd(&stream) != Z_OK) { 267 free(outbuf); 268 return; 269 } 270 outbuf = erealloc(outbuf, stream.total_out); 271 sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free); 272 } 273 274 /* 275 * get_dbpath -- 276 * Read the path of the database from man.conf and return. 277 */ 278 char * 279 get_dbpath(const char *manconf) 280 { 281 TAG *tp; 282 char *dbpath; 283 284 config(manconf); 285 tp = gettag("_mandb", 1); 286 if (!tp) 287 return NULL; 288 289 if (TAILQ_EMPTY(&tp->entrylist)) 290 return NULL; 291 292 dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s; 293 return dbpath; 294 } 295 296 /* init_db -- 297 * Prepare the database. Register the compress/uncompress functions and the 298 * stopword tokenizer. 299 * db_flag specifies the mode in which to open the database. 3 options are 300 * available: 301 * 1. DB_READONLY: Open in READONLY mode. An error if db does not exist. 302 * 2. DB_READWRITE: Open in read-write mode. An error if db does not exist. 303 * 3. DB_CREATE: Open in read-write mode. It will try to create the db if 304 * it does not exist already. 305 * RETURN VALUES: 306 * The function will return NULL in case the db does not exist 307 * and DB_CREATE 308 * was not specified. And in case DB_CREATE was specified and yet NULL is 309 * returned, then there was some other error. 310 * In normal cases the function should return a handle to the db. 311 */ 312 sqlite3 * 313 init_db(mandb_access_mode db_flag, const char *manconf) 314 { 315 sqlite3 *db = NULL; 316 sqlite3_stmt *stmt; 317 struct stat sb; 318 int rc; 319 int create_db_flag = 0; 320 321 char *dbpath = get_dbpath(manconf); 322 if (dbpath == NULL) 323 errx(EXIT_FAILURE, "_mandb entry not found in man.conf"); 324 325 if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) { 326 /* Database does not exist, check if DB_CREATE was specified, 327 * and set flag to create the database schema 328 */ 329 if (db_flag != (MANDB_CREATE)) { 330 warnx("Missing apropos database. " 331 "Please run makemandb to create it."); 332 return NULL; 333 } 334 create_db_flag = 1; 335 } else { 336 /* 337 * Database exists. Check if we have the permissions 338 * to read/write the files 339 */ 340 int access_mode = R_OK; 341 switch (db_flag) { 342 case MANDB_CREATE: 343 case MANDB_WRITE: 344 access_mode |= W_OK; 345 break; 346 default: 347 break; 348 } 349 if ((access(dbpath, access_mode)) != 0) { 350 warnx("Unable to access the database, please check" 351 " permissions for `%s'", dbpath); 352 return NULL; 353 } 354 } 355 356 sqlite3_initialize(); 357 rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL); 358 359 if (rc != SQLITE_OK) { 360 warnx("%s", sqlite3_errmsg(db)); 361 goto error; 362 } 363 364 if (create_db_flag && create_db(db) < 0) { 365 warnx("%s", "Unable to create database schema"); 366 goto error; 367 } 368 369 rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL); 370 if (rc != SQLITE_OK) { 371 warnx("Unable to query schema version: %s", 372 sqlite3_errmsg(db)); 373 goto error; 374 } 375 if (sqlite3_step(stmt) != SQLITE_ROW) { 376 sqlite3_finalize(stmt); 377 warnx("Unable to query schema version: %s", 378 sqlite3_errmsg(db)); 379 goto error; 380 } 381 if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) { 382 sqlite3_finalize(stmt); 383 warnx("Incorrect schema version found. " 384 "Please run makemandb -f."); 385 goto error; 386 } 387 sqlite3_finalize(stmt); 388 389 sqlite3_extended_result_codes(db, 1); 390 391 /* Register the zip and unzip functions for FTS compression */ 392 rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip, 393 NULL, NULL); 394 if (rc != SQLITE_OK) { 395 warnx("Unable to register function: compress: %s", 396 sqlite3_errmsg(db)); 397 goto error; 398 } 399 400 rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL, 401 unzip, NULL, NULL); 402 if (rc != SQLITE_OK) { 403 warnx("Unable to register function: uncompress: %s", 404 sqlite3_errmsg(db)); 405 goto error; 406 } 407 return db; 408 409 error: 410 close_db(db); 411 return NULL; 412 } 413 414 /* 415 * rank_func -- 416 * Sqlite user defined function for ranking the documents. 417 * For each phrase of the query, it computes the tf and idf and adds them over. 418 * It computes the final rank, by multiplying tf and idf together. 419 * Weight of term t for document d = (term frequency of t in d * 420 * inverse document frequency of t) 421 * 422 * Term Frequency of term t in document d = Number of times t occurs in d / 423 * Number of times t appears in all documents 424 * 425 * Inverse document frequency of t = log(Total number of documents / 426 * Number of documents in which t occurs) 427 */ 428 static void 429 rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval) 430 { 431 inverse_document_frequency *idf = sqlite3_user_data(pctx); 432 double tf = 0.0; 433 const unsigned int *matchinfo; 434 int ncol; 435 int nphrase; 436 int iphrase; 437 int ndoc; 438 int doclen = 0; 439 const double k = 3.75; 440 /* 441 * Check that the number of arguments passed to this 442 * function is correct. 443 */ 444 assert(nval == 1); 445 446 matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]); 447 nphrase = matchinfo[0]; 448 ncol = matchinfo[1]; 449 ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol]; 450 for (iphrase = 0; iphrase < nphrase; iphrase++) { 451 int icol; 452 const unsigned int *phraseinfo = 453 &matchinfo[2 + ncol + iphrase * ncol * 3]; 454 for(icol = 1; icol < ncol; icol++) { 455 456 /* nhitcount: number of times the current phrase occurs 457 * in the current column in the current document. 458 * nglobalhitcount: number of times current phrase 459 * occurs in the current column in all documents. 460 * ndocshitcount: number of documents in which the 461 * current phrase occurs in the current column at 462 * least once. 463 */ 464 int nhitcount = phraseinfo[3 * icol]; 465 int nglobalhitcount = phraseinfo[3 * icol + 1]; 466 int ndocshitcount = phraseinfo[3 * icol + 2]; 467 doclen = matchinfo[2 + icol ]; 468 double weight = col_weights[icol - 1]; 469 if (idf->status == 0 && ndocshitcount) 470 idf->value += 471 log(((double)ndoc / ndocshitcount))* weight; 472 473 /* 474 * Dividing the tf by document length to normalize 475 * the effect of longer documents. 476 */ 477 if (nglobalhitcount > 0 && nhitcount) 478 tf += (((double)nhitcount * weight) 479 / (nglobalhitcount * doclen)); 480 } 481 } 482 idf->status = 1; 483 484 /* 485 * Final score: Dividing by k + tf further normalizes the weight 486 * leading to better results. The value of k is experimental 487 */ 488 double score = (tf * idf->value) / (k + tf); 489 sqlite3_result_double(pctx, score); 490 return; 491 } 492 493 /* 494 * generates sql query for matching the user entered query 495 */ 496 static char * 497 generate_search_query(query_args *args, const char *snippet_args[3]) 498 { 499 const char *default_snippet_args[3]; 500 char *section_clause = NULL; 501 char *limit_clause = NULL; 502 char *machine_clause = NULL; 503 char *query; 504 505 if (args->machine) 506 easprintf(&machine_clause, "AND machine = \'%s\' ", 507 args->machine); 508 509 510 /* We want to build a query of the form: "select x,y,z from mandb where 511 * mandb match :query [AND (section LIKE '1' OR section LIKE '2' OR...)] 512 * ORDER BY rank DESC..." 513 * NOTES: 514 * 1. The portion in square brackets is optional, it will be there 515 * only if the user has specified an option on the command line 516 * to search in one or more specific sections. 517 */ 518 char *sections_str = args->sec_nums; 519 char *temp; 520 if (sections_str) { 521 while (*sections_str) { 522 size_t len = strcspn(sections_str, " "); 523 char *sec = sections_str; 524 if (sections_str[len] == 0) { 525 sections_str += len; 526 } else { 527 sections_str[len] = 0; 528 sections_str += len + 1; 529 } 530 easprintf(&temp, "\'%s\',", sec); 531 532 if (section_clause) { 533 concat(§ion_clause, temp); 534 free(temp); 535 } else { 536 section_clause = temp; 537 } 538 } 539 if (section_clause) { 540 /* 541 * At least one section requested, add glue for query. 542 * Before doing that, remove the comma at the end of 543 * section_clause 544 */ 545 size_t section_clause_len = strlen(section_clause); 546 if (section_clause[section_clause_len - 1] == ',') 547 section_clause[section_clause_len - 1] = 0; 548 temp = section_clause; 549 easprintf(§ion_clause, " AND section IN (%s)", temp); 550 free(temp); 551 } 552 } 553 554 if (args->nrec >= 0) { 555 /* Use the provided number of records and offset */ 556 easprintf(&limit_clause, " LIMIT %d OFFSET %d", 557 args->nrec, args->offset); 558 } 559 560 if (snippet_args == NULL) { 561 default_snippet_args[0] = ""; 562 default_snippet_args[1] = ""; 563 default_snippet_args[2] = "..."; 564 snippet_args = default_snippet_args; 565 } 566 567 if (args->legacy) { 568 char *wild; 569 easprintf(&wild, "%%%s%%", args->search_str); 570 query = sqlite3_mprintf("SELECT section, name, name_desc, machine" 571 " FROM mandb" 572 " WHERE name LIKE %Q OR name_desc LIKE %Q " 573 "%s" 574 "%s", 575 wild, wild, 576 section_clause ? section_clause : "", 577 limit_clause ? limit_clause : ""); 578 free(wild); 579 } else { 580 query = sqlite3_mprintf("SELECT section, name, name_desc, machine," 581 " snippet(mandb, %Q, %Q, %Q, -1, 40 )," 582 " rank_func(matchinfo(mandb, \"pclxn\")) AS rank" 583 " FROM mandb" 584 " WHERE mandb MATCH %Q %s " 585 "%s" 586 " ORDER BY rank DESC" 587 "%s", 588 snippet_args[0], snippet_args[1], snippet_args[2], 589 args->search_str, machine_clause ? machine_clause : "", 590 section_clause ? section_clause : "", 591 limit_clause ? limit_clause : ""); 592 } 593 594 free(machine_clause); 595 free(section_clause); 596 free(limit_clause); 597 return query; 598 } 599 600 /* 601 * Execute the full text search query and return the number of results 602 * obtained. 603 */ 604 static unsigned int 605 execute_search_query(sqlite3 *db, char *query, query_args *args) 606 { 607 sqlite3_stmt *stmt; 608 const char *section; 609 char *name; 610 char *slash_ptr; 611 const char *name_desc; 612 const char *machine; 613 const char *snippet = ""; 614 const char *name_temp; 615 char *m = NULL; 616 int rc; 617 inverse_document_frequency idf = {0, 0}; 618 619 if (!args->legacy) { 620 /* Register the rank function */ 621 rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY, 622 (void *) &idf, rank_func, NULL, NULL); 623 if (rc != SQLITE_OK) { 624 warnx("Unable to register the ranking function: %s", 625 sqlite3_errmsg(db)); 626 sqlite3_close(db); 627 sqlite3_shutdown(); 628 exit(EXIT_FAILURE); 629 } 630 } 631 632 rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL); 633 if (rc == SQLITE_IOERR) { 634 warnx("Corrupt database. Please rerun makemandb"); 635 return -1; 636 } else if (rc != SQLITE_OK) { 637 warnx("%s", sqlite3_errmsg(db)); 638 return -1; 639 } 640 641 unsigned int nresults = 0; 642 while (sqlite3_step(stmt) == SQLITE_ROW) { 643 nresults++; 644 section = (const char *) sqlite3_column_text(stmt, 0); 645 name_temp = (const char *) sqlite3_column_text(stmt, 1); 646 name_desc = (const char *) sqlite3_column_text(stmt, 2); 647 machine = (const char *) sqlite3_column_text(stmt, 3); 648 if (!args->legacy) 649 snippet = (const char *) sqlite3_column_text(stmt, 4); 650 if ((slash_ptr = strrchr(name_temp, '/')) != NULL) 651 name_temp = slash_ptr + 1; 652 if (machine && machine[0]) { 653 m = estrdup(machine); 654 easprintf(&name, "%s/%s", lower(m), name_temp); 655 free(m); 656 } else { 657 name = estrdup((const char *) 658 sqlite3_column_text(stmt, 1)); 659 } 660 661 (args->callback)(args->callback_data, section, name, 662 name_desc, snippet, args->legacy? 0: strlen(snippet)); 663 free(name); 664 } 665 sqlite3_finalize(stmt); 666 return nresults; 667 } 668 669 670 /* 671 * run_query_internal -- 672 * Performs the searches for the keywords entered by the user. 673 * The 2nd param: snippet_args is an array of strings providing values for the 674 * last three parameters to the snippet function of sqlite. (Look at the docs). 675 * The 3rd param: args contains rest of the search parameters. Look at 676 * arpopos-utils.h for the description of individual fields. 677 * 678 */ 679 static int 680 run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args) 681 { 682 char *query; 683 query = generate_search_query(args, snippet_args); 684 if (query == NULL) { 685 *args->errmsg = estrdup("malloc failed"); 686 return -1; 687 } 688 689 execute_search_query(db, query, args); 690 sqlite3_free(query); 691 return *(args->errmsg) == NULL ? 0 : -1; 692 } 693 694 static char * 695 get_escaped_html_string(const char *src, size_t *slen) 696 { 697 static const char trouble[] = "<>\"&\002\003"; 698 /* 699 * First scan the src to find out the number of occurrences 700 * of {'>', '<' '"', '&'}. Then allocate a new buffer with 701 * sufficient space to be able to store the quoted versions 702 * of the special characters {>, <, ", &}. 703 * Copy over the characters from the original src into 704 * this buffer while replacing the special characters with 705 * their quoted versions. 706 */ 707 char *dst, *ddst; 708 size_t count; 709 const char *ssrc; 710 711 for (count = 0, ssrc = src; *src; count++) { 712 size_t sz = strcspn(src, trouble); 713 src += sz + 1; 714 } 715 716 717 #define append(a) \ 718 do { \ 719 memcpy(dst, (a), sizeof(a) - 1); \ 720 dst += sizeof(a) - 1; \ 721 } while (/*CONSTCOND*/0) 722 723 724 ddst = dst = emalloc(*slen + count * 5 + 1); 725 for (src = ssrc; *src; src++) { 726 switch (*src) { 727 case '<': 728 append("<"); 729 break; 730 case '>': 731 append(">"); 732 break; 733 case '\"': 734 append("""); 735 break; 736 case '&': 737 /* 738 * Don't perform the quoting if this & is part of 739 * an mdoc escape sequence, e.g. \& 740 */ 741 if (src != ssrc && src[-1] != '\\') 742 append("&"); 743 else 744 append("&"); 745 break; 746 case '\002': 747 append("<b>"); 748 break; 749 case '\003': 750 append("</b>"); 751 break; 752 default: 753 *dst++ = *src; 754 break; 755 } 756 } 757 *dst = '\0'; 758 *slen = dst - ddst; 759 return ddst; 760 } 761 762 763 /* 764 * callback_html -- 765 * Callback function for run_query_html. It builds the html output and then 766 * calls the actual user supplied callback function. 767 */ 768 static int 769 callback_html(void *data, const char *section, const char *name, 770 const char *name_desc, const char *snippet, size_t snippet_length) 771 { 772 struct orig_callback_data *orig_data = data; 773 int (*callback)(void *, const char *, const char *, const char *, 774 const char *, size_t) = orig_data->callback; 775 size_t length = snippet_length; 776 size_t name_description_length = strlen(name_desc); 777 char *qsnippet = get_escaped_html_string(snippet, &length); 778 char *qname_description = get_escaped_html_string(name_desc, 779 &name_description_length); 780 781 (*callback)(orig_data->data, section, name, qname_description, 782 qsnippet, length); 783 free(qsnippet); 784 free(qname_description); 785 return 0; 786 } 787 788 /* 789 * run_query_html -- 790 * Utility function to output query result in HTML format. 791 * It internally calls run_query only, but it first passes the output to its 792 * own custom callback function, which preprocess the snippet for quoting 793 * inline HTML fragments. 794 * After that it delegates the call the actual user supplied callback function. 795 */ 796 static int 797 run_query_html(sqlite3 *db, query_args *args) 798 { 799 struct orig_callback_data orig_data; 800 orig_data.callback = args->callback; 801 orig_data.data = args->callback_data; 802 const char *snippet_args[] = {"\002", "\003", "..."}; 803 args->callback = &callback_html; 804 args->callback_data = (void *) &orig_data; 805 return run_query_internal(db, snippet_args, args); 806 } 807 808 /* 809 * underline a string, pager style. 810 */ 811 static char * 812 ul_pager(int ul, const char *s) 813 { 814 size_t len; 815 char *dst, *d; 816 817 if (!ul) 818 return estrdup(s); 819 820 // a -> _\ba 821 len = strlen(s) * 3 + 1; 822 823 d = dst = emalloc(len); 824 while (*s) { 825 *d++ = '_'; 826 *d++ = '\b'; 827 *d++ = *s++; 828 } 829 *d = '\0'; 830 return dst; 831 } 832 833 /* 834 * callback_pager -- 835 * A callback similar to callback_html. It overstrikes the matching text in 836 * the snippet so that it appears emboldened when viewed using a pager like 837 * more or less. 838 */ 839 static int 840 callback_pager(void *data, const char *section, const char *name, 841 const char *name_desc, const char *snippet, size_t snippet_length) 842 { 843 struct orig_callback_data *orig_data = data; 844 char *psnippet; 845 const char *temp = snippet; 846 int count = 0; 847 int i = 0, did; 848 size_t sz = 0; 849 size_t psnippet_length; 850 851 /* Count the number of bytes of matching text. For each of these 852 * bytes we will use 2 extra bytes to overstrike it so that it 853 * appears bold when viewed using a pager. 854 */ 855 while (*temp) { 856 sz = strcspn(temp, "\002\003"); 857 temp += sz; 858 if (*temp == '\003') { 859 count += 2 * (sz); 860 } 861 temp++; 862 } 863 864 psnippet_length = snippet_length + count; 865 psnippet = emalloc(psnippet_length + 1); 866 867 /* Copy the bytes from snippet to psnippet: 868 * 1. Copy the bytes before \002 as it is. 869 * 2. The bytes after \002 need to be overstriked till we 870 * encounter \003. 871 * 3. To overstrike a byte 'A' we need to write 'A\bA' 872 */ 873 did = 0; 874 while (*snippet) { 875 sz = strcspn(snippet, "\002"); 876 memcpy(&psnippet[i], snippet, sz); 877 snippet += sz; 878 i += sz; 879 880 /* Don't change this. Advancing the pointer without reading the byte 881 * is causing strange behavior. 882 */ 883 if (*snippet == '\002') 884 snippet++; 885 while (*snippet && *snippet != '\003') { 886 did = 1; 887 psnippet[i++] = *snippet; 888 psnippet[i++] = '\b'; 889 psnippet[i++] = *snippet++; 890 } 891 if (*snippet) 892 snippet++; 893 } 894 895 psnippet[i] = 0; 896 char *ul_section = ul_pager(did, section); 897 char *ul_name = ul_pager(did, name); 898 char *ul_name_desc = ul_pager(did, name_desc); 899 (orig_data->callback)(orig_data->data, ul_section, ul_name, 900 ul_name_desc, psnippet, psnippet_length); 901 free(ul_section); 902 free(ul_name); 903 free(ul_name_desc); 904 free(psnippet); 905 return 0; 906 } 907 908 struct term_args { 909 struct orig_callback_data *orig_data; 910 const char *smul; 911 const char *rmul; 912 }; 913 914 /* 915 * underline a string, pager style. 916 */ 917 static char * 918 ul_term(const char *s, const struct term_args *ta) 919 { 920 char *dst; 921 922 easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul); 923 return dst; 924 } 925 926 /* 927 * callback_term -- 928 * A callback similar to callback_html. It overstrikes the matching text in 929 * the snippet so that it appears emboldened when viewed using a pager like 930 * more or less. 931 */ 932 static int 933 callback_term(void *data, const char *section, const char *name, 934 const char *name_desc, const char *snippet, size_t snippet_length) 935 { 936 struct term_args *ta = data; 937 struct orig_callback_data *orig_data = ta->orig_data; 938 939 char *ul_section = ul_term(section, ta); 940 char *ul_name = ul_term(name, ta); 941 char *ul_name_desc = ul_term(name_desc, ta); 942 (orig_data->callback)(orig_data->data, ul_section, ul_name, 943 ul_name_desc, snippet, snippet_length); 944 free(ul_section); 945 free(ul_name); 946 free(ul_name_desc); 947 return 0; 948 } 949 950 /* 951 * run_query_pager -- 952 * Utility function similar to run_query_html. This function tries to 953 * pre-process the result assuming it will be piped to a pager. 954 * For this purpose it first calls its own callback function callback_pager 955 * which then delegates the call to the user supplied callback. 956 */ 957 static int 958 run_query_pager(sqlite3 *db, query_args *args) 959 { 960 struct orig_callback_data orig_data; 961 orig_data.callback = args->callback; 962 orig_data.data = args->callback_data; 963 const char *snippet_args[3] = { "\002", "\003", "..." }; 964 args->callback = &callback_pager; 965 args->callback_data = (void *) &orig_data; 966 return run_query_internal(db, snippet_args, args); 967 } 968 969 struct nv { 970 char *s; 971 size_t l; 972 }; 973 974 static int 975 term_putc(int c, void *p) 976 { 977 struct nv *nv = p; 978 nv->s[nv->l++] = c; 979 return 0; 980 } 981 982 static char * 983 term_fix_seq(TERMINAL *ti, const char *seq) 984 { 985 char *res = estrdup(seq); 986 struct nv nv; 987 988 if (ti == NULL) 989 return res; 990 991 nv.s = res; 992 nv.l = 0; 993 ti_puts(ti, seq, 1, term_putc, &nv); 994 nv.s[nv.l] = '\0'; 995 996 return res; 997 } 998 999 static void 1000 term_init(int fd, const char *sa[5]) 1001 { 1002 TERMINAL *ti; 1003 int error; 1004 const char *bold, *sgr0, *smso, *rmso, *smul, *rmul; 1005 1006 if (ti_setupterm(&ti, NULL, fd, &error) == -1) { 1007 bold = sgr0 = NULL; 1008 smso = rmso = smul = rmul = ""; 1009 ti = NULL; 1010 } else { 1011 bold = ti_getstr(ti, "bold"); 1012 sgr0 = ti_getstr(ti, "sgr0"); 1013 if (bold == NULL || sgr0 == NULL) { 1014 smso = ti_getstr(ti, "smso"); 1015 1016 if (smso == NULL || 1017 (rmso = ti_getstr(ti, "rmso")) == NULL) 1018 smso = rmso = ""; 1019 bold = sgr0 = NULL; 1020 } else 1021 smso = rmso = ""; 1022 1023 smul = ti_getstr(ti, "smul"); 1024 if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL) 1025 smul = rmul = ""; 1026 } 1027 1028 sa[0] = term_fix_seq(ti, bold ? bold : smso); 1029 sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso); 1030 sa[2] = estrdup("..."); 1031 sa[3] = term_fix_seq(ti, smul); 1032 sa[4] = term_fix_seq(ti, rmul); 1033 1034 if (ti) 1035 del_curterm(ti); 1036 } 1037 1038 /* 1039 * run_query_term -- 1040 * Utility function similar to run_query_html. This function tries to 1041 * pre-process the result assuming it will be displayed on a terminal 1042 * For this purpose it first calls its own callback function callback_pager 1043 * which then delegates the call to the user supplied callback. 1044 */ 1045 static int 1046 run_query_term(sqlite3 *db, query_args *args) 1047 { 1048 struct orig_callback_data orig_data; 1049 struct term_args ta; 1050 orig_data.callback = args->callback; 1051 orig_data.data = args->callback_data; 1052 const char *snippet_args[5]; 1053 1054 term_init(STDOUT_FILENO, snippet_args); 1055 ta.smul = snippet_args[3]; 1056 ta.rmul = snippet_args[4]; 1057 ta.orig_data = (void *) &orig_data; 1058 1059 args->callback = &callback_term; 1060 args->callback_data = &ta; 1061 return run_query_internal(db, snippet_args, args); 1062 } 1063 1064 static int 1065 run_query_none(sqlite3 *db, query_args *args) 1066 { 1067 struct orig_callback_data orig_data; 1068 orig_data.callback = args->callback; 1069 orig_data.data = args->callback_data; 1070 const char *snippet_args[3] = { "", "", "..." }; 1071 args->callback = &callback_pager; 1072 args->callback_data = (void *) &orig_data; 1073 return run_query_internal(db, snippet_args, args); 1074 } 1075 1076 int 1077 run_query(sqlite3 *db, query_format fmt, query_args *args) 1078 { 1079 switch (fmt) { 1080 case APROPOS_NONE: 1081 return run_query_none(db, args); 1082 case APROPOS_HTML: 1083 return run_query_html(db, args); 1084 case APROPOS_TERM: 1085 return run_query_term(db, args); 1086 case APROPOS_PAGER: 1087 return run_query_pager(db, args); 1088 default: 1089 warnx("Unknown query format %d", (int)fmt); 1090 return -1; 1091 } 1092 } 1093