xref: /netbsd-src/usr.sbin/makemandb/apropos.c (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1 /*	$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $	*/
2 /*-
3  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
4  * All rights reserved.
5  *
6  * This code was developed as part of Google's Summer of Code 2011 program.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $");
35 
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>
43 
44 #include "apropos-utils.h"
45 
46 typedef struct apropos_flags {
47 	char **sections;
48 	int nresults;
49 	int pager;
50 	int no_context;
51 	query_format format;
52 	int legacy;
53 	const char *machine;
54 	const char *manconf;
55 } apropos_flags;
56 
57 typedef struct callback_data {
58 	int count;
59 	FILE *out;
60 	apropos_flags *aflags;
61 } callback_data;
62 
63 static char *remove_stopwords(const char *);
64 static int query_callback(query_callback_args *);
65 __dead static void usage(void);
66 
/* Pager command used with -p when $PAGER is not set. */
#define _PATH_PAGER	"/usr/bin/more -s"
/*
 * Number of slots by which the section list grows in parseargs().
 * No trailing semicolon, so the macro is usable inside expressions.
 */
#define SECTIONS_ARGS_LENGTH 4
69 
70 static void
71 parseargs(int argc, char **argv, struct apropos_flags *aflags)
72 {
73 	int ch;
74 	size_t sections_offset = 0;
75 	size_t sections_size = 0;
76 	char **sections = NULL;
77 	char *section;
78 	aflags->manconf = MANCONF;
79 
80 #define RESIZE_SECTIONS(newsize) \
81 	if (sections == NULL || sections_offset > sections_size - 1) { \
82 		sections_size += newsize; \
83 		sections = erealloc(sections, sections_size * sizeof(*sections)); \
84 	}
85 
86 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
87 		switch (ch) {
88 		case '1':
89 		case '2':
90 		case '3':
91 		case '4':
92 		case '5':
93 		case '6':
94 		case '7':
95 		case '8':
96 		case '9':
97 			section = emalloc(2);
98 			section[0] = ch;
99 			section[1] = 0;
100 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
101 			sections[sections_offset++] = section;
102 			break;
103 		case 'C':
104 			aflags->manconf = optarg;
105 			break;
106 		case 'h':
107 			aflags->format = APROPOS_HTML;
108 			break;
109 		case 'i':
110 			aflags->format = APROPOS_TERM;
111 			break;
112 		case 'l':
113 			aflags->legacy = 1;
114 			aflags->no_context = 1;
115 			aflags->format = APROPOS_NONE;
116 			break;
117 		case 'M':
118 			aflags->no_context = 1;
119 			break;
120 		case 'm':
121 			aflags->no_context = 0;
122 			break;
123 		case 'n':
124 			aflags->nresults = atoi(optarg);
125 			break;
126 		case 'p':	// user wants a pager
127 			aflags->pager = 1;
128 			/*FALLTHROUGH*/
129 		case 'P':
130 			aflags->format = APROPOS_PAGER;
131 			break;
132 		case 'r':
133 			aflags->format = APROPOS_NONE;
134 			break;
135 		case 'S':
136 			aflags->machine = optarg;
137 			break;
138 		case 's':
139 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
140 			sections[sections_offset++] = estrdup(optarg);
141 			break;
142 		case '?':
143 		default:
144 			usage();
145 		}
146 	}
147 	if (sections) {
148 		RESIZE_SECTIONS(1)
149 		sections[sections_offset] = NULL;
150 	}
151 	aflags->sections = sections;
152 }
153 
154 int
155 main(int argc, char *argv[])
156 {
157 	query_args args;
158 	char *query = NULL;	// the user query
159 	char *errmsg = NULL;
160 	char *str;
161 	int pc = 0;
162 	int rc = 0;
163 	size_t i;
164 	int s;
165 	callback_data cbdata;
166 	cbdata.out = stdout;		// the default output stream
167 	cbdata.count = 0;
168 	apropos_flags aflags;
169 	aflags.sections = NULL;
170 	cbdata.aflags = &aflags;
171 	sqlite3 *db;
172 	setprogname(argv[0]);
173 	if (argc < 2)
174 		usage();
175 
176 	memset(&aflags, 0, sizeof(aflags));
177 
178 	if (!isatty(STDOUT_FILENO))
179 		aflags.format = APROPOS_NONE;
180 	else
181 		aflags.format = APROPOS_TERM;
182 
183 	if ((str = getenv("APROPOS")) != NULL) {
184 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
185 #define WS "\t\n\r "
186 		ptr[0] = __UNCONST(getprogname());
187 		for (s = 1, str = strtok(str, WS); str;
188 		    str = strtok(NULL, WS), s++)
189 			ptr[s] = str;
190 		ptr[s] = NULL;
191 		parseargs(s, ptr, &aflags);
192 		free(ptr);
193 		optreset = 1;
194 		optind = 1;
195 	}
196 
197 	parseargs(argc, argv, &aflags);
198 
199 	argc -= optind;
200 	argv += optind;
201 
202 	if (!argc)
203 		usage();
204 
205 	str = NULL;
206 	while (argc--)
207 		concat(&str, *argv++);
208 	query = remove_stopwords(lower(str));
209 
210 	/*
211 	 * If the query consisted only of stopwords and we removed all of
212 	 * them, use the original query.
213 	 */
214 	if (query == NULL)
215 		query = str;
216 	else
217 		free(str);
218 
219 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
220 		exit(EXIT_FAILURE);
221 
222 	/* If user wants to page the output, then set some settings */
223 	if (aflags.pager) {
224 		const char *pager = getenv("PAGER");
225 		if (pager == NULL)
226 			pager = _PATH_PAGER;
227 
228 		/* Don't get killed by a broken pipe */
229 		signal(SIGPIPE, SIG_IGN);
230 
231 		/* Open a pipe to the pager */
232 		if ((cbdata.out = popen(pager, "w")) == NULL) {
233 			close_db(db);
234 			err(EXIT_FAILURE, "pipe failed");
235 		}
236 	}
237 
238 	args.search_str = query;
239 	args.sections = aflags.sections;
240 	args.legacy = aflags.legacy;
241 	args.nrec = aflags.nresults ? aflags.nresults : -1;
242 	args.offset = 0;
243 	args.machine = aflags.machine;
244 	args.callback = &query_callback;
245 	args.callback_data = &cbdata;
246 	args.errmsg = &errmsg;
247 
248 	if (aflags.format == APROPOS_HTML) {
249 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
250 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
251 		    "style=\"border: 1px solid #000000; border-collapse:"
252 		    "collapse;\" border=\"1\">\n", query);
253 	}
254 	rc = run_query(db, aflags.format, &args);
255 	if (aflags.format == APROPOS_HTML)
256 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
257 
258 	if (aflags.pager)
259 		pc = pclose(cbdata.out);
260 	free(query);
261 
262 	if (aflags.sections) {
263 		for(i = 0; aflags.sections[i]; i++)
264 			free(aflags.sections[i]);
265 		free(aflags.sections);
266 	}
267 
268 	close_db(db);
269 	if (errmsg) {
270 		warnx("%s", errmsg);
271 		free(errmsg);
272 		exit(EXIT_FAILURE);
273 	}
274 
275 	if (pc == -1)
276 		err(EXIT_FAILURE, "pclose error");
277 
278 	/*
279 	 * Something wrong with the database, writing output, or a non-existent
280 	 * pager.
281 	 */
282 	if (rc < 0)
283 		exit(EXIT_FAILURE);
284 
285 	if (cbdata.count == 0) {
286 		warnx("No relevant results obtained.\n"
287 		    "Please make sure that you spelled all the terms correctly "
288 		    "or try using different keywords.");
289 	}
290 	return 0;
291 }
292 
293 /*
294  * query_callback --
295  *  Callback function for run_query.
296  *  It simply outputs the results from run_query. If the user specified the -p
297  *  option, then the output is sent to a pager, otherwise stdout is the default
298  *  output stream.
299  */
300 static int
301 query_callback(query_callback_args *qargs)
302 {
303 	callback_data *cbdata = (callback_data *) qargs->other_data;
304 	FILE *out = cbdata->out;
305 	cbdata->count++;
306 	if (cbdata->aflags->format != APROPOS_HTML) {
307 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
308 		"%s (%s)\t%s\n", qargs->name, qargs->section, qargs->name_desc);
309 	    if (cbdata->aflags->no_context == 0)
310 		    fprintf(out, "%s\n\n", qargs->snippet);
311 	} else {
312 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", qargs->name,
313 		qargs->section, qargs->name_desc);
314 	    if (cbdata->aflags->no_context == 0)
315 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", qargs->snippet);
316 	}
317 
318 	return fflush(out);
319 }
320 
321 #include "stopwords.c"
322 
323 /*
324  * remove_stopwords--
325  *  Scans the query and removes any stop words from it.
326  *  Returns the modified query or NULL, if it contained only stop words.
327  */
328 
329 static char *
330 remove_stopwords(const char *query)
331 {
332 	size_t len, idx;
333 	char *output, *buf;
334 	const char *sep, *next;
335 
336 	output = buf = emalloc(strlen(query) + 1);
337 
338 	for (; query[0] != '\0'; query = next) {
339 		sep = strchr(query, ' ');
340 		if (sep == NULL) {
341 			len = strlen(query);
342 			next = query + len;
343 		} else {
344 			len = sep - query;
345 			next = sep + 1;
346 		}
347 		if (len == 0)
348 			continue;
349 		idx = stopwords_hash(query, len);
350 		if (memcmp(stopwords[idx], query, len) == 0 &&
351 		    stopwords[idx][len] == '\0')
352 			continue;
353 		memcpy(buf, query, len);
354 		buf += len;
355 		*buf++ = ' ';
356 	}
357 
358 	if (output == buf) {
359 		free(output);
360 		return NULL;
361 	}
362 	buf[-1] = '\0';
363 	return output;
364 }
365 
/*
 * usage --
 *	print usage message and die
 *
 *	The option list mirrors the getopt(3) string used by parseargs(),
 *	including -h and -P.
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: %s [-123456789hilMmPpr] [-C path] "
	    "[-n results] [-S machine] [-s section] query\n",
	    getprogname());
	exit(EXIT_FAILURE);
}
377 }
378