xref: /netbsd-src/usr.sbin/makemandb/apropos.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: apropos.c,v 1.24 2017/11/25 14:29:38 abhinav Exp $	*/
2 /*-
3  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
4  * All rights reserved.
5  *
6  * This code was developed as part of Google's Summer of Code 2011 program.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: apropos.c,v 1.24 2017/11/25 14:29:38 abhinav Exp $");
35 
36 #include <err.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <util.h>
42 
43 #include "apropos-utils.h"
44 
45 typedef struct apropos_flags {
46 	char **sections;
47 	int nresults;
48 	int pager;
49 	int no_context;
50 	query_format format;
51 	int legacy;
52 	const char *machine;
53 	const char *manconf;
54 } apropos_flags;
55 
56 typedef struct callback_data {
57 	int count;
58 	FILE *out;
59 	apropos_flags *aflags;
60 } callback_data;
61 
62 static char *remove_stopwords(const char *);
63 static int query_callback(query_callback_args *);
64 __dead static void usage(void);
65 
/* Pager command used by main() when -p is given and PAGER is not set. */
#define _PATH_PAGER	"/usr/bin/more -s"
/*
 * Number of slots by which parseargs() grows its section list.
 * No trailing semicolon, so the macro is usable inside expressions.
 */
#define SECTIONS_ARGS_LENGTH 4
68 
69 static void
70 parseargs(int argc, char **argv, struct apropos_flags *aflags)
71 {
72 	int ch;
73 	size_t sections_offset = 0;
74 	size_t sections_size = 0;
75 	char **sections = NULL;
76 	char *section;
77 	aflags->manconf = MANCONF;
78 
79 #define RESIZE_SECTIONS(newsize) \
80 	if (sections == NULL || sections_offset > sections_size - 1) { \
81 		sections_size += newsize; \
82 		sections = erealloc(sections, sections_size * sizeof(*sections)); \
83 	}
84 
85 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
86 		switch (ch) {
87 		case '1':
88 		case '2':
89 		case '3':
90 		case '4':
91 		case '5':
92 		case '6':
93 		case '7':
94 		case '8':
95 		case '9':
96 			section = emalloc(2);
97 			section[0] = ch;
98 			section[1] = 0;
99 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
100 			sections[sections_offset++] = section;
101 			break;
102 		case 'C':
103 			aflags->manconf = optarg;
104 			break;
105 		case 'h':
106 			aflags->format = APROPOS_HTML;
107 			break;
108 		case 'i':
109 			aflags->format = APROPOS_TERM;
110 			break;
111 		case 'l':
112 			aflags->legacy = 1;
113 			aflags->no_context = 1;
114 			aflags->format = APROPOS_NONE;
115 			break;
116 		case 'M':
117 			aflags->no_context = 1;
118 			break;
119 		case 'm':
120 			aflags->no_context = 0;
121 			break;
122 		case 'n':
123 			aflags->nresults = atoi(optarg);
124 			break;
125 		case 'p':	// user wants a pager
126 			aflags->pager = 1;
127 			/*FALLTHROUGH*/
128 		case 'P':
129 			aflags->format = APROPOS_PAGER;
130 			break;
131 		case 'r':
132 			aflags->format = APROPOS_NONE;
133 			break;
134 		case 'S':
135 			aflags->machine = optarg;
136 			break;
137 		case 's':
138 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
139 			sections[sections_offset++] = estrdup(optarg);
140 			break;
141 		case '?':
142 		default:
143 			usage();
144 		}
145 	}
146 	if (sections) {
147 		RESIZE_SECTIONS(1)
148 		sections[sections_offset] = NULL;
149 	}
150 	aflags->sections = sections;
151 }
152 
153 int
154 main(int argc, char *argv[])
155 {
156 	query_args args;
157 	char *query = NULL;	// the user query
158 	char *errmsg = NULL;
159 	char *str;
160 	int rc = 0;
161 	size_t i;
162 	int s;
163 	callback_data cbdata;
164 	cbdata.out = stdout;		// the default output stream
165 	cbdata.count = 0;
166 	apropos_flags aflags;
167 	aflags.sections = NULL;
168 	cbdata.aflags = &aflags;
169 	sqlite3 *db;
170 	setprogname(argv[0]);
171 	if (argc < 2)
172 		usage();
173 
174 	memset(&aflags, 0, sizeof(aflags));
175 
176 	if (!isatty(STDOUT_FILENO))
177 		aflags.format = APROPOS_NONE;
178 	else
179 		aflags.format = APROPOS_TERM;
180 
181 	if ((str = getenv("APROPOS")) != NULL) {
182 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
183 #define WS "\t\n\r "
184 		ptr[0] = __UNCONST(getprogname());
185 		for (s = 1, str = strtok(str, WS); str;
186 		    str = strtok(NULL, WS), s++)
187 			ptr[s] = str;
188 		ptr[s] = NULL;
189 		parseargs(s, ptr, &aflags);
190 		free(ptr);
191 		optreset = 1;
192 		optind = 1;
193 	}
194 
195 	parseargs(argc, argv, &aflags);
196 
197 	argc -= optind;
198 	argv += optind;
199 
200 	if (!argc)
201 		usage();
202 
203 	str = NULL;
204 	while (argc--)
205 		concat(&str, *argv++);
206 	query = remove_stopwords(lower(str));
207 
208 	/*
209 	 * If the query consisted only of stopwords and we removed all of
210 	 * them, use the original query.
211 	 */
212 	if (query == NULL)
213 		query = str;
214 	else
215 		free(str);
216 
217 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
218 		exit(EXIT_FAILURE);
219 
220 	/* If user wants to page the output, then set some settings */
221 	if (aflags.pager) {
222 		const char *pager = getenv("PAGER");
223 		if (pager == NULL)
224 			pager = _PATH_PAGER;
225 		/* Open a pipe to the pager */
226 		if ((cbdata.out = popen(pager, "w")) == NULL) {
227 			close_db(db);
228 			err(EXIT_FAILURE, "pipe failed");
229 		}
230 	}
231 
232 	args.search_str = query;
233 	args.sections = aflags.sections;
234 	args.legacy = aflags.legacy;
235 	args.nrec = aflags.nresults ? aflags.nresults : -1;
236 	args.offset = 0;
237 	args.machine = aflags.machine;
238 	args.callback = &query_callback;
239 	args.callback_data = &cbdata;
240 	args.errmsg = &errmsg;
241 
242 	if (aflags.format == APROPOS_HTML) {
243 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
244 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
245 		    "style=\"border: 1px solid #000000; border-collapse:"
246 		    "collapse;\" border=\"1\">\n", query);
247 	}
248 	rc = run_query(db, aflags.format, &args);
249 	if (aflags.format == APROPOS_HTML)
250 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
251 
252 	free(query);
253 
254 	if (aflags.sections) {
255 		for(i = 0; aflags.sections[i]; i++)
256 			free(aflags.sections[i]);
257 		free(aflags.sections);
258 	}
259 
260 	close_db(db);
261 	if (errmsg) {
262 		warnx("%s", errmsg);
263 		free(errmsg);
264 		exit(EXIT_FAILURE);
265 	}
266 
267 	if (rc < 0) {
268 		/* Something wrong with the database. Exit */
269 		exit(EXIT_FAILURE);
270 	}
271 
272 	if (cbdata.count == 0) {
273 		warnx("No relevant results obtained.\n"
274 		    "Please make sure that you spelled all the terms correctly "
275 		    "or try using different keywords.");
276 	}
277 	return 0;
278 }
279 
280 /*
281  * query_callback --
282  *  Callback function for run_query.
283  *  It simply outputs the results from do_query. If the user specified the -p
284  *  option, then the output is sent to a pager, otherwise stdout is the default
285  *  output stream.
286  */
287 static int
288 query_callback(query_callback_args *qargs)
289 {
290 	callback_data *cbdata = (callback_data *) qargs->other_data;
291 	FILE *out = cbdata->out;
292 	cbdata->count++;
293 	if (cbdata->aflags->format != APROPOS_HTML) {
294 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
295 		"%s (%s)\t%s\n", qargs->name, qargs->section, qargs->name_desc);
296 	    if (cbdata->aflags->no_context == 0)
297 		    fprintf(out, "%s\n\n", qargs->snippet);
298 	} else {
299 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", qargs->name,
300 		qargs->section, qargs->name_desc);
301 	    if (cbdata->aflags->no_context == 0)
302 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", qargs->snippet);
303 	}
304 
305 	return 0;
306 }
307 
308 #include "stopwords.c"
309 
310 /*
311  * remove_stopwords--
312  *  Scans the query and removes any stop words from it.
313  *  Returns the modified query or NULL, if it contained only stop words.
314  */
315 
316 static char *
317 remove_stopwords(const char *query)
318 {
319 	size_t len, idx;
320 	char *output, *buf;
321 	const char *sep, *next;
322 
323 	output = buf = emalloc(strlen(query) + 1);
324 
325 	for (; query[0] != '\0'; query = next) {
326 		sep = strchr(query, ' ');
327 		if (sep == NULL) {
328 			len = strlen(query);
329 			next = query + len;
330 		} else {
331 			len = sep - query;
332 			next = sep + 1;
333 		}
334 		if (len == 0)
335 			continue;
336 		idx = stopwords_hash(query, len);
337 		if (memcmp(stopwords[idx], query, len) == 0 &&
338 		    stopwords[idx][len] == '\0')
339 			continue;
340 		memcpy(buf, query, len);
341 		buf += len;
342 		*buf++ = ' ';
343 	}
344 
345 	if (output == buf) {
346 		free(output);
347 		return NULL;
348 	}
349 	buf[-1] = '\0';
350 	return output;
351 }
352 
/*
 * usage --
 *	write the synopsis to stderr and terminate with exit status 1
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "[-123456789ilMmpr] [-C path] [-n results] "
	    "[-S machine] [-s section] query";

	fprintf(stderr, "Usage: %s %s\n", getprogname(), synopsis);
	exit(1);
}
365