xref: /netbsd-src/usr.sbin/makemandb/apropos.c (revision a24efa7dea9f1f56c3bdb15a927d3516792ace1c)
1 /*	$NetBSD: apropos.c,v 1.21 2016/05/22 19:26:04 abhinav Exp $	*/
2 /*-
3  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay@gmail.com>
4  * All rights reserved.
5  *
6  * This code was developed as part of Google's Summer of Code 2011 program.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: apropos.c,v 1.21 2016/05/22 19:26:04 abhinav Exp $");
35 
36 #include <err.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <util.h>
42 
43 #include "apropos-utils.h"
44 
45 typedef struct apropos_flags {
46 	char *sec_nums;
47 	int nresults;
48 	int pager;
49 	int no_context;
50 	query_format format;
51 	int legacy;
52 	const char *machine;
53 	const char *manconf;
54 } apropos_flags;
55 
56 typedef struct callback_data {
57 	int count;
58 	FILE *out;
59 	apropos_flags *aflags;
60 } callback_data;
61 
62 static const unsigned int sections_args_length = 16;
63 
64 static char *remove_stopwords(const char *);
65 static int query_callback(void *, const char * , const char *, const char *,
66 	const char *, size_t);
67 __dead static void usage(void);
68 
69 #define _PATH_PAGER	"/usr/bin/more -s"
70 
71 static void
72 parseargs(int argc, char **argv, struct apropos_flags *aflags)
73 {
74 	int ch;
75 	char sec[2] = {0, 0};
76 	aflags->manconf = MANCONF;
77 
78 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
79 		switch (ch) {
80 		case '1':
81 		case '2':
82 		case '3':
83 		case '4':
84 		case '5':
85 		case '6':
86 		case '7':
87 		case '8':
88 		case '9':
89 			/*
90 			 *Generate a space separated list of all the
91 			 * requested sections
92 			 */
93 			sec[0] = (char) ch ;
94 			if (aflags->sec_nums == NULL) {
95 				aflags->sec_nums =
96 				    emalloc(sections_args_length);
97 				memcpy(aflags->sec_nums, sec, 2);
98 			} else
99 				concat2(&aflags->sec_nums, sec, 1);
100 			break;
101 		case 'C':
102 			aflags->manconf = optarg;
103 			break;
104 		case 'h':
105 			aflags->format = APROPOS_HTML;
106 			break;
107 		case 'i':
108 			aflags->format = APROPOS_TERM;
109 			break;
110 		case 'l':
111 			aflags->legacy = 1;
112 			aflags->no_context = 1;
113 			aflags->format = APROPOS_NONE;
114 			break;
115 		case 'M':
116 			aflags->no_context = 1;
117 			break;
118 		case 'm':
119 			aflags->no_context = 0;
120 			break;
121 		case 'n':
122 			aflags->nresults = atoi(optarg);
123 			break;
124 		case 'p':	// user wants a pager
125 			aflags->pager = 1;
126 			/*FALLTHROUGH*/
127 		case 'P':
128 			aflags->format = APROPOS_PAGER;
129 			break;
130 		case 'r':
131 			aflags->format = APROPOS_NONE;
132 			break;
133 		case 'S':
134 			aflags->machine = optarg;
135 			break;
136 		case 's':
137 			if (aflags->sec_nums == NULL) {
138 				size_t arglen = strlen(optarg);
139 				aflags->sec_nums =
140 				    arglen > sections_args_length
141 					? emalloc(arglen + 1)
142 					: emalloc(sections_args_length);
143 				memcpy(aflags->sec_nums, optarg, arglen + 1);
144 			} else
145 				concat(&aflags->sec_nums, optarg);
146 			break;
147 		case '?':
148 		default:
149 			usage();
150 		}
151 	}
152 }
153 
154 int
155 main(int argc, char *argv[])
156 {
157 	query_args args;
158 	char *query = NULL;	// the user query
159 	char *errmsg = NULL;
160 	char *str;
161 	int rc = 0;
162 	int s;
163 	callback_data cbdata;
164 	cbdata.out = stdout;		// the default output stream
165 	cbdata.count = 0;
166 	apropos_flags aflags;
167 	aflags.sec_nums = NULL;
168 	cbdata.aflags = &aflags;
169 	sqlite3 *db;
170 	setprogname(argv[0]);
171 	if (argc < 2)
172 		usage();
173 
174 	memset(&aflags, 0, sizeof(aflags));
175 
176 	if (!isatty(STDOUT_FILENO))
177 		aflags.format = APROPOS_NONE;
178 	else
179 		aflags.format = APROPOS_TERM;
180 
181 	if ((str = getenv("APROPOS")) != NULL) {
182 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
183 #define WS "\t\n\r "
184 		ptr[0] = __UNCONST(getprogname());
185 		for (s = 1, str = strtok(str, WS); str;
186 		    str = strtok(NULL, WS), s++)
187 			ptr[s] = str;
188 		ptr[s] = NULL;
189 		parseargs(s, ptr, &aflags);
190 		free(ptr);
191 		optreset = 1;
192 		optind = 1;
193 	}
194 
195 	parseargs(argc, argv, &aflags);
196 
197 	argc -= optind;
198 	argv += optind;
199 
200 	if (!argc)
201 		usage();
202 
203 	str = NULL;
204 	while (argc--)
205 		concat(&str, *argv++);
206 	/* Eliminate any stopwords from the query */
207 	query = remove_stopwords(lower(str));
208 
209 	/*
210 	 * If the query consisted only of stopwords and we removed all of
211 	 * them, use the original query.
212 	 */
213 	if (query == NULL)
214 		query = str;
215 	else
216 		free(str);
217 
218 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
219 		exit(EXIT_FAILURE);
220 
221 	/* If user wants to page the output, then set some settings */
222 	if (aflags.pager) {
223 		const char *pager = getenv("PAGER");
224 		if (pager == NULL)
225 			pager = _PATH_PAGER;
226 		/* Open a pipe to the pager */
227 		if ((cbdata.out = popen(pager, "w")) == NULL) {
228 			close_db(db);
229 			err(EXIT_FAILURE, "pipe failed");
230 		}
231 	}
232 
233 	args.search_str = query;
234 	args.sec_nums = aflags.sec_nums;
235 	args.legacy = aflags.legacy;
236 	args.nrec = aflags.nresults ? aflags.nresults : -1;
237 	args.offset = 0;
238 	args.machine = aflags.machine;
239 	args.callback = &query_callback;
240 	args.callback_data = &cbdata;
241 	args.errmsg = &errmsg;
242 
243 	if (aflags.format == APROPOS_HTML) {
244 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
245 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
246 		    "style=\"border: 1px solid #000000; border-collapse:"
247 		    "collapse;\" border=\"1\">\n", query);
248 	}
249 	rc = run_query(db, aflags.format, &args);
250 	if (aflags.format == APROPOS_HTML)
251 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
252 
253 	free(query);
254 	free(aflags.sec_nums);
255 	close_db(db);
256 	if (errmsg) {
257 		warnx("%s", errmsg);
258 		free(errmsg);
259 		exit(EXIT_FAILURE);
260 	}
261 
262 	if (rc < 0) {
263 		/* Something wrong with the database. Exit */
264 		exit(EXIT_FAILURE);
265 	}
266 
267 	if (cbdata.count == 0) {
268 		warnx("No relevant results obtained.\n"
269 		    "Please make sure that you spelled all the terms correctly "
270 		    "or try using better keywords.");
271 	}
272 	return 0;
273 }
274 
275 /*
276  * query_callback --
277  *  Callback function for run_query.
278  *  It simply outputs the results from do_query. If the user specified the -p
279  *  option, then the output is sent to a pager, otherwise stdout is the default
280  *  output stream.
281  */
282 static int
283 query_callback(void *data, const char *section, const char *name,
284 	const char *name_desc, const char *snippet, size_t snippet_length)
285 {
286 	callback_data *cbdata = (callback_data *) data;
287 	FILE *out = cbdata->out;
288 	cbdata->count++;
289 	if (cbdata->aflags->format != APROPOS_HTML) {
290 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
291 		"%s (%s)\t%s\n", name, section, name_desc);
292 	    if (cbdata->aflags->no_context == 0)
293 		    fprintf(out, "%s\n\n", snippet);
294 	} else {
295 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", name,
296 		section, name_desc);
297 	    if (cbdata->aflags->no_context == 0)
298 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", snippet);
299 	}
300 
301 	return 0;
302 }
303 
304 #include "stopwords.c"
305 
306 /*
307  * remove_stopwords--
308  *  Scans the query and removes any stop words from it.
309  *  Returns the modified query or NULL, if it contained only stop words.
310  */
311 
312 static char *
313 remove_stopwords(const char *query)
314 {
315 	size_t len, idx;
316 	char *output, *buf;
317 	const char *sep, *next;
318 
319 	output = buf = emalloc(strlen(query) + 1);
320 
321 	for (; query[0] != '\0'; query = next) {
322 		sep = strchr(query, ' ');
323 		if (sep == NULL) {
324 			len = strlen(query);
325 			next = query + len;
326 		} else {
327 			len = sep - query;
328 			next = sep + 1;
329 		}
330 		if (len == 0)
331 			continue;
332 		idx = stopwords_hash(query, len);
333 		if (memcmp(stopwords[idx], query, len) == 0 &&
334 		    stopwords[idx][len] == '\0')
335 			continue;
336 		memcpy(buf, query, len);
337 		buf += len;
338 		*buf++ = ' ';
339 	}
340 
341 	if (output == buf) {
342 		free(output);
343 		return NULL;
344 	}
345 	buf[-1] = '\0';
346 	return output;
347 }
348 
/*
 * usage --
 *	Print the command synopsis to stderr and exit with a failure
 *	status.  Does not return.  The option list mirrors the getopt(3)
 *	string used by parseargs().
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: %s [-123456789hilMmPpr] [-C path] [-n results] "
	    "[-S machine] [-s section] query\n",
	    getprogname());
	exit(EXIT_FAILURE);
}
361