xref: /netbsd-src/usr.bin/look/look.c (revision ae9172d6cd9432a6a1a56760d86b32c57a66c39c)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * David Hitz of Auspex Systems, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 char copyright[] =
39 "@(#) Copyright (c) 1991 The Regents of the University of California.\n\
40  All rights reserved.\n";
41 #endif /* not lint */
42 
43 #ifndef lint
44 /*static char sccsid[] = "from: @(#)look.c	5.1 (Berkeley) 7/21/91";*/
45 static char rcsid[] = "$Id: look.c,v 1.5 1994/03/28 02:16:57 cgd Exp $";
46 #endif /* not lint */
47 
48 /*
49  * look -- find lines in a sorted list.
50  *
51  * The man page said that TABs and SPACEs participate in -d comparisons.
52  * In fact, they were ignored.  This implements historic practice, not
53  * the manual page.
54  */
55 
56 #include <sys/types.h>
57 #include <sys/mman.h>
58 #include <sys/stat.h>
59 #include <errno.h>
60 #include <fcntl.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <ctype.h>
65 #include <unistd.h>
66 #include "pathnames.h"
67 
68 /*
69  * FOLD and DICT convert characters to a normal form for comparison,
70  * according to the user specified flags.
71  *
72  * DICT expects integers because it uses a non-character value to
73  * indicate a character which should not participate in comparisons.
74  */
75 #define	EQUAL		0
76 #define	GREATER		1
77 #define	LESS		(-1)
78 #define NO_COMPARE	(-2)
79 
80 #define	FOLD(c)	(isascii(c) && isupper(c) ? tolower(c) : (c))
81 #define	DICT(c)	(isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
82 
83 int dflag, fflag;
84 
85 char	*binary_search __P((char *, char *, char *));
86 int	 compare __P((char *, char *, char *));
87 void	 err __P((const char *fmt, ...));
88 char	*linear_search __P((char *, char *, char *));
89 int	 look __P((char *, char *, char *));
90 void	 print_from __P((char *, char *, char *));
91 static void	 usage __P((void));
92 
93 main(argc, argv)
94 	int argc;
95 	char *argv[];
96 {
97 	struct stat sb;
98 	int ch, fd;
99 	char *back, *file, *front, *string;
100 
101 	file = _PATH_WORDS;
102 	while ((ch = getopt(argc, argv, "df")) != EOF)
103 		switch(ch) {
104 		case 'd':
105 			dflag = 1;
106 			break;
107 		case 'f':
108 			fflag = 1;
109 			break;
110 		case '?':
111 		default:
112 			usage();
113 		}
114 	argc -= optind;
115 	argv += optind;
116 
117 	switch (argc) {
118 	case 2:				/* Don't set -df for user. */
119 		string = *argv++;
120 		file = *argv;
121 		break;
122 	case 1:				/* But set -df by default. */
123 		dflag = fflag = 1;
124 		string = *argv;
125 		break;
126 	default:
127 		usage();
128 	}
129 
130 	if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb) ||
131 	    (front = mmap(NULL, sb.st_size, PROT_READ, 0, fd,
132 	    (off_t)0)) == NULL)
133 		err("%s: %s", file, strerror(errno));
134 	back = front + sb.st_size;
135 	exit(look(string, front, back));
136 }
137 
138 look(string, front, back)
139 	char *string, *front, *back;
140 {
141 	register int ch;
142 	register char *readp, *writep;
143 
144 	/* Reformat string string to avoid doing it multiple times later. */
145 	for (readp = writep = string; ch = *readp++;) {
146 		if (fflag)
147 			ch = FOLD(ch);
148 		if (dflag)
149 			ch = DICT(ch);
150 		if (ch != NO_COMPARE)
151 			*(writep++) = ch;
152 	}
153 	*writep = '\0';
154 
155 	front = binary_search(string, front, back);
156 	front = linear_search(string, front, back);
157 
158 	if (front)
159 		print_from(string, front, back);
160 	return (front ? 0 : 1);
161 }
162 
163 
164 /*
165  * Binary search for "string" in memory between "front" and "back".
166  *
167  * This routine is expected to return a pointer to the start of a line at
168  * *or before* the first word matching "string".  Relaxing the constraint
169  * this way simplifies the algorithm.
170  *
171  * Invariants:
172  * 	front points to the beginning of a line at or before the first
173  *	matching string.
174  *
175  * 	back points to the beginning of a line at or after the first
176  *	matching line.
177  *
178  * Base of the Invariants.
179  * 	front = NULL;
180  *	back = EOF;
181  *
182  * Advancing the Invariants:
183  *
184  * 	p = first newline after halfway point from front to back.
185  *
186  * 	If the string at "p" is not greater than the string to match,
187  *	p is the new front.  Otherwise it is the new back.
188  *
189  * Termination:
190  *
191  * 	The definition of the routine allows it return at any point,
192  *	since front is always at or before the line to print.
193  *
194  * 	In fact, it returns when the chosen "p" equals "back".  This
195  *	implies that there exists a string is least half as long as
196  *	(back - front), which in turn implies that a linear search will
197  *	be no more expensive than the cost of simply printing a string or two.
198  *
199  * 	Trying to continue with binary search at this point would be
200  *	more trouble than it's worth.
201  */
202 #define	SKIP_PAST_NEWLINE(p, back) \
203 	while (p < back && *p++ != '\n');
204 
205 char *
206 binary_search(string, front, back)
207 	register char *string, *front, *back;
208 {
209 	register char *p;
210 
211 	p = front + (back - front) / 2;
212 	SKIP_PAST_NEWLINE(p, back);
213 
214 	while (p != back) {
215 		if (compare(string, p, back) == GREATER)
216 			front = p;
217 		else
218 			back = p;
219 		p = front + (back - front) / 2;
220 		SKIP_PAST_NEWLINE(p, back);
221 	}
222 	return (front);
223 }
224 
225 /*
226  * Find the first line that starts with string, linearly searching from front
227  * to back.
228  *
229  * Return NULL for no such line.
230  *
231  * This routine assumes:
232  *
233  * 	o front points at the first character in a line.
234  *	o front is before or at the first line to be printed.
235  */
236 char *
237 linear_search(string, front, back)
238 	char *string, *front, *back;
239 {
240 	while (front < back) {
241 		switch (compare(string, front, back)) {
242 		case EQUAL:		/* Found it. */
243 			return (front);
244 			break;
245 		case LESS:		/* No such string. */
246 			return (NULL);
247 			break;
248 		case GREATER:		/* Keep going. */
249 			break;
250 		}
251 		SKIP_PAST_NEWLINE(front, back);
252 	}
253 	return (NULL);
254 }
255 
256 /*
257  * Print as many lines as match string, starting at front.
258  */
259 void
260 print_from(string, front, back)
261 	register char *string, *front, *back;
262 {
263 	for (; front < back && compare(string, front, back) == EQUAL; ++front) {
264 		for (; front < back && *front != '\n'; ++front)
265 			if (putchar(*front) == EOF)
266 				err("stdout: %s", strerror(errno));
267 		if (putchar('\n') == EOF)
268 			err("stdout: %s", strerror(errno));
269 	}
270 }
271 
272 /*
273  * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
274  * string2 (s1 ??? s2).
275  *
276  * 	o Matches up to len(s1) are EQUAL.
277  *	o Matches up to len(s2) are GREATER.
278  *
279  * Compare understands about the -f and -d flags, and treats comparisons
280  * appropriately.
281  *
282  * The string "s1" is null terminated.  The string s2 is '\n' terminated (or
283  * "back" terminated).
284  */
285 int
286 compare(s1, s2, back)
287 	register char *s1, *s2, *back;
288 {
289 	register int ch;
290 
291 	for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
292 		ch = *s2;
293 		if (fflag)
294 			ch = FOLD(ch);
295 		if (dflag)
296 			ch = DICT(ch);
297 
298 		if (ch == NO_COMPARE) {
299 			++s2;		/* Ignore character in comparison. */
300 			continue;
301 		}
302 		if (*s1 != ch)
303 			return (*s1 < ch ? LESS : GREATER);
304 	}
305 	return (*s1 ? GREATER : EQUAL);
306 }
307 
308 static void
309 usage()
310 {
311 	(void)fprintf(stderr, "usage: look [-df] string [file]\n");
312 	exit(2);
313 }
314 
315 #if __STDC__
316 #include <stdarg.h>
317 #else
318 #include <varargs.h>
319 #endif
320 
321 void
322 #if __STDC__
323 err(const char *fmt, ...)
324 #else
325 err(fmt, va_alist)
326 	char *fmt;
327 	va_dcl
328 #endif
329 {
330 	va_list ap;
331 #if __STDC__
332 	va_start(ap, fmt);
333 #else
334 	va_start(ap);
335 #endif
336 	(void)fprintf(stderr, "look: ");
337 	(void)vfprintf(stderr, fmt, ap);
338 	va_end(ap);
339 	(void)fprintf(stderr, "\n");
340 	exit(2);
341 	/* NOTREACHED */
342 }
343