xref: /openbsd-src/usr.bin/spell/look.c (revision 9ffe2a615b36b9151f1f4551f862f600d7f8b67b)
1*9ffe2a61Smillert /*	$OpenBSD: look.c,v 1.7 2016/09/13 15:29:25 millert Exp $	*/
29175dedbSmillert 
39175dedbSmillert /*-
49175dedbSmillert  * Copyright (c) 1991, 1993
59175dedbSmillert  *	The Regents of the University of California.  All rights reserved.
69175dedbSmillert  *
79175dedbSmillert  * This code is derived from software contributed to Berkeley by
89175dedbSmillert  * David Hitz of Auspex Systems, Inc.
99175dedbSmillert  *
109175dedbSmillert  * Redistribution and use in source and binary forms, with or without
119175dedbSmillert  * modification, are permitted provided that the following conditions
129175dedbSmillert  * are met:
139175dedbSmillert  * 1. Redistributions of source code must retain the above copyright
149175dedbSmillert  *    notice, this list of conditions and the following disclaimer.
159175dedbSmillert  * 2. Redistributions in binary form must reproduce the above copyright
169175dedbSmillert  *    notice, this list of conditions and the following disclaimer in the
179175dedbSmillert  *    documentation and/or other materials provided with the distribution.
18f75387cbSmillert  * 3. Neither the name of the University nor the names of its contributors
199175dedbSmillert  *    may be used to endorse or promote products derived from this software
209175dedbSmillert  *    without specific prior written permission.
219175dedbSmillert  *
229175dedbSmillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239175dedbSmillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249175dedbSmillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259175dedbSmillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269175dedbSmillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279175dedbSmillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289175dedbSmillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299175dedbSmillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309175dedbSmillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319175dedbSmillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329175dedbSmillert  * SUCH DAMAGE.
339175dedbSmillert  */
349175dedbSmillert 
359175dedbSmillert #include <sys/types.h>
369175dedbSmillert #include <ctype.h>
379175dedbSmillert #include <stdio.h>
389175dedbSmillert #include <stdlib.h>
399175dedbSmillert #include <string.h>
409175dedbSmillert #include <err.h>
419175dedbSmillert 
429175dedbSmillert u_char	*binary_search(u_char *, u_char *, u_char *);
439175dedbSmillert u_char	*linear_search(u_char *, u_char *, u_char *);
449175dedbSmillert int	 compare(u_char *, u_char *, u_char *);
459175dedbSmillert int	 look(u_char *, u_char *, u_char *);
469175dedbSmillert 
479175dedbSmillert int
look(u_char * string,u_char * front,u_char * back)489175dedbSmillert look(u_char *string, u_char *front, u_char *back)
499175dedbSmillert {
509175dedbSmillert 	u_char *s;
519175dedbSmillert 
529175dedbSmillert 	/* Convert string to lower case before searching. */
539175dedbSmillert 	for (s = string; *s; s++) {
549175dedbSmillert 		if (isupper(*s))
55267e8971Smillert 			*s = tolower(*s);
569175dedbSmillert 	}
579175dedbSmillert 
589175dedbSmillert 	front = binary_search(string, front, back);
599175dedbSmillert 	front = linear_search(string, front, back);
609175dedbSmillert 
619175dedbSmillert 	return (front != NULL);
629175dedbSmillert }
639175dedbSmillert 
649175dedbSmillert /*
659175dedbSmillert  * Binary search for "string" in memory between "front" and "back".
669175dedbSmillert  *
679175dedbSmillert  * This routine is expected to return a pointer to the start of a line at
689175dedbSmillert  * *or before* the first word matching "string".  Relaxing the constraint
699175dedbSmillert  * this way simplifies the algorithm.
709175dedbSmillert  *
719175dedbSmillert  * Invariants:
729175dedbSmillert  * 	front points to the beginning of a line at or before the first
739175dedbSmillert  *	matching string.
749175dedbSmillert  *
759175dedbSmillert  * 	back points to the beginning of a line at or after the first
769175dedbSmillert  *	matching line.
779175dedbSmillert  *
789175dedbSmillert  * Base of the Invariants.
799175dedbSmillert  * 	front = NULL;
809175dedbSmillert  *	back = EOF;
819175dedbSmillert  *
829175dedbSmillert  * Advancing the Invariants:
839175dedbSmillert  *
849175dedbSmillert  * 	p = first newline after halfway point from front to back.
859175dedbSmillert  *
869175dedbSmillert  * 	If the string at "p" is not greater than the string to match,
879175dedbSmillert  *	p is the new front.  Otherwise it is the new back.
889175dedbSmillert  *
899175dedbSmillert  * Termination:
909175dedbSmillert  *
919175dedbSmillert  * 	The definition of the routine allows it return at any point,
929175dedbSmillert  *	since front is always at or before the line to print.
939175dedbSmillert  *
949175dedbSmillert  * 	In fact, it returns when the chosen "p" equals "back".  This
959175dedbSmillert  *	implies that there exists a string is least half as long as
969175dedbSmillert  *	(back - front), which in turn implies that a linear search will
979175dedbSmillert  *	be no more expensive than the cost of simply printing a string or two.
989175dedbSmillert  *
999175dedbSmillert  * 	Trying to continue with binary search at this point would be
1009175dedbSmillert  *	more trouble than it's worth.
1019175dedbSmillert  */
1029175dedbSmillert #define	SKIP_PAST_NEWLINE(p, back) \
1039175dedbSmillert 	while (p < back && *p++ != '\n');
1049175dedbSmillert 
1059175dedbSmillert u_char *
binary_search(u_char * string,u_char * front,u_char * back)1069175dedbSmillert binary_search(u_char *string, u_char *front, u_char *back)
1079175dedbSmillert {
1089175dedbSmillert 	u_char *p;
1099175dedbSmillert 
1109175dedbSmillert 	p = front + (back - front) / 2;
1119175dedbSmillert 	SKIP_PAST_NEWLINE(p, back);
1129175dedbSmillert 
1139175dedbSmillert 	/*
1149175dedbSmillert 	 * If the file changes underneath us, make sure we don't
1159175dedbSmillert 	 * infinitely loop.
1169175dedbSmillert 	 */
1179175dedbSmillert 	while (p < back && back > front) {
1189175dedbSmillert 		if (compare(string, p, back) > 0)
1199175dedbSmillert 			front = p;
1209175dedbSmillert 		else
1219175dedbSmillert 			back = p;
1229175dedbSmillert 		p = front + (back - front) / 2;
1239175dedbSmillert 		SKIP_PAST_NEWLINE(p, back);
1249175dedbSmillert 	}
1259175dedbSmillert 	return (front);
1269175dedbSmillert }
1279175dedbSmillert 
1289175dedbSmillert /*
1299175dedbSmillert  * Find the first line that matches string, linearly searching from front
1309175dedbSmillert  * to back.
1319175dedbSmillert  *
1329175dedbSmillert  * Return NULL for no such line.
1339175dedbSmillert  *
1349175dedbSmillert  * This routine assumes:
1359175dedbSmillert  *
1369175dedbSmillert  * 	o front points at the first character in a line.
1379175dedbSmillert  *	o front is before or at the first line to be printed.
1389175dedbSmillert  */
1399175dedbSmillert u_char *
linear_search(u_char * string,u_char * front,u_char * back)1409175dedbSmillert linear_search(u_char *string, u_char *front, u_char *back)
1419175dedbSmillert {
1429175dedbSmillert 	int result;
1439175dedbSmillert 
1449175dedbSmillert 	while (front < back) {
1459175dedbSmillert 		result = compare(string, front, back);
1469175dedbSmillert 		if (result == 0)
1479175dedbSmillert 			return (front);	/* found it */
1489175dedbSmillert 		if (result < 0)
1499175dedbSmillert 			return (NULL);	/* not there */
1509175dedbSmillert 
1519175dedbSmillert 		SKIP_PAST_NEWLINE(front, back);
1529175dedbSmillert 	}
1539175dedbSmillert 	return (NULL);
1549175dedbSmillert }
1559175dedbSmillert 
1569175dedbSmillert int
compare(u_char * s1,u_char * s2,u_char * back)1579175dedbSmillert compare(u_char *s1, u_char *s2, u_char *back)
1589175dedbSmillert {
1599175dedbSmillert 	int ch;
1609175dedbSmillert 
1619175dedbSmillert 	/* Note that s1 is already upper case. */
1629175dedbSmillert 	for (;; ++s1, ++s2) {
1639175dedbSmillert 		if (*s2 == '\n' || s2 == back)
1649175dedbSmillert 			ch = '\0';
1659175dedbSmillert 		else
166*9ffe2a61Smillert 			ch = tolower(*s2);
1679175dedbSmillert 		if (*s1 != ch)
1689175dedbSmillert 			return (*s1 - ch);
1699175dedbSmillert 		if (ch == '\0')
1709175dedbSmillert 			return (0);
1719175dedbSmillert 	}
1729175dedbSmillert }
173