xref: /openbsd-src/usr.bin/spell/spellprog.c (revision 1ed98fdf61d9dd29369f246109081408082ce54d)
1*1ed98fdfSderaadt /*	$OpenBSD: spellprog.c,v 1.5 2007/09/02 15:19:34 deraadt Exp $	*/
29175dedbSmillert 
39175dedbSmillert /*
49175dedbSmillert  * Copyright (c) 1991, 1993
59175dedbSmillert  *	The Regents of the University of California.  All rights reserved.
69175dedbSmillert  *
79175dedbSmillert  * Redistribution and use in source and binary forms, with or without
89175dedbSmillert  * modification, are permitted provided that the following conditions
99175dedbSmillert  * are met:
109175dedbSmillert  * 1. Redistributions of source code must retain the above copyright
119175dedbSmillert  *    notice, this list of conditions and the following disclaimer.
129175dedbSmillert  * 2. Redistributions in binary form must reproduce the above copyright
139175dedbSmillert  *    notice, this list of conditions and the following disclaimer in the
149175dedbSmillert  *    documentation and/or other materials provided with the distribution.
15f75387cbSmillert  * 3. Neither the name of the University nor the names of its contributors
169175dedbSmillert  *    may be used to endorse or promote products derived from this software
179175dedbSmillert  *    without specific prior written permission.
189175dedbSmillert  *
199175dedbSmillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
209175dedbSmillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
219175dedbSmillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
229175dedbSmillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
239175dedbSmillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
249175dedbSmillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
259175dedbSmillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
269175dedbSmillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
279175dedbSmillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
289175dedbSmillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
299175dedbSmillert  * SUCH DAMAGE.
309175dedbSmillert  *
319175dedbSmillert  *	@(#)spell.h	8.1 (Berkeley) 6/6/93
329175dedbSmillert  */
339175dedbSmillert /*
349175dedbSmillert  * Copyright (C) Caldera International Inc.  2001-2002.
359175dedbSmillert  * All rights reserved.
369175dedbSmillert  *
379175dedbSmillert  * Redistribution and use in source and binary forms, with or without
389175dedbSmillert  * modification, are permitted provided that the following conditions
399175dedbSmillert  * are met:
409175dedbSmillert  * 1. Redistributions of source code and documentation must retain the above
419175dedbSmillert  *    copyright notice, this list of conditions and the following disclaimer.
429175dedbSmillert  * 2. Redistributions in binary form must reproduce the above copyright
439175dedbSmillert  *    notice, this list of conditions and the following disclaimer in the
449175dedbSmillert  *    documentation and/or other materials provided with the distribution.
459175dedbSmillert  * 3. All advertising materials mentioning features or use of this software
469175dedbSmillert  *    must display the following acknowledgement:
479175dedbSmillert  *	This product includes software developed or owned by Caldera
489175dedbSmillert  *	International, Inc.
499175dedbSmillert  * 4. Neither the name of Caldera International, Inc. nor the names of other
509175dedbSmillert  *    contributors may be used to endorse or promote products derived from
519175dedbSmillert  *    this software without specific prior written permission.
529175dedbSmillert  *
539175dedbSmillert  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
549175dedbSmillert  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
559175dedbSmillert  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
569175dedbSmillert  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
579175dedbSmillert  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
589175dedbSmillert  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
599175dedbSmillert  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
609175dedbSmillert  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
619175dedbSmillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
629175dedbSmillert  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
639175dedbSmillert  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
649175dedbSmillert  * POSSIBILITY OF SUCH DAMAGE.
659175dedbSmillert  */
669175dedbSmillert 
679175dedbSmillert #ifndef lint
689175dedbSmillert static const char copyright[] =
699175dedbSmillert "@(#) Copyright (c) 1991, 1993\n\
709175dedbSmillert 	The Regents of the University of California.  All rights reserved.\n";
719175dedbSmillert #endif /* not lint */
729175dedbSmillert 
739175dedbSmillert #ifndef lint
749175dedbSmillert #if 0
759175dedbSmillert static const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
769175dedbSmillert #else
779175dedbSmillert #endif
78*1ed98fdfSderaadt static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.5 2007/09/02 15:19:34 deraadt Exp $";
799175dedbSmillert #endif /* not lint */
809175dedbSmillert 
819175dedbSmillert #include <sys/param.h>
829175dedbSmillert #include <sys/mman.h>
839175dedbSmillert #include <sys/stat.h>
849175dedbSmillert 
859175dedbSmillert #include <ctype.h>
869175dedbSmillert #include <err.h>
879175dedbSmillert #include <errno.h>
889175dedbSmillert #include <fcntl.h>
899175dedbSmillert #include <limits.h>
909175dedbSmillert #include <locale.h>
919175dedbSmillert #include <stdio.h>
929175dedbSmillert #include <stdlib.h>
939175dedbSmillert #include <string.h>
949175dedbSmillert #include <unistd.h>
959175dedbSmillert 
/* Amount suffix() adds to the derivation level each time it is entered. */
#define DLEV 2

/*
 * Suffix handlers stored in suftab (called through p1/p2) take the end
 * of the candidate stem, two derivation strings and the current level.
 */
int	 an(char *, char *, char *, int);
int	 bility(char *, char *, char *, int);
int	 es(char *, char *, char *, int);
int	 dict(char *, char *);
int	 i_to_y(char *, char *, char *, int);
int	 ily(char *, char *, char *, int);
int	 ize(char *, char *, char *, int);
int	 metry(char *, char *, char *, int);
int	 monosyl(char *, char *);
int	 ncy(char *, char *, char *, int);
int	 nop(void);
int	 trypref(char *, char *, int);
int	 tryword(char *, char *, int);
int	 s(char *, char *, char *, int);
int	 strip(char *, char *, char *, int);
int	 suffix(char *, int);
int	 tion(char *, char *, char *, int);
int	 vowel(int);
int	 y_to_e(char *, char *, char *, int);
int	 CCe(char *, char *, char *, int);
int	 VCe(char *, char *, char *, int);
char	*lookuppref(char **, char *);
char	*skipv(char *);
char	*estrdup(const char *);
void	 ise(void);
void	 print_word(FILE *);
void	 ztos(char *);
__dead void usage(void);

/* from look.c */
int	 look(unsigned char *, unsigned char *, unsigned char *);
1299175dedbSmillert 
1309175dedbSmillert struct suftab {
131f05c4e53Smillert 	char *suf;
1329175dedbSmillert 	int (*p1)();	/* XXX - variable args */
1339175dedbSmillert 	int n1;
1349175dedbSmillert 	char *d1;
1359175dedbSmillert 	char *a1;
1369175dedbSmillert 	int (*p2)();	/* XXX - variable args */
1379175dedbSmillert 	int n2;
1389175dedbSmillert 	char *d2;
1399175dedbSmillert 	char *a2;
1409175dedbSmillert } suftab[] = {
1419175dedbSmillert 	{"ssen", ily, 4, "-y+iness", "+ness" },
1429175dedbSmillert 	{"ssel", ily, 4, "-y+i+less", "+less" },
1439175dedbSmillert 	{"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
1449175dedbSmillert 	{"s'", s, 2, "", "+'s"},
1459175dedbSmillert 	{"s", s, 1, "", "+s"},
1469175dedbSmillert 	{"ecn", ncy, 1, "", "-t+ce"},
1479175dedbSmillert 	{"ycn", ncy, 1, "", "-cy+t"},
1489175dedbSmillert 	{"ytilb", nop, 0, "", ""},
1499175dedbSmillert 	{"ytilib", bility, 5, "-le+ility", ""},
1509175dedbSmillert 	{"elbaif", i_to_y, 4, "-y+iable", ""},
1519175dedbSmillert 	{"elba", CCe, 4, "-e+able", "+able"},
1529175dedbSmillert 	{"yti", CCe, 3, "-e+ity", "+ity"},
1539175dedbSmillert 	{"ylb", y_to_e, 1, "-e+y", ""},
1549175dedbSmillert 	{"yl", ily, 2, "-y+ily", "+ly"},
1559175dedbSmillert 	{"laci", strip, 2, "", "+al"},
1569175dedbSmillert 	{"latnem", strip, 2, "", "+al"},
1579175dedbSmillert 	{"lanoi", strip, 2, "", "+al"},
1589175dedbSmillert 	{"tnem", strip, 4, "", "+ment"},
1599175dedbSmillert 	{"gni", CCe, 3, "-e+ing", "+ing"},
1609175dedbSmillert 	{"reta", nop, 0, "", ""},
1619175dedbSmillert 	{"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
1629175dedbSmillert 	{"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
1639175dedbSmillert 	{"citsi", strip, 2, "", "+ic"},
1649175dedbSmillert 	{"cihparg", i_to_y, 1, "-y+ic", ""},
1659175dedbSmillert 	{"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
1669175dedbSmillert 	{"cirtem", i_to_y, 1, "-y+ic", ""},
1679175dedbSmillert 	{"yrtem", metry, 0, "-ry+er", ""},
1689175dedbSmillert 	{"cigol", i_to_y, 1, "-y+ic", ""},
1699175dedbSmillert 	{"tsigol", i_to_y, 2, "-y+ist", ""},
1709175dedbSmillert 	{"tsi", VCe, 3, "-e+ist", "+ist"},
1719175dedbSmillert 	{"msi", VCe, 3, "-e+ism", "+ist"},
1729175dedbSmillert 	{"noitacif", i_to_y, 6, "-y+ication", ""},
1739175dedbSmillert 	{"noitazi", ize, 5, "-e+ation", ""},
1749175dedbSmillert 	{"rota", tion, 2, "-e+or", ""},
1759175dedbSmillert 	{"noit", tion, 3, "-e+ion", "+ion"},
1769175dedbSmillert 	{"naino", an, 3, "", "+ian"},
1779175dedbSmillert 	{"na", an, 1, "", "+n"},
1789175dedbSmillert 	{"evit", tion, 3, "-e+ive", "+ive"},
1799175dedbSmillert 	{"ezi", CCe, 3, "-e+ize", "+ize"},
1809175dedbSmillert 	{"pihs", strip, 4, "", "+ship"},
1819175dedbSmillert 	{"dooh", ily, 4, "-y+hood", "+hood"},
1829175dedbSmillert 	{"ekil", strip, 4, "", "+like"},
1839175dedbSmillert 	{ NULL }
1849175dedbSmillert };
1859175dedbSmillert 
/*
 * Prefixes that lookuppref() will strip from the front of a word.
 * The list is NULL-terminated and searched in order, so a prefix that
 * begins with another one has to be listed first.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso",
	"kilo", "magneto", "meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over",
	"photo", "poly", "pre", "pseudo",
	"re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
2239175dedbSmillert 
/*
 * One entry per word list named on the command line.  main() allocates
 * the array and marks its end with an entry whose fd is -1.
 */
struct wlist {
	int fd;			/* descriptor returned by open(2) */
	unsigned char *front;	/* start of the mmap'ed file */
	unsigned char *back;	/* front + file size */
} *wlists;

int vflag;			/* -v: include derivations in the found-words file */
int xflag;			/* -x: print every stem handed to dict() */
char word[LINE_MAX];		/* working copy, rewritten in place while stems are tried */
char original[LINE_MAX];	/* the word exactly as read from stdin */
char *deriv[40];		/* derivation strings, indexed by derivation level */
char affix[40];			/* derivations of the accepted word, built by tryword() */
2369175dedbSmillert 
2379175dedbSmillert /*
2389175dedbSmillert  * The spellprog utility accepts a newline-delimited list of words
2399175dedbSmillert  * on stdin.  For arguments it expects the path to a word list and
2409175dedbSmillert  * the path to a file in which to store found words.
2419175dedbSmillert  *
 * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
 * remaining words are then passed to spell again, this time with
 * the dictionary file as the first (non-flag) argument.
2469175dedbSmillert  *
 * Unlike historic versions of spellprog, this one does not use
 * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
2509175dedbSmillert  *
2519175dedbSmillert  * Note that spellprog should be called via the spell shell script
2529175dedbSmillert  * and is not meant to be invoked directly by the user.
2539175dedbSmillert  */
2549175dedbSmillert 
2559175dedbSmillert int
2569175dedbSmillert main(int argc, char **argv)
2579175dedbSmillert {
2589175dedbSmillert 	char *ep, *cp, *dp;
2599175dedbSmillert 	char *outfile;
2609175dedbSmillert 	int ch, fold, i;
2619175dedbSmillert 	struct stat sb;
2629175dedbSmillert 	FILE *file, *found;
2639175dedbSmillert 
2649175dedbSmillert 	setlocale(LC_ALL, "");
2659175dedbSmillert 
2669175dedbSmillert 	outfile = NULL;
2679175dedbSmillert 	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
2689175dedbSmillert 		switch (ch) {
2699175dedbSmillert 		case 'b':
2709175dedbSmillert 			/* Use British dictionary and convert ize -> ise. */
2719175dedbSmillert 			ise();
2729175dedbSmillert 			break;
2739175dedbSmillert 		case 'o':
2749175dedbSmillert 			outfile = optarg;
2759175dedbSmillert 			break;
2769175dedbSmillert 		case 'v':
2779175dedbSmillert 			/* Also write derivations to "found" file. */
2789175dedbSmillert 			vflag++;
2799175dedbSmillert 			break;
2809175dedbSmillert 		case 'x':
2819175dedbSmillert 			/* Print plausible stems to stdout. */
2829175dedbSmillert 			xflag++;
2839175dedbSmillert 			break;
2849175dedbSmillert 		default:
2859175dedbSmillert 			usage();
2869175dedbSmillert 		}
2879175dedbSmillert 
2889175dedbSmillert 	}
2899175dedbSmillert 	argc -= optind;
2909175dedbSmillert 	argv += optind;
2919175dedbSmillert 	if (argc < 1)
2929175dedbSmillert 		usage();
2939175dedbSmillert 
2949175dedbSmillert 	/* Open and mmap the word/stop lists. */
295*1ed98fdfSderaadt 	if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL)
2969175dedbSmillert 		err(1, "malloc");
2979175dedbSmillert 	for (i = 0; argc--; i++) {
2989175dedbSmillert 		wlists[i].fd = open(argv[i], O_RDONLY, 0);
2999175dedbSmillert 		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
3009175dedbSmillert 			err(1, "%s", argv[i]);
3019175dedbSmillert 		if (sb.st_size > SIZE_T_MAX)
3029175dedbSmillert 			errx(1, "%s: %s", argv[i], strerror(EFBIG));
3039175dedbSmillert 		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
3049175dedbSmillert 		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
3059175dedbSmillert 		if (wlists[i].front == MAP_FAILED)
3069175dedbSmillert 			err(1, "%s", argv[i]);
3079175dedbSmillert 		wlists[i].back = wlists[i].front + sb.st_size;
3089175dedbSmillert 	}
3099175dedbSmillert 	wlists[i].fd = -1;
3109175dedbSmillert 
3119175dedbSmillert 	/* Open file where found words are to be saved. */
3129175dedbSmillert 	if (outfile == NULL)
3139175dedbSmillert 		found = NULL;
3149175dedbSmillert 	else if ((found = fopen(outfile, "w")) == NULL)
3159175dedbSmillert 		err(1, "cannot open %s", outfile);
3169175dedbSmillert 
3179175dedbSmillert 	for (;; print_word(file)) {
3189175dedbSmillert 		affix[0] = '\0';
3199175dedbSmillert 		file = found;
3209175dedbSmillert 		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
3219175dedbSmillert 			if (ep - word == sizeof(word) - 1) {
3229175dedbSmillert 				*ep = '\0';
3239175dedbSmillert 				warnx("word too long (%s)", word);
3249175dedbSmillert 				while ((ch = getchar()) != '\n')
3259175dedbSmillert 					;	/* slurp until EOL */
3269175dedbSmillert 			}
3279175dedbSmillert 			if (ch == EOF) {
3289175dedbSmillert 				if (found != NULL)
3299175dedbSmillert 					fclose(found);
3309175dedbSmillert 				exit(0);
3319175dedbSmillert 			}
3329175dedbSmillert 		}
3339175dedbSmillert 		for (cp = word, dp = original; cp < ep; )
3349175dedbSmillert 			*dp++ = *cp++;
3359175dedbSmillert 		*dp = '\0';
3369175dedbSmillert 		fold = 0;
3379175dedbSmillert 		for (cp = word; cp < ep; cp++)
3389175dedbSmillert 			if (islower(*cp))
3399175dedbSmillert 				goto lcase;
3409175dedbSmillert 		if (trypref(ep, ".", 0))
3419175dedbSmillert 			continue;
3429175dedbSmillert 		++fold;
3439175dedbSmillert 		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
3449175dedbSmillert 			*dp = tolower(*cp);
3459175dedbSmillert lcase:
3469175dedbSmillert 		if (trypref(ep, ".", 0) || suffix(ep, 0))
3479175dedbSmillert 			continue;
3489175dedbSmillert 		if (isupper(word[0])) {
3499175dedbSmillert 			for (cp = original, dp = word; (*dp = *cp++); dp++) {
3509175dedbSmillert 				if (fold)
3519175dedbSmillert 					*dp = tolower(*dp);
3529175dedbSmillert 			}
3539175dedbSmillert 			word[0] = tolower(word[0]);
3549175dedbSmillert 			goto lcase;
3559175dedbSmillert 		}
3569175dedbSmillert 		file = stdout;
3579175dedbSmillert 	}
3589175dedbSmillert 
3599175dedbSmillert 	exit(0);
3609175dedbSmillert }
3619175dedbSmillert 
3629175dedbSmillert void
3639175dedbSmillert print_word(FILE *f)
3649175dedbSmillert {
3659175dedbSmillert 
3669175dedbSmillert 	if (f != NULL) {
3679175dedbSmillert 		if (vflag && affix[0] != '\0' && affix[0] != '.')
3689175dedbSmillert 			fprintf(f, "%s\t%s\n", affix, original);
3699175dedbSmillert 		else
3709175dedbSmillert 			fprintf(f, "%s\n", original);
3719175dedbSmillert 	}
3729175dedbSmillert }
3739175dedbSmillert 
3749175dedbSmillert /*
3759175dedbSmillert  * For each matching suffix in suftab, call the function associated
3769175dedbSmillert  * with that suffix (p1 and p2).
3779175dedbSmillert  */
3789175dedbSmillert int
3799175dedbSmillert suffix(char *ep, int lev)
3809175dedbSmillert {
3819175dedbSmillert 	struct suftab *t;
3829175dedbSmillert 	char *cp, *sp;
3839175dedbSmillert 
3849175dedbSmillert 	lev += DLEV;
3859175dedbSmillert 	deriv[lev] = deriv[lev-1] = 0;
3869175dedbSmillert 	for (t = suftab; (sp = t->suf); t++) {
3879175dedbSmillert 		cp = ep;
3889175dedbSmillert 		while (*sp) {
3899175dedbSmillert 			if (*--cp != *sp++)
3909175dedbSmillert 				goto next;
3919175dedbSmillert 		}
3929175dedbSmillert 		for (sp = cp; --sp >= word && !vowel(*sp);)
3939175dedbSmillert 			;	/* nothing */
3949175dedbSmillert 		if (sp < word)
3959175dedbSmillert 			return (0);
3969175dedbSmillert 		if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
3979175dedbSmillert 			return (1);
3989175dedbSmillert 		if (t->p2 != NULL) {
3999175dedbSmillert 			deriv[lev] = deriv[lev+1] = '\0';
4009175dedbSmillert 			return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
4019175dedbSmillert 		}
4029175dedbSmillert 		return (0);
4039175dedbSmillert next:		;
4049175dedbSmillert 	}
4059175dedbSmillert 	return (0);
4069175dedbSmillert }
4079175dedbSmillert 
/*
 * Suffix handler that never accepts the word; always returns 0.
 */
int
nop(void)
{
	return 0;
}
4149175dedbSmillert 
/*
 * Try the stem word[..ep) as it stands (with prefix stripping) and, if
 * that fails, try removing a further suffix.  Returns 1 on success and
 * 0 otherwise.  The d argument is not used.
 */
int
strip(char *ep, char *d, char *a, int lev)
{
	if (trypref(ep, a, lev))
		return (1);
	return (suffix(ep, lev) != 0);
}
4219175dedbSmillert 
4229175dedbSmillert int
4239175dedbSmillert s(char *ep, char *d, char *a, int lev)
4249175dedbSmillert {
4259175dedbSmillert 
4269175dedbSmillert 	if (lev > DLEV + 1)
4279175dedbSmillert 		return (0);
4289175dedbSmillert 	if (*ep == 's' && ep[-1] == 's')
4299175dedbSmillert 		return (0);
4309175dedbSmillert 	return (strip(ep, d, a, lev));
4319175dedbSmillert }
4329175dedbSmillert 
4339175dedbSmillert int
4349175dedbSmillert an(char *ep, char *d, char *a, int lev)
4359175dedbSmillert {
4369175dedbSmillert 
4379175dedbSmillert 	if (!isupper(*word))	/* must be proper name */
4389175dedbSmillert 		return (0);
4399175dedbSmillert 	return (trypref(ep,a,lev));
4409175dedbSmillert }
4419175dedbSmillert 
/*
 * Handler for "-ization": put an 'e' at ep and try the stem that ends
 * just after it, using d as the derivation string.  The overwritten
 * character is not restored.
 */
int
ize(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'e';
	return (strip(ep + 1, "", d, lev));
}
4499175dedbSmillert 
/*
 * Put an 'e' at ep and try the stem that ends just after it, using d as
 * the derivation string.  On failure the character originally at ep is
 * put back.  Returns 1 on success, 0 otherwise.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	char saved;

	saved = *ep;
	*ep = 'e';
	if (strip(ep + 1, "", d, lev))
		return (1);
	*ep = saved;
	return (0);
}
4619175dedbSmillert 
/*
 * Stems ending in 'i' are handed to i_to_y(); any other stem goes
 * straight to strip().
 */
int
ily(char *ep, char *d, char *a, int lev)
{
	return (ep[-1] == 'i' ? i_to_y(ep, d, a, lev) : strip(ep, d, a, lev));
}
4719175dedbSmillert 
4729175dedbSmillert int
4739175dedbSmillert ncy(char *ep, char *d, char *a, int lev)
4749175dedbSmillert {
4759175dedbSmillert 
4769175dedbSmillert 	if (skipv(skipv(ep-1)) < word)
4779175dedbSmillert 		return (0);
4789175dedbSmillert 	ep[-1] = 't';
4799175dedbSmillert 	return (strip(ep, d, a, lev));
4809175dedbSmillert }
4819175dedbSmillert 
/*
 * Handler for "-bility": store an 'l' at ep and let y_to_e() append the
 * 'e' after it.
 */
int
bility(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'l';
	return (y_to_e(ep + 1, d, a, lev));
}
4899175dedbSmillert 
/*
 * If the stem ends in 'i', turn that into 'y' and describe the
 * derivation with d; otherwise leave the stem alone and describe it
 * with a.  Either way the stem is then handed to strip().
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{
	char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return (strip(ep, "", how, lev));
}
5009175dedbSmillert 
5019175dedbSmillert int
5029175dedbSmillert es(char *ep, char *d, char *a, int lev)
5039175dedbSmillert {
5049175dedbSmillert 
5059175dedbSmillert 	if (lev > DLEV)
5069175dedbSmillert 		return (0);
5079175dedbSmillert 
5089175dedbSmillert 	switch (ep[-1]) {
5099175dedbSmillert 	default:
5109175dedbSmillert 		return (0);
5119175dedbSmillert 	case 'i':
5129175dedbSmillert 		return (i_to_y(ep, d, a, lev));
5139175dedbSmillert 	case 's':
5149175dedbSmillert 	case 'h':
5159175dedbSmillert 	case 'z':
5169175dedbSmillert 	case 'x':
5179175dedbSmillert 		return (strip(ep, d, a, lev));
5189175dedbSmillert 	}
5199175dedbSmillert }
5209175dedbSmillert 
/*
 * Handler for "-metry": overwrite the last two characters before ep
 * with "er" and try the result.
 */
int
metry(char *ep, char *d, char *a, int lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return (strip(ep, d, a, lev));
}
5299175dedbSmillert 
/*
 * Handler for "-tion", "-tive" and "-ator".  The choice depends on the
 * character two before ep: 'c' or 'r' tries the stem as is, 'a' tries
 * it with an 'e' restored, anything else is rejected.
 */
int
tion(char *ep, char *d, char *a, int lev)
{
	char c = ep[-2];

	if (c == 'c' || c == 'r')
		return (trypref(ep, a, lev));
	if (c == 'a')
		return (y_to_e(ep, d, a, lev));
	return (0);
}
5439175dedbSmillert 
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem, either try the stem
 * with an 'e' restored (y_to_e) or fall back to VCe().  The characters
 * are re-read from the word each time because the calls made here may
 * edit the word in place.
 */
int
CCe(char *ep, char *d, char *a, int lev)
{

	switch (ep[-1]) {
	case 'l':
		/* a consonant other than l, r or w before the 'l' */
		if (!vowel(ep[-2]) && ep[-2] != 'l' && ep[-2] != 'r' &&
		    ep[-2] != 'w')
			return (y_to_e(ep, d, a, lev));
		break;
	case 's':
		if (ep[-2] == 's')
			break;
		/* FALLTHROUGH */
	case 'c':
	case 'g':
		if (*ep == 'a')
			return (0);
		/* FALLTHROUGH */
	case 'v':
	case 'z':
		if (vowel(ep[-2]))
			break;
		/* FALLTHROUGH */
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return (1);
		/* only "-ng" stems get a second chance through VCe() */
		if (ep[-2] != 'n' || ep[-1] != 'g')
			return (0);
		break;
	default:
		break;
	}
	return (VCe(ep, d, a, lev));
}
5839175dedbSmillert 
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  When the stem ends in a consonant
 * preceded by a vowel, it is first tried with an 'e' added; if that
 * fails the overwritten character is restored and the bare stem is
 * handed to strip().
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char saved;

	if (ep[-1] == 'e')
		return (0);
	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		*ep = saved;
	}
	return (strip(ep, d, a, lev));
}
6059175dedbSmillert 
6069175dedbSmillert char *
6079175dedbSmillert lookuppref(char **wp, char *ep)
6089175dedbSmillert {
6099175dedbSmillert 	char **sp;
6109175dedbSmillert 	char *bp,*cp;
6119175dedbSmillert 
6129175dedbSmillert 	for (sp = preftab; *sp; sp++) {
6139175dedbSmillert 		bp = *wp;
6149175dedbSmillert 		for (cp = *sp; *cp; cp++, bp++) {
6159175dedbSmillert 			if (tolower(*bp) != *cp)
6169175dedbSmillert 				goto next;
6179175dedbSmillert 		}
6189175dedbSmillert 		for (cp = bp; cp < ep; cp++) {
6199175dedbSmillert 			if (vowel(*cp)) {
6209175dedbSmillert 				*wp = bp;
6219175dedbSmillert 				return (*sp);
6229175dedbSmillert 			}
6239175dedbSmillert 		}
6249175dedbSmillert next:		;
6259175dedbSmillert 	}
6269175dedbSmillert 	return (0);
6279175dedbSmillert }
6289175dedbSmillert 
6299175dedbSmillert /*
6309175dedbSmillert  * If the word is not in the dictionary, try stripping off prefixes
6319175dedbSmillert  * until the word is found or we run out of prefixes to check.
6329175dedbSmillert  */
6339175dedbSmillert int
6349175dedbSmillert trypref(char *ep, char *a, int lev)
6359175dedbSmillert {
6369175dedbSmillert 	char *cp;
6379175dedbSmillert 	char *bp;
6389175dedbSmillert 	char *pp;
6399175dedbSmillert 	int val = 0;
6409175dedbSmillert 	char space[20];
6419175dedbSmillert 
6429175dedbSmillert 	deriv[lev] = a;
6439175dedbSmillert 	if (tryword(word, ep, lev))
6449175dedbSmillert 		return (1);
6459175dedbSmillert 	bp = word;
6469175dedbSmillert 	pp = space;
6479175dedbSmillert 	deriv[lev+1] = pp;
6489175dedbSmillert 	while ((cp = lookuppref(&bp, ep))) {
6499175dedbSmillert 		*pp++ = '+';
6509175dedbSmillert 		while ((*pp = *cp++))
6519175dedbSmillert 			pp++;
6529175dedbSmillert 		if (tryword(bp, ep, lev+1)) {
6539175dedbSmillert 			val = 1;
6549175dedbSmillert 			break;
6559175dedbSmillert 		}
6569175dedbSmillert 		if (pp - space >= sizeof(space))
6579175dedbSmillert 			return (0);
6589175dedbSmillert 	}
6599175dedbSmillert 	deriv[lev+1] = deriv[lev+2] = '\0';
6609175dedbSmillert 	return (val);
6619175dedbSmillert }
6629175dedbSmillert 
6639175dedbSmillert int
6649175dedbSmillert tryword(char *bp, char *ep, int lev)
6659175dedbSmillert {
6669175dedbSmillert 	int i, j;
6679175dedbSmillert 	char duple[3];
6689175dedbSmillert 
6699175dedbSmillert 	if (ep-bp <= 1)
6709175dedbSmillert 		return (0);
6719175dedbSmillert 	if (vowel(*ep) && monosyl(bp, ep))
6729175dedbSmillert 		return (0);
6739175dedbSmillert 
6749175dedbSmillert 	i = dict(bp, ep);
6759175dedbSmillert 	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
6769175dedbSmillert 		ep--;
6779175dedbSmillert 		deriv[++lev] = duple;
6789175dedbSmillert 		duple[0] = '+';
6799175dedbSmillert 		duple[1] = *ep;
6809175dedbSmillert 		duple[2] = '\0';
6819175dedbSmillert 		i = dict(bp, ep);
6829175dedbSmillert 	}
6839175dedbSmillert 	if (vflag == 0 || i == 0)
6849175dedbSmillert 		return (i);
6859175dedbSmillert 
6869175dedbSmillert 	/* Also tack on possible derivations. (XXX - warn on truncation?) */
6879175dedbSmillert 	for (j = lev; j > 0; j--) {
6889175dedbSmillert 		if (deriv[j])
6899175dedbSmillert 			strlcat(affix, deriv[j], sizeof(affix));
6909175dedbSmillert 	}
6919175dedbSmillert 	return (i);
6929175dedbSmillert }
6939175dedbSmillert 
/*
 * Return 1 when bp..ep has the shape consonants-vowel-consonant: its
 * last character is a consonant other than 'x' or 'w', the one before
 * it is a vowel, and nothing earlier is a vowel.  Return 0 otherwise,
 * including for strings of fewer than two characters.
 */
int
monosyl(char *bp, char *ep)
{
	char *last, *p;

	if (ep - bp < 2)
		return (0);

	last = ep - 1;
	if (vowel(*last))
		return (0);
	if (!vowel(last[-1]))
		return (0);
	if (*last == 'x' || *last == 'w')
		return (0);

	for (p = last - 2; p >= bp; p--) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
7079175dedbSmillert 
7089175dedbSmillert char *
7099175dedbSmillert skipv(char *s)
7109175dedbSmillert {
7119175dedbSmillert 
7129175dedbSmillert 	if (s >= word && vowel(*s))
7139175dedbSmillert 		s--;
7149175dedbSmillert 	while (s >= word && !vowel(*s))
7159175dedbSmillert 		s--;
7169175dedbSmillert 	return (s);
7179175dedbSmillert }
7189175dedbSmillert 
/*
 * Return 1 if c is one of a, e, i, o, u or y in either case, and 0
 * otherwise.
 *
 * Callers pass plain char values, which are negative for bytes above
 * 0x7f where char is signed.  tolower() is only defined for values
 * representable as unsigned char (or EOF), so c is converted first.
 */
int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return (1);
	default:
		return (0);
	}
}
7349175dedbSmillert 
7359175dedbSmillert /*
7369175dedbSmillert  * Crummy way to Britishise.
7379175dedbSmillert  */
7389175dedbSmillert void
7399175dedbSmillert ise(void)
7409175dedbSmillert {
741f05c4e53Smillert 	struct suftab *tab;
7429175dedbSmillert 
743f05c4e53Smillert 	for (tab = suftab; tab->suf; tab++) {
744f05c4e53Smillert 		/* Assume that suffix will contain 'z' if a1 or d1 do */
745f05c4e53Smillert 		if (strchr(tab->suf, 'z')) {
746f05c4e53Smillert 			tab->suf = estrdup(tab->suf);
747f05c4e53Smillert 			ztos(tab->suf);
748f05c4e53Smillert 			if (strchr(tab->d1, 'z')) {
749f05c4e53Smillert 				tab->d1 = estrdup(tab->d1);
750f05c4e53Smillert 				ztos(tab->d1);
751f05c4e53Smillert 			}
752f05c4e53Smillert 			if (strchr(tab->a1, 'z')) {
753f05c4e53Smillert 				tab->a1 = estrdup(tab->a1);
754f05c4e53Smillert 				ztos(tab->a1);
755f05c4e53Smillert 			}
756f05c4e53Smillert 		}
7579175dedbSmillert 	}
7589175dedbSmillert }
7599175dedbSmillert 
/*
 * Replace every 'z' in the string s with 's', in place.
 */
void
ztos(char *s)
{
	char *z;

	while ((z = strchr(s, 'z')) != NULL) {
		*z = 's';
		s = z + 1;
	}
}
7689175dedbSmillert 
/*
 * Return a freshly allocated copy of s; exit through err() when the
 * memory cannot be obtained.
 */
char *
estrdup(const char *s)
{
	size_t len;
	char *copy;

	len = strlen(s) + 1;
	copy = malloc(len);
	if (copy == NULL)
		err(1, "strdup");
	memcpy(copy, s, len);
	return (copy);
}
778f05c4e53Smillert 
7799175dedbSmillert /*
7809175dedbSmillert  * Look up a word in the dictionary.
7819175dedbSmillert  * Returns 1 if found, 0 if not.
7829175dedbSmillert  */
7839175dedbSmillert int
7849175dedbSmillert dict(char *bp, char *ep)
7859175dedbSmillert {
7869175dedbSmillert 	char c;
7879175dedbSmillert 	int i, rval;
7889175dedbSmillert 
7899175dedbSmillert 	c = *ep;
7909175dedbSmillert 	*ep = '\0';
7919175dedbSmillert 	if (xflag)
7929175dedbSmillert 		printf("=%s\n", bp);
7939175dedbSmillert 	for (i = rval = 0; wlists[i].fd != -1; i++) {
7949175dedbSmillert 		if ((rval = look((unsigned char *)bp, wlists[i].front,
7959175dedbSmillert 		    wlists[i].back)) == 1)
7969175dedbSmillert 			break;
7979175dedbSmillert 	}
7989175dedbSmillert 	*ep = c;
7999175dedbSmillert 	return (rval);
8009175dedbSmillert }
8019175dedbSmillert 
8029175dedbSmillert __dead void
8039175dedbSmillert usage(void)
8049175dedbSmillert {
8059175dedbSmillert 	extern char *__progname;
8069175dedbSmillert 
8079175dedbSmillert 	fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n",
8089175dedbSmillert 	    __progname);
8099175dedbSmillert 	exit(1);
8109175dedbSmillert }
811