/*	$OpenBSD: spellprog.c,v 1.16 2022/12/26 19:16:03 jmc Exp $	*/

/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)spell.h	8.1 (Berkeley) 6/6/93
 */
/*
 * Copyright (C) Caldera International Inc.  2001-2002.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code and documentation must retain the above
 *    copyright notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed or owned by Caldera
 *	International, Inc.
 * 4. Neither the name of Caldera International, Inc. nor the names of other
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/mman.h>
#include <sys/stat.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Derivation-level step: suffix() adds DLEV to its level on every call,
 * and s() and es() compare their level against it to decide whether they
 * may still be applied.
 */
#define DLEV 2

839175dedbSmillert int	 an(char *, char *, char *, int);
849175dedbSmillert int	 bility(char *, char *, char *, int);
859175dedbSmillert int	 es(char *, char *, char *, int);
869175dedbSmillert int	 dict(char *, char *);
879175dedbSmillert int	 i_to_y(char *, char *, char *, int);
889175dedbSmillert int	 ily(char *, char *, char *, int);
899175dedbSmillert int	 ize(char *, char *, char *, int);
909175dedbSmillert int	 metry(char *, char *, char *, int);
919175dedbSmillert int	 monosyl(char *, char *);
929175dedbSmillert int	 ncy(char *, char *, char *, int);
93e14aac28Snicm int	 nop(char *, char *, char *, int);
949175dedbSmillert int	 trypref(char *, char *, int);
959175dedbSmillert int	 tryword(char *, char *, int);
969175dedbSmillert int	 s(char *, char *, char *, int);
979175dedbSmillert int	 strip(char *, char *, char *, int);
989175dedbSmillert int	 suffix(char *, int);
999175dedbSmillert int	 tion(char *, char *, char *, int);
100f69332c7Sderaadt int	 vowel(unsigned char);
1019175dedbSmillert int	 y_to_e(char *, char *, char *, int);
1029175dedbSmillert int	 CCe(char *, char *, char *, int);
1039175dedbSmillert int	 VCe(char *, char *, char *, int);
1049175dedbSmillert char	*lookuppref(char **, char *);
1059175dedbSmillert char	*skipv(char *);
106f05c4e53Smillert char	*estrdup(const char *);
1079175dedbSmillert void	 ise(void);
1089175dedbSmillert void	 print_word(FILE *);
1099175dedbSmillert void	 ztos(char *);
110a2969341Sschwarze static void __dead usage(void);
1119175dedbSmillert 
1129175dedbSmillert /* from look.c */
1139175dedbSmillert int	 look(unsigned char *, unsigned char *, unsigned char *);
1149175dedbSmillert 
1159175dedbSmillert struct suftab {
116f05c4e53Smillert 	char *suf;
117e14aac28Snicm 	int (*p1)(char *, char *, char *, int);
1189175dedbSmillert 	int n1;
1199175dedbSmillert 	char *d1;
1209175dedbSmillert 	char *a1;
121e14aac28Snicm 	int (*p2)(char *, char *, char *, int);
1229175dedbSmillert 	int n2;
1239175dedbSmillert 	char *d2;
1249175dedbSmillert 	char *a2;
1259175dedbSmillert } suftab[] = {
1269175dedbSmillert 	{"ssen", ily, 4, "-y+iness", "+ness" },
1279175dedbSmillert 	{"ssel", ily, 4, "-y+i+less", "+less" },
1289175dedbSmillert 	{"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
1299175dedbSmillert 	{"s'", s, 2, "", "+'s"},
1309175dedbSmillert 	{"s", s, 1, "", "+s"},
1319175dedbSmillert 	{"ecn", ncy, 1, "", "-t+ce"},
1329175dedbSmillert 	{"ycn", ncy, 1, "", "-cy+t"},
1339175dedbSmillert 	{"ytilb", nop, 0, "", ""},
1349175dedbSmillert 	{"ytilib", bility, 5, "-le+ility", ""},
1359175dedbSmillert 	{"elbaif", i_to_y, 4, "-y+iable", ""},
1369175dedbSmillert 	{"elba", CCe, 4, "-e+able", "+able"},
1379175dedbSmillert 	{"yti", CCe, 3, "-e+ity", "+ity"},
1389175dedbSmillert 	{"ylb", y_to_e, 1, "-e+y", ""},
1399175dedbSmillert 	{"yl", ily, 2, "-y+ily", "+ly"},
1409175dedbSmillert 	{"laci", strip, 2, "", "+al"},
1419175dedbSmillert 	{"latnem", strip, 2, "", "+al"},
1429175dedbSmillert 	{"lanoi", strip, 2, "", "+al"},
1439175dedbSmillert 	{"tnem", strip, 4, "", "+ment"},
1449175dedbSmillert 	{"gni", CCe, 3, "-e+ing", "+ing"},
1459175dedbSmillert 	{"reta", nop, 0, "", ""},
1469175dedbSmillert 	{"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
1479175dedbSmillert 	{"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
1489175dedbSmillert 	{"citsi", strip, 2, "", "+ic"},
1499175dedbSmillert 	{"cihparg", i_to_y, 1, "-y+ic", ""},
1509175dedbSmillert 	{"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
1519175dedbSmillert 	{"cirtem", i_to_y, 1, "-y+ic", ""},
1529175dedbSmillert 	{"yrtem", metry, 0, "-ry+er", ""},
1539175dedbSmillert 	{"cigol", i_to_y, 1, "-y+ic", ""},
1549175dedbSmillert 	{"tsigol", i_to_y, 2, "-y+ist", ""},
1559175dedbSmillert 	{"tsi", VCe, 3, "-e+ist", "+ist"},
1569175dedbSmillert 	{"msi", VCe, 3, "-e+ism", "+ist"},
1579175dedbSmillert 	{"noitacif", i_to_y, 6, "-y+ication", ""},
1589175dedbSmillert 	{"noitazi", ize, 5, "-e+ation", ""},
1599175dedbSmillert 	{"rota", tion, 2, "-e+or", ""},
1609175dedbSmillert 	{"noit", tion, 3, "-e+ion", "+ion"},
1619175dedbSmillert 	{"naino", an, 3, "", "+ian"},
1629175dedbSmillert 	{"na", an, 1, "", "+n"},
1639175dedbSmillert 	{"evit", tion, 3, "-e+ive", "+ive"},
1649175dedbSmillert 	{"ezi", CCe, 3, "-e+ize", "+ize"},
1659175dedbSmillert 	{"pihs", strip, 4, "", "+ship"},
1669175dedbSmillert 	{"dooh", ily, 4, "-y+hood", "+hood"},
1679175dedbSmillert 	{"ekil", strip, 4, "", "+like"},
1689175dedbSmillert 	{ NULL }
1699175dedbSmillert };
1709175dedbSmillert 
/*
 * Prefixes that lookuppref() may strip from the front of a word.
 * The list is scanned in order and is NULL-terminated.
 */
char *preftab[] = {
	"anti",
	"bio",
	"dis",
	"electro",
	"en",
	"fore",
	"hyper",
	"intra",
	"inter",
	"iso",
	"kilo",
	"magneto",
	"meta",
	"micro",
	"milli",
	"mis",
	"mono",
	"multi",
	"non",
	"out",
	"over",
	"photo",
	"poly",
	"pre",
	"pseudo",
	"re",
	"semi",
	"stereo",
	"sub",
	"super",
	"thermo",
	"ultra",
	"under",	/* must precede un */
	"un",
	NULL
};

/*
 * One memory-mapped word list.  main() fills in one entry per file
 * argument and appends a terminating entry whose fd is -1; dict()
 * walks the array until it reaches that entry.
 */
struct wlist {
	int fd;			/* descriptor returned by open() */
	unsigned char *front;	/* start of the mapping */
	unsigned char *back;	/* front + file size */
} *wlists;

int vflag;			/* -v: record derivations in affix[] */
int xflag;			/* -x: print each candidate stem to stdout */
char word[LINE_MAX];		/* current word; edited in place while stemming */
char original[LINE_MAX];	/* untouched copy of the word, used for output */
char *deriv[40];		/* derivation label per level, see tryword() */
char affix[40];			/* derivation string built for the current word */

2229175dedbSmillert /*
2239175dedbSmillert  * The spellprog utility accepts a newline-delimited list of words
2249175dedbSmillert  * on stdin.  For arguments it expects the path to a word list and
2259175dedbSmillert  * the path to a file in which to store found words.
2269175dedbSmillert  *
2279175dedbSmillert  * In normal usage, spell is called twice.  The first time it is
228*d9a51c35Sjmc  * called with a stop list to flag commonly misspelled words.  The
2299175dedbSmillert  * remaining words are then passed to spell again, this time with
2309175dedbSmillert  * the dictionary file as the first (non-flag) argument.
2319175dedbSmillert  *
2329175dedbSmillert  * Unlike historic versions of spellprog, this one does not use
2339175dedbSmillert  * hashed files.  Instead it simply requires that files be sorted
2349175dedbSmillert  * lexigraphically and uses the same algorithm as the look utility.
2359175dedbSmillert  *
2369175dedbSmillert  * Note that spellprog should be called via the spell shell script
2379175dedbSmillert  * and is not meant to be invoked directly by the user.
2389175dedbSmillert  */
2399175dedbSmillert 
2409175dedbSmillert int
main(int argc,char ** argv)2419175dedbSmillert main(int argc, char **argv)
2429175dedbSmillert {
2439175dedbSmillert 	char *ep, *cp, *dp;
2449175dedbSmillert 	char *outfile;
2459175dedbSmillert 	int ch, fold, i;
2469175dedbSmillert 	struct stat sb;
2479175dedbSmillert 	FILE *file, *found;
2489175dedbSmillert 
2496ed9540bSderaadt 	if (pledge("stdio rpath wpath cpath", NULL) == -1)
2506ed9540bSderaadt 		err(1, "pledge");
2516ed9540bSderaadt 
2529175dedbSmillert 	outfile = NULL;
2539175dedbSmillert 	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
2549175dedbSmillert 		switch (ch) {
2559175dedbSmillert 		case 'b':
2569175dedbSmillert 			/* Use British dictionary and convert ize -> ise. */
2579175dedbSmillert 			ise();
2589175dedbSmillert 			break;
2599175dedbSmillert 		case 'o':
2609175dedbSmillert 			outfile = optarg;
2619175dedbSmillert 			break;
2629175dedbSmillert 		case 'v':
2639175dedbSmillert 			/* Also write derivations to "found" file. */
264e9f70589Sderaadt 			vflag = 1;
2659175dedbSmillert 			break;
2669175dedbSmillert 		case 'x':
2679175dedbSmillert 			/* Print plausible stems to stdout. */
268e9f70589Sderaadt 			xflag = 1;
2699175dedbSmillert 			break;
2709175dedbSmillert 		default:
2719175dedbSmillert 			usage();
2729175dedbSmillert 		}
2739175dedbSmillert 
2749175dedbSmillert 	}
2759175dedbSmillert 	argc -= optind;
2769175dedbSmillert 	argv += optind;
2779175dedbSmillert 	if (argc < 1)
2789175dedbSmillert 		usage();
2799175dedbSmillert 
2809175dedbSmillert 	/* Open and mmap the word/stop lists. */
2811ed98fdfSderaadt 	if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL)
2829175dedbSmillert 		err(1, "malloc");
2839175dedbSmillert 	for (i = 0; argc--; i++) {
284b7041c07Sderaadt 		wlists[i].fd = open(argv[i], O_RDONLY);
2859175dedbSmillert 		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
2869175dedbSmillert 			err(1, "%s", argv[i]);
2874239b822Smillert 		if (sb.st_size > SIZE_MAX)
2885ad04d35Sguenther 			errc(1, EFBIG, "%s", argv[i]);
2899175dedbSmillert 		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
2909175dedbSmillert 		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
2919175dedbSmillert 		if (wlists[i].front == MAP_FAILED)
2929175dedbSmillert 			err(1, "%s", argv[i]);
2939175dedbSmillert 		wlists[i].back = wlists[i].front + sb.st_size;
2949175dedbSmillert 	}
2959175dedbSmillert 	wlists[i].fd = -1;
2969175dedbSmillert 
2979175dedbSmillert 	/* Open file where found words are to be saved. */
2989175dedbSmillert 	if (outfile == NULL)
2999175dedbSmillert 		found = NULL;
3009175dedbSmillert 	else if ((found = fopen(outfile, "w")) == NULL)
3019175dedbSmillert 		err(1, "cannot open %s", outfile);
3029175dedbSmillert 
3039175dedbSmillert 	for (;; print_word(file)) {
3049175dedbSmillert 		affix[0] = '\0';
3059175dedbSmillert 		file = found;
3069175dedbSmillert 		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
3079175dedbSmillert 			if (ep - word == sizeof(word) - 1) {
3089175dedbSmillert 				*ep = '\0';
3099175dedbSmillert 				warnx("word too long (%s)", word);
3109175dedbSmillert 				while ((ch = getchar()) != '\n')
3119175dedbSmillert 					;	/* slurp until EOL */
3129175dedbSmillert 			}
3139175dedbSmillert 			if (ch == EOF) {
3149175dedbSmillert 				if (found != NULL)
3159175dedbSmillert 					fclose(found);
316a2969341Sschwarze 				return (0);
3179175dedbSmillert 			}
3189175dedbSmillert 		}
3199175dedbSmillert 		for (cp = word, dp = original; cp < ep; )
3209175dedbSmillert 			*dp++ = *cp++;
3219175dedbSmillert 		*dp = '\0';
3229175dedbSmillert 		fold = 0;
3239175dedbSmillert 		for (cp = word; cp < ep; cp++)
324f69332c7Sderaadt 			if (islower((unsigned char)*cp))
3259175dedbSmillert 				goto lcase;
3269175dedbSmillert 		if (trypref(ep, ".", 0))
3279175dedbSmillert 			continue;
3289175dedbSmillert 		++fold;
3299175dedbSmillert 		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
330f69332c7Sderaadt 			*dp = tolower((unsigned char)*cp);
3319175dedbSmillert lcase:
3329175dedbSmillert 		if (trypref(ep, ".", 0) || suffix(ep, 0))
3339175dedbSmillert 			continue;
334f69332c7Sderaadt 		if (isupper((unsigned char)word[0])) {
3359175dedbSmillert 			for (cp = original, dp = word; (*dp = *cp++); dp++) {
3369175dedbSmillert 				if (fold)
337f69332c7Sderaadt 					*dp = tolower((unsigned char)*dp);
3389175dedbSmillert 			}
339f69332c7Sderaadt 			word[0] = tolower((unsigned char)word[0]);
3409175dedbSmillert 			goto lcase;
3419175dedbSmillert 		}
3429175dedbSmillert 		file = stdout;
3439175dedbSmillert 	}
3449175dedbSmillert 
345a2969341Sschwarze 	return (0);
3469175dedbSmillert }
3479175dedbSmillert 
3489175dedbSmillert void
print_word(FILE * f)3499175dedbSmillert print_word(FILE *f)
3509175dedbSmillert {
3519175dedbSmillert 
3529175dedbSmillert 	if (f != NULL) {
3539175dedbSmillert 		if (vflag && affix[0] != '\0' && affix[0] != '.')
3549175dedbSmillert 			fprintf(f, "%s\t%s\n", affix, original);
3559175dedbSmillert 		else
3569175dedbSmillert 			fprintf(f, "%s\n", original);
3579175dedbSmillert 	}
3589175dedbSmillert }
3599175dedbSmillert 
3609175dedbSmillert /*
3619175dedbSmillert  * For each matching suffix in suftab, call the function associated
3629175dedbSmillert  * with that suffix (p1 and p2).
3639175dedbSmillert  */
3649175dedbSmillert int
suffix(char * ep,int lev)3659175dedbSmillert suffix(char *ep, int lev)
3669175dedbSmillert {
3679175dedbSmillert 	struct suftab *t;
3689175dedbSmillert 	char *cp, *sp;
3699175dedbSmillert 
3709175dedbSmillert 	lev += DLEV;
3719175dedbSmillert 	deriv[lev] = deriv[lev-1] = 0;
3729175dedbSmillert 	for (t = suftab; (sp = t->suf); t++) {
3739175dedbSmillert 		cp = ep;
3749175dedbSmillert 		while (*sp) {
3759175dedbSmillert 			if (*--cp != *sp++)
3769175dedbSmillert 				goto next;
3779175dedbSmillert 		}
3789175dedbSmillert 		for (sp = cp; --sp >= word && !vowel(*sp);)
3799175dedbSmillert 			;	/* nothing */
3809175dedbSmillert 		if (sp < word)
3819175dedbSmillert 			return (0);
3829175dedbSmillert 		if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
3839175dedbSmillert 			return (1);
3849175dedbSmillert 		if (t->p2 != NULL) {
385e14aac28Snicm 			deriv[lev] = deriv[lev+1] = 0;
3869175dedbSmillert 			return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
3879175dedbSmillert 		}
3889175dedbSmillert 		return (0);
3899175dedbSmillert next:		;
3909175dedbSmillert 	}
3919175dedbSmillert 	return (0);
3929175dedbSmillert }
3939175dedbSmillert 
/*
 * Suffix handler that never matches: ignores its arguments and
 * returns 0.
 */
int
nop(char *ep, char *d, char *a, int lev)
{

	return (0);
}

/*
 * Try the text ending at ep as a word (with optional prefixes), using
 * "a" as the derivation label; if that fails, try stripping a further
 * suffix.  "d" is unused.  Returns 1 on success, 0 otherwise.
 */
int
strip(char *ep, char *d, char *a, int lev)
{

	return (trypref(ep, a, lev) || suffix(ep, lev));
}

4089175dedbSmillert int
s(char * ep,char * d,char * a,int lev)4099175dedbSmillert s(char *ep, char *d, char *a, int lev)
4109175dedbSmillert {
4119175dedbSmillert 
4129175dedbSmillert 	if (lev > DLEV + 1)
4139175dedbSmillert 		return (0);
4149175dedbSmillert 	if (*ep == 's' && ep[-1] == 's')
4159175dedbSmillert 		return (0);
4169175dedbSmillert 	return (strip(ep, d, a, lev));
4179175dedbSmillert }
4189175dedbSmillert 
4199175dedbSmillert int
an(char * ep,char * d,char * a,int lev)4209175dedbSmillert an(char *ep, char *d, char *a, int lev)
4219175dedbSmillert {
4229175dedbSmillert 
423f69332c7Sderaadt 	if (!isupper((unsigned char)*word))	/* must be proper name */
4249175dedbSmillert 		return (0);
4259175dedbSmillert 	return (trypref(ep,a,lev));
4269175dedbSmillert }
4279175dedbSmillert 
/*
 * Handler for "ization": stores 'e' at ep, moves ep past it, and calls
 * strip() with "d" as the derivation label.  Unlike y_to_e(), the
 * overwritten character is not restored on failure.
 */
int
ize(char *ep, char *d, char *a, int lev)
{

	*ep++ = 'e';
	return (strip(ep, "", d, lev));
}

/*
 * Replace the character at ep with 'e' and try the stem that now ends
 * just after it, using "d" as the derivation label.  The original
 * character is put back if nothing was found.  Returns 1 on success,
 * 0 otherwise.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	char c = *ep;

	*ep++ = 'e';
	if (strip(ep, "", d, lev))
		return (1);
	ep[-1] = c;
	return (0);
}

/*
 * If the stem ends in 'i', hand it to i_to_y(); otherwise strip the
 * suffix as-is with strip().
 */
int
ily(char *ep, char *d, char *a, int lev)
{

	if (ep[-1] == 'i')
		return (i_to_y(ep, d, a, lev));
	else
		return (strip(ep, d, a, lev));
}

4589175dedbSmillert int
ncy(char * ep,char * d,char * a,int lev)4599175dedbSmillert ncy(char *ep, char *d, char *a, int lev)
4609175dedbSmillert {
4619175dedbSmillert 
4629175dedbSmillert 	if (skipv(skipv(ep-1)) < word)
4639175dedbSmillert 		return (0);
4649175dedbSmillert 	ep[-1] = 't';
4659175dedbSmillert 	return (strip(ep, d, a, lev));
4669175dedbSmillert }
4679175dedbSmillert 
/*
 * Handler for "ibility": stores 'l' at ep, then lets y_to_e() put an
 * 'e' after it and look the result up.  The 'l' is not restored on
 * failure.
 */
int
bility(char *ep, char *d, char *a, int lev)
{

	*ep++ = 'l';
	return (y_to_e(ep, d, a, lev));
}

/*
 * If the stem ends in 'i', change it to 'y' and use "d" as the
 * derivation label; otherwise keep the stem and use "a".  Then call
 * strip().  The 'i' is not restored on failure.
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		a = d;
	}
	return (strip(ep, "", a, lev));
}

4879175dedbSmillert int
es(char * ep,char * d,char * a,int lev)4889175dedbSmillert es(char *ep, char *d, char *a, int lev)
4899175dedbSmillert {
4909175dedbSmillert 
4919175dedbSmillert 	if (lev > DLEV)
4929175dedbSmillert 		return (0);
4939175dedbSmillert 
4949175dedbSmillert 	switch (ep[-1]) {
4959175dedbSmillert 	default:
4969175dedbSmillert 		return (0);
4979175dedbSmillert 	case 'i':
4989175dedbSmillert 		return (i_to_y(ep, d, a, lev));
4999175dedbSmillert 	case 's':
5009175dedbSmillert 	case 'h':
5019175dedbSmillert 	case 'z':
5029175dedbSmillert 	case 'x':
5039175dedbSmillert 		return (strip(ep, d, a, lev));
5049175dedbSmillert 	}
5059175dedbSmillert }
5069175dedbSmillert 
/*
 * Handler for "metry": overwrites the last two characters before ep
 * with "er" and calls strip().  The characters are not restored on
 * failure.
 */
int
metry(char *ep, char *d, char *a, int lev)
{

	ep[-2] = 'e';
	ep[-1] = 'r';
	return (strip(ep, d, a, lev));
}

/*
 * Handler for "tion", "tive" and "ator".  Dispatches on ep[-2]:
 * 'c' or 'r' tries the stem directly with trypref(); 'a' tries it
 * with an 'e' put at ep via y_to_e(); anything else fails.
 */
int
tion(char *ep, char *d, char *a, int lev)
{

	switch (ep[-2]) {
	case 'c':
	case 'r':
		return (trypref(ep, a, lev));
	case 'a':
		return (y_to_e(ep, d, a, lev));
	}
	return (0);
}

/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem, either try the
 * stem with an 'e' put back (y_to_e()) or hand it to VCe().  The
 * cases deliberately fall through into one another; stems that match
 * no case go straight to VCe().
 */
int
CCe(char *ep, char *d, char *a, int lev)
{

	switch (ep[-1]) {
	case 'l':
		if (vowel(ep[-2]))
			break;
		switch (ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			break;
		default:
			return (y_to_e(ep, d, a, lev));
		}
		break;
	case 's':
		if (ep[-2] == 's')
			break;
		/* FALLTHROUGH */
	case 'c':
	case 'g':
		if (*ep == 'a')
			return (0);
		/* FALLTHROUGH */
	case 'v':
	case 'z':
		if (vowel(ep[-2]))
			break;
		/* FALLTHROUGH */
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return (1);
		/* Only stems ending in "ng" get a second chance in VCe(). */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return (0);
	}
	return (VCe(ep, d, a, lev));
}

/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * Fails at once if the stem ends in 'e'.  If it ends in a non-vowel
 * preceded by a vowel, first try the stem with an 'e' stored at ep
 * (label "d"), restoring the overwritten character on failure.
 * Finally fall back to strip() on the unmodified stem.
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char c;

	c = ep[-1];
	if (c == 'e')
		return (0);
	if (!vowel(c) && vowel(ep[-2])) {
		c = *ep;
		*ep++ = 'e';
		if (trypref(ep, d, lev) || suffix(ep, lev))
			return (1);
		ep--;
		*ep = c;
	}
	return (strip(ep, d, a, lev));
}

5929175dedbSmillert char *
lookuppref(char ** wp,char * ep)5939175dedbSmillert lookuppref(char **wp, char *ep)
5949175dedbSmillert {
5959175dedbSmillert 	char **sp;
5969175dedbSmillert 	char *bp,*cp;
5979175dedbSmillert 
5989175dedbSmillert 	for (sp = preftab; *sp; sp++) {
5999175dedbSmillert 		bp = *wp;
6009175dedbSmillert 		for (cp = *sp; *cp; cp++, bp++) {
601f69332c7Sderaadt 			if (tolower((unsigned char)*bp) != *cp)
6029175dedbSmillert 				goto next;
6039175dedbSmillert 		}
6049175dedbSmillert 		for (cp = bp; cp < ep; cp++) {
6059175dedbSmillert 			if (vowel(*cp)) {
6069175dedbSmillert 				*wp = bp;
6079175dedbSmillert 				return (*sp);
6089175dedbSmillert 			}
6099175dedbSmillert 		}
6109175dedbSmillert next:		;
6119175dedbSmillert 	}
6129175dedbSmillert 	return (0);
6139175dedbSmillert }
6149175dedbSmillert 
6159175dedbSmillert /*
6169175dedbSmillert  * If the word is not in the dictionary, try stripping off prefixes
6179175dedbSmillert  * until the word is found or we run out of prefixes to check.
6189175dedbSmillert  */
6199175dedbSmillert int
trypref(char * ep,char * a,int lev)6209175dedbSmillert trypref(char *ep, char *a, int lev)
6219175dedbSmillert {
6229175dedbSmillert 	char *cp;
6239175dedbSmillert 	char *bp;
6249175dedbSmillert 	char *pp;
6259175dedbSmillert 	int val = 0;
6269175dedbSmillert 	char space[20];
6279175dedbSmillert 
6289175dedbSmillert 	deriv[lev] = a;
6299175dedbSmillert 	if (tryword(word, ep, lev))
6309175dedbSmillert 		return (1);
6319175dedbSmillert 	bp = word;
6329175dedbSmillert 	pp = space;
6339175dedbSmillert 	deriv[lev+1] = pp;
6349175dedbSmillert 	while ((cp = lookuppref(&bp, ep))) {
6359175dedbSmillert 		*pp++ = '+';
6369175dedbSmillert 		while ((*pp = *cp++))
6379175dedbSmillert 			pp++;
6389175dedbSmillert 		if (tryword(bp, ep, lev+1)) {
6399175dedbSmillert 			val = 1;
6409175dedbSmillert 			break;
6419175dedbSmillert 		}
6429175dedbSmillert 		if (pp - space >= sizeof(space))
6439175dedbSmillert 			return (0);
6449175dedbSmillert 	}
645e14aac28Snicm 	deriv[lev+1] = deriv[lev+2] = 0;
6469175dedbSmillert 	return (val);
6479175dedbSmillert }
6489175dedbSmillert 
6499175dedbSmillert int
tryword(char * bp,char * ep,int lev)6509175dedbSmillert tryword(char *bp, char *ep, int lev)
6519175dedbSmillert {
6529175dedbSmillert 	int i, j;
6539175dedbSmillert 	char duple[3];
6549175dedbSmillert 
6559175dedbSmillert 	if (ep-bp <= 1)
6569175dedbSmillert 		return (0);
6579175dedbSmillert 	if (vowel(*ep) && monosyl(bp, ep))
6589175dedbSmillert 		return (0);
6599175dedbSmillert 
6609175dedbSmillert 	i = dict(bp, ep);
6619175dedbSmillert 	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
6629175dedbSmillert 		ep--;
6639175dedbSmillert 		deriv[++lev] = duple;
6649175dedbSmillert 		duple[0] = '+';
6659175dedbSmillert 		duple[1] = *ep;
6669175dedbSmillert 		duple[2] = '\0';
6679175dedbSmillert 		i = dict(bp, ep);
6689175dedbSmillert 	}
6699175dedbSmillert 	if (vflag == 0 || i == 0)
6709175dedbSmillert 		return (i);
6719175dedbSmillert 
6729175dedbSmillert 	/* Also tack on possible derivations. (XXX - warn on truncation?) */
6739175dedbSmillert 	for (j = lev; j > 0; j--) {
6749175dedbSmillert 		if (deriv[j])
6759175dedbSmillert 			strlcat(affix, deriv[j], sizeof(affix));
6769175dedbSmillert 	}
6779175dedbSmillert 	return (i);
6789175dedbSmillert }
6799175dedbSmillert 
/*
 * Return 1 if [bp, ep) is at least two characters long and has the
 * shape "non-vowels, one vowel, one final non-vowel", where the final
 * character is neither 'x' nor 'w'.  Return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{

	if (ep < bp + 2)
		return (0);
	if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w')
		return (0);
	while (--ep >= bp)
		if (vowel(*ep))
			return (0);
	return (1);
}

6949175dedbSmillert char *
skipv(char * s)6959175dedbSmillert skipv(char *s)
6969175dedbSmillert {
6979175dedbSmillert 
6989175dedbSmillert 	if (s >= word && vowel(*s))
6999175dedbSmillert 		s--;
7009175dedbSmillert 	while (s >= word && !vowel(*s))
7019175dedbSmillert 		s--;
7029175dedbSmillert 	return (s);
7039175dedbSmillert }
7049175dedbSmillert 
/*
 * Return 1 if c is one of a, e, i, o, u or y in either case,
 * 0 otherwise.
 */
int
vowel(unsigned char c)
{

	switch (tolower(c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return (1);
	}
	return (0);
}

7219175dedbSmillert /*
7229175dedbSmillert  * Crummy way to Britishise.
7239175dedbSmillert  */
7249175dedbSmillert void
ise(void)7259175dedbSmillert ise(void)
7269175dedbSmillert {
727f05c4e53Smillert 	struct suftab *tab;
7289175dedbSmillert 
729f05c4e53Smillert 	for (tab = suftab; tab->suf; tab++) {
730f05c4e53Smillert 		/* Assume that suffix will contain 'z' if a1 or d1 do */
731f05c4e53Smillert 		if (strchr(tab->suf, 'z')) {
732f05c4e53Smillert 			tab->suf = estrdup(tab->suf);
733f05c4e53Smillert 			ztos(tab->suf);
734f05c4e53Smillert 			if (strchr(tab->d1, 'z')) {
735f05c4e53Smillert 				tab->d1 = estrdup(tab->d1);
736f05c4e53Smillert 				ztos(tab->d1);
737f05c4e53Smillert 			}
738f05c4e53Smillert 			if (strchr(tab->a1, 'z')) {
739f05c4e53Smillert 				tab->a1 = estrdup(tab->a1);
740f05c4e53Smillert 				ztos(tab->a1);
741f05c4e53Smillert 			}
742f05c4e53Smillert 		}
7439175dedbSmillert 	}
7449175dedbSmillert }
7459175dedbSmillert 
/*
 * Replace every 'z' in the NUL-terminated string s with 's',
 * in place.
 */
void
ztos(char *s)
{

	for (; *s; s++)
		if (*s == 'z')
			*s = 's';
}

/*
 * strdup() that does not return on failure: exits through err(3)
 * with status 1 if the copy cannot be allocated.  The caller owns
 * the returned string.
 */
char *
estrdup(const char *s)
{
	char *d;

	if ((d = strdup(s)) == NULL)
		err(1, "strdup");
	return (d);
}

7659175dedbSmillert /*
7669175dedbSmillert  * Look up a word in the dictionary.
7679175dedbSmillert  * Returns 1 if found, 0 if not.
7689175dedbSmillert  */
7699175dedbSmillert int
dict(char * bp,char * ep)7709175dedbSmillert dict(char *bp, char *ep)
7719175dedbSmillert {
7729175dedbSmillert 	char c;
7739175dedbSmillert 	int i, rval;
7749175dedbSmillert 
7759175dedbSmillert 	c = *ep;
7769175dedbSmillert 	*ep = '\0';
7779175dedbSmillert 	if (xflag)
7789175dedbSmillert 		printf("=%s\n", bp);
7799175dedbSmillert 	for (i = rval = 0; wlists[i].fd != -1; i++) {
7809175dedbSmillert 		if ((rval = look((unsigned char *)bp, wlists[i].front,
7819175dedbSmillert 		    wlists[i].back)) == 1)
7829175dedbSmillert 			break;
7839175dedbSmillert 	}
7849175dedbSmillert 	*ep = c;
7859175dedbSmillert 	return (rval);
7869175dedbSmillert }
7879175dedbSmillert 
788a2969341Sschwarze static void __dead
usage(void)7899175dedbSmillert usage(void)
7909175dedbSmillert {
7919175dedbSmillert 	extern char *__progname;
7929175dedbSmillert 
7939175dedbSmillert 	fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n",
7949175dedbSmillert 	    __progname);
7959175dedbSmillert 	exit(1);
7969175dedbSmillert }
797