xref: /netbsd-src/usr.bin/spell/spellprog/spellprog.c (revision 8d9348aad107debd79570b2b2fe79a947e79dc19)
1 /*	$NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $	*/
2 
3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
4 
5 /*
6  * Copyright (c) 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)spell.h	8.1 (Berkeley) 6/6/93
34  */
35 /*
36  * Copyright (C) Caldera International Inc.  2001-2002.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code and documentation must retain the above
43  *    copyright notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *	This product includes software developed or owned by Caldera
50  *	International, Inc.
51  * 4. Neither the name of Caldera International, Inc. nor the names of other
52  *    contributors may be used to endorse or promote products derived from
53  *    this software without specific prior written permission.
54  *
55  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
56  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
57  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
58  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
60  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
65  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
69 #include <sys/cdefs.h>
70 
/*
 * Identification strings compiled into the object file.  They are
 * left out when the source is being processed by lint.
 */
#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1991, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
#endif
static const char rcsid[] =
    "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
84 
85 #include <sys/param.h>
86 #include <sys/mman.h>
87 #include <sys/stat.h>
88 
89 #include <ctype.h>
90 #include <err.h>
91 #include <errno.h>
92 #include <fcntl.h>
93 #include <limits.h>
94 #include <locale.h>
95 #include <stdio.h>
96 #include <stdlib.h>
97 #include <string.h>
98 #include <unistd.h>
99 #include <util.h>
100 
101 #include "extern.h"
102 
103 #define DLEV 2
104 
105 static int	 dict(char *, char *);
106 static int	 trypref(char *, const char *, size_t);
107 static int	 tryword(char *, char *, size_t);
108 static int	 suffix(char *, size_t);
109 static int	 vowel(int);
110 static const char *lookuppref(char **, char *);
111 static char	*skipv(char *);
112 static void	 ise(void);
113 static void	 print_word(FILE *);
114 static void	 ztos(char *);
115 static int	 monosyl(char *, char *);
116 static void 	 usage(void) __dead;
117 static void	 getderiv(size_t);
118 
119 static int	 an(char *, const char *, const char *, size_t);
120 static int	 bility(char *, const char *, const char *, size_t);
121 static int	 es(char *, const char *, const char *, size_t);
122 static int	 i_to_y(char *, const char *, const char *, size_t);
123 static int	 ily(char *, const char *, const char *, size_t);
124 static int	 ize(char *, const char *, const char *, size_t);
125 static int	 metry(char *, const char *, const char *, size_t);
126 static int	 ncy(char *, const char *, const char *, size_t);
127 static int	 nop(char *, const char *, const char *, size_t);
128 static int	 s(char *, const char *, const char *, size_t);
129 static int	 strip(char *, const char *, const char *, size_t);
130 static int	 tion(char *, const char *, const char *, size_t);
131 static int	 y_to_e(char *, const char *, const char *, size_t);
132 static int	 CCe(char *, const char *, const char *, size_t);
133 static int	 VCe(char *, const char *, const char *, size_t);
134 
135 /*
136  * This cannot be const because we modify it when we choose british
137  * spelling.
138  */
139 static struct suftab {
140 	const char *suf;
141 	int (*p1)(char *, const char *, const char *, size_t);
142 	int n1;
143 	const char *d1;
144 	const char *a1;
145 	int (*p2)(char *, const char *, const char *, size_t);
146 	int n2;
147 	const char *d2;
148 	const char *a2;
149 } suftab[] = {
150 	{ .suf = "ssen",	.p1 = ily,	.n1 = 4,
151 	  .d1 = "-y+iness", 	.a1 = "+ness" },
152 	{ .suf = "ssel",	.p1 = ily,	.n1 = 4,
153 	  .d1 = "-y+i+less", 	.a1 = "+less" },
154 	{ .suf = "se",		.p1 = s,	.n1 = 1,
155 	  .d1 = "", 		.a1 = "+s",	.p2 = es,
156 	  .n2 = 2,		.d2 = "-y+ies",	.a2 = "+es" },
157 	{ .suf = "s'",		.p1 = s,	.n1 = 2,
158 	  .d1 = "", 		.a1 = "+'s" },
159 	{ .suf = "s",		.p1 = s,	.n1 = 1,
160 	  .d1 = "", 		.a1 = "+s" },
161 	{ .suf = "ecn",		.p1 = ncy,	.n1 = 1,
162 	  .d1 = "", 		.a1 = "-t+ce" },
163 	{ .suf = "ycn",		.p1 = ncy,	.n1 = 1,
164 	  .d1 = "", 		.a1 = "-cy+t" },
165 	{ .suf = "ytilb",	.p1 = nop,	.n1 = 0,
166 	  .d1 = "", 		.a1 = "" },
167 	{ .suf = "ytilib",	.p1 = bility,	.n1 = 5,
168 	  .d1 = "-le+ility", 	.a1 = "" },
169 	{ .suf = "elbaif",	.p1 = i_to_y,	.n1 = 4,
170 	  .d1 = "-y+iable", 	.a1 = "" },
171 	{ .suf = "elba",	.p1 = CCe,	.n1 = 4,
172 	  .d1 = "-e+able", 	.a1 = "+able" },
173 	{ .suf = "yti",		.p1 = CCe,	.n1 = 3,
174 	  .d1 = "-e+ity", 	.a1 = "+ity" },
175 	{ .suf = "ylb",		.p1 = y_to_e,	.n1 = 1,
176 	  .d1 = "-e+y", 	.a1 = "" },
177 	{ .suf = "yl",		.p1 = ily,	.n1 = 2,
178 	  .d1 = "-y+ily", 	.a1 = "+ly" },
179 	{ .suf = "laci",	.p1 = strip,	.n1 = 2,
180 	  .d1 = "", 		.a1 = "+al" },
181 	{ .suf = "latnem",	.p1 = strip,	.n1 = 2,
182 	  .d1 = "", 		.a1 = "+al" },
183 	{ .suf = "lanoi",	.p1 = strip,	.n1 = 2,
184 	  .d1 = "", 		.a1 = "+al" },
185 	{ .suf = "tnem",	.p1 = strip,	.n1 = 4,
186 	  .d1 = "", 		.a1 = "+ment" },
187 	{ .suf = "gni",		.p1 = CCe,	.n1 = 3,
188 	  .d1 = "-e+ing", 	.a1 = "+ing" },
189 	{ .suf = "reta",	.p1 = nop,	.n1 = 0,
190 	  .d1 = "", 		.a1 = "" },
191 	{ .suf = "re",		.p1 = strip,	.n1 = 1,
192 	  .d1 = "", 		.a1 = "+r",	.p2 = i_to_y,
193 	  .n2 = 2,		.d2 = "-y+ier",	.a2 = "+er" },
194 	{ .suf = "de",		.p1 = strip,	.n1 = 1,
195 	  .d1 = "", 		.a1 = "+d",	.p2 = i_to_y,
196 	  .n2 = 2,		.d2 = "-y+ied",	.a2 = "+ed" },
197 	{ .suf = "citsi",	.p1 = strip,	.n1 = 2,
198 	  .d1 = "", 		.a1 = "+ic" },
199 	{ .suf = "cihparg",	.p1 = i_to_y,	.n1 = 1,
200 	  .d1 = "-y+ic", 	.a1 = "" },
201 	{ .suf = "tse",		.p1 = strip,	.n1 = 2,
202 	  .d1 = "", 		.a1 = "+st",	.p2 = i_to_y,
203 	  .n2 = 3,		.d2 = "-y+iest",.a2 = "+est" },
204 	{ .suf = "cirtem",	.p1 = i_to_y,	.n1 = 1,
205 	  .d1 = "-y+ic", 	.a1 = "" },
206 	{ .suf = "yrtem",	.p1 = metry,	.n1 = 0,
207 	  .d1 = "-ry+er", 	.a1 = "" },
208 	{ .suf = "cigol",	.p1 = i_to_y,	.n1 = 1,
209 	  .d1 = "-y+ic", 	.a1 = "" },
210 	{ .suf = "tsigol",	.p1 = i_to_y,	.n1 = 2,
211 	  .d1 = "-y+ist", 	.a1 = "" },
212 	{ .suf = "tsi",		.p1 = VCe,	.n1 = 3,
213 	  .d1 = "-e+ist", 	.a1 = "+ist" },
214 	{ .suf = "msi",		.p1 = VCe,	.n1 = 3,
215 	  .d1 = "-e+ism", 	.a1 = "+ist" },
216 	{ .suf = "noitacif",	.p1 = i_to_y,	.n1 = 6,
217 	  .d1 = "-y+ication", 	.a1 = "" },
218 	{ .suf = "noitazi",	.p1 = ize,	.n1 = 5,
219 	  .d1 = "-e+ation", 	.a1 = "" },
220 	{ .suf = "rota",	.p1 = tion,	.n1 = 2,
221 	  .d1 = "-e+or", 	.a1 = "" },
222 	{ .suf = "noit",	.p1 = tion,	.n1 = 3,
223 	  .d1 = "-e+ion", 	.a1 = "+ion" },
224 	{ .suf = "naino",	.p1 = an,	.n1 = 3,
225 	  .d1 = "", 		.a1 = "+ian" },
226 	{ .suf = "na",		.p1 = an,	.n1 = 1,
227 	  .d1 = "", 		.a1 = "+n" },
228 	{ .suf = "evit",	.p1 = tion,	.n1 = 3,
229 	  .d1 = "-e+ive", 	.a1 = "+ive" },
230 	{ .suf = "ezi",		.p1 = CCe,	.n1 = 3,
231 	  .d1 = "-e+ize", 	.a1 = "+ize" },
232 	{ .suf = "pihs",	.p1 = strip,	.n1 = 4,
233 	  .d1 = "", 		.a1 = "+ship" },
234 	{ .suf = "dooh",	.p1 = ily,	.n1 = 4,
235 	  .d1 = "-y+hood", 	.a1 = "+hood" },
236 	{ .suf = "ekil",	.p1 = strip,	.n1 = 4,
237 	  .d1 = "", 		.a1 = "+like" },
238 	{ .suf = NULL, }
239 };
240 
/*
 * Prefixes that lookuppref() will strip from the front of a word.
 * The list is scanned in order and is NULL-terminated.
 */
static const char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso", "kilo",
	"magneto", "meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over",
	"photo", "poly", "pre", "pseudo",
	"re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
278 
/*
 * One word list mapped into memory: its descriptor and the first and
 * one-past-last bytes of the mapping.  The wlists array is terminated
 * by an entry whose fd is -1.
 */
struct wlist {
	int fd;
	unsigned char *front;
	unsigned char *back;
};
static struct wlist *wlists;

static int vflag;			/* -v: record derivations */
static int xflag;			/* -x: print candidate stems */

static char word[LINE_MAX];		/* working copy, edited in place */
static char original[LINE_MAX];		/* word exactly as it was read */
static char affix[LINE_MAX];		/* derivation text for -v output */

/* Growable array of derivation strings, indexed by derivation level. */
static struct {
	const char **buf;
	size_t maxlev;			/* elements allocated in buf */
} deriv;
294 
295 /*
296  * The spellprog utility accepts a newline-delimited list of words
297  * on stdin.  For arguments it expects the path to a word list and
298  * the path to a file in which to store found words.
299  *
300  * In normal usage, spell is called twice.  The first time it is
301  * called with a stop list to flag commonly misspelled words.  The
302  * remaining words are then passed to spell again, this time with
303  * the dictionary file as the first (non-flag) argument.
304  *
305  * Unlike historic versions of spellprog, this one does not use
306  * hashed files.  Instead it simply requires that files be sorted
307  * lexicographically and uses the same algorithm as the look utility.
308  *
309  * Note that spellprog should be called via the spell shell script
310  * and is not meant to be invoked directly by the user.
311  */
312 
313 int
main(int argc,char ** argv)314 main(int argc, char **argv)
315 {
316 	char *ep, *cp, *dp;
317 	char *outfile;
318 	int ch, fold, i;
319 	struct stat sb;
320 	FILE *file, *found;
321 
322 	setlocale(LC_ALL, "");
323 
324 	outfile = NULL;
325 	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
326 		switch (ch) {
327 		case 'b':
328 			/* Use British dictionary and convert ize -> ise. */
329 			ise();
330 			break;
331 		case 'o':
332 			outfile = optarg;
333 			break;
334 		case 'v':
335 			/* Also write derivations to "found" file. */
336 			vflag++;
337 			break;
338 		case 'x':
339 			/* Print plausible stems to stdout. */
340 			xflag++;
341 			break;
342 		default:
343 			usage();
344 		}
345 
346 	}
347 	argc -= optind;
348 	argv += optind;
349 	if (argc < 1)
350 		usage();
351 
352 	/* Open and mmap the word/stop lists. */
353 	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
354 		err(1, "malloc");
355 
356 	for (i = 0; argc--; i++) {
357 		wlists[i].fd = open(argv[i], O_RDONLY, 0);
358 		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
359 			err(1, "%s", argv[i]);
360 		if (sb.st_size > SIZE_T_MAX)
361 			errx(1, "%s: %s", argv[i], strerror(EFBIG));
362 		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
363 		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
364 		if (wlists[i].front == MAP_FAILED)
365 			err(1, "%s", argv[i]);
366 		wlists[i].back = wlists[i].front + (size_t)sb.st_size;
367 	}
368 	wlists[i].fd = -1;
369 
370 	/* Open file where found words are to be saved. */
371 	if (outfile == NULL)
372 		found = NULL;
373 	else if ((found = fopen(outfile, "w")) == NULL)
374 		err(1, "cannot open %s", outfile);
375 
376 	for (;; print_word(file)) {
377 		affix[0] = '\0';
378 		file = found;
379 		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
380 			if (ep - word == sizeof(word) - 1) {
381 				*ep = '\0';
382 				warnx("word too long (%s)", word);
383 				while ((ch = getchar()) != '\n')
384 					;	/* slurp until EOL */
385 			}
386 			if (ch == EOF) {
387 				if (found != NULL)
388 					fclose(found);
389 				exit(0);
390 			}
391 		}
392 		for (cp = word, dp = original; cp < ep; )
393 			*dp++ = *cp++;
394 		*dp = '\0';
395 		fold = 0;
396 		for (cp = word; cp < ep; cp++)
397 			if (islower((unsigned char)*cp))
398 				goto lcase;
399 		if (trypref(ep, ".", 0))
400 			continue;
401 		++fold;
402 		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
403 			*dp = tolower((unsigned char)*cp);
404 lcase:
405 		if (trypref(ep, ".", 0) || suffix(ep, 0))
406 			continue;
407 		if (isupper((unsigned char)word[0])) {
408 			for (cp = original, dp = word; (*dp = *cp++); dp++) {
409 				if (fold)
410 					*dp = tolower((unsigned char)*dp);
411 			}
412 			word[0] = tolower((unsigned char)word[0]);
413 			goto lcase;
414 		}
415 		file = stdout;
416 	}
417 }
418 
419 static void
print_word(FILE * f)420 print_word(FILE *f)
421 {
422 
423 	if (f != NULL) {
424 		if (vflag && affix[0] != '\0' && affix[0] != '.')
425 			fprintf(f, "%s\t%s\n", affix, original);
426 		else
427 			fprintf(f, "%s\n", original);
428 	}
429 }
430 
431 /*
432  * For each matching suffix in suftab, call the function associated
433  * with that suffix (p1 and p2).
434  */
435 static int
suffix(char * ep,size_t lev)436 suffix(char *ep, size_t lev)
437 {
438 	const struct suftab *t;
439 	char *cp;
440 	const char *sp;
441 
442 	lev += DLEV;
443 	getderiv(lev + 1);
444 	deriv.buf[lev] = deriv.buf[lev - 1] = 0;
445 	for (t = suftab; (sp = t->suf) != NULL; t++) {
446 		cp = ep;
447 		while (*sp) {
448 			if (*--cp != *sp++)
449 				goto next;
450 		}
451 		for (sp = cp; --sp >= word && !vowel(*sp);)
452 			;	/* nothing */
453 		if (sp < word)
454 			return 0;
455 		if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
456 			return 1;
457 		if (t->p2 != NULL) {
458 			deriv.buf[lev] = deriv.buf[lev + 1] = NULL;
459 			return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
460 		}
461 		return 0;
462 next:		;
463 	}
464 	return 0;
465 }
466 
/*
 * Suffix handler that never matches.  suftab uses it for endings that
 * must not be stripped; every argument is ignored.
 */
static int
/*ARGSUSED*/
nop(char *ep, const char *d, const char *a, size_t lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return 0;
}
474 
/*
 * Try the stem ending at ep as it stands (with optional prefixes),
 * recording a as the derivation; if that fails, try to strip a
 * further suffix from it.  Returns 1 on success and 0 otherwise.
 * d is unused.
 */
static int
/*ARGSUSED*/
strip(char *ep, const char *d, const char *a, size_t lev)
{

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
482 
483 static int
s(char * ep,const char * d,const char * a,const size_t lev)484 s(char *ep, const char *d, const char *a, const size_t lev)
485 {
486 
487 	if (lev > DLEV + 1)
488 		return 0;
489 	if (*ep == 's' && ep[-1] == 's')
490 		return 0;
491 	return strip(ep, d, a, lev);
492 }
493 
494 static int
495 /*ARGSUSED*/
an(char * ep,const char * d,const char * a,size_t lev)496 an(char *ep, const char *d, const char *a, size_t lev)
497 {
498 
499 	if (!isupper((unsigned char)*word))	/* must be proper name */
500 		return 0;
501 	return trypref(ep, a, lev);
502 }
503 
/*
 * Handler for "-ization": store an 'e' at ep and look up the stem
 * that now ends just after it, recording d as the derivation.
 * The overwritten character is not restored.  a is unused.
 */
static int
/*ARGSUSED*/
ize(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'e';
	return strip(ep + 1, "", d, lev);
}
512 
/*
 * Temporarily replace the character at ep with 'e' and try the stem
 * that ends just after it, recording d as the derivation.  On failure
 * the original character is put back.  Returns 1 if the stem was
 * found, 0 otherwise.  a is unused.
 */
static int
/*ARGSUSED*/
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		ep[0] = saved;
		return 0;
	}
	return 1;
}
525 
526 static int
ily(char * ep,const char * d,const char * a,size_t lev)527 ily(char *ep, const char *d, const char *a, size_t lev)
528 {
529 
530 	if (ep[-1] == 'i')
531 		return i_to_y(ep, d, a, lev);
532 	else
533 		return strip(ep, d, a, lev);
534 }
535 
536 static int
ncy(char * ep,const char * d,const char * a,size_t lev)537 ncy(char *ep, const char *d, const char *a, size_t lev)
538 {
539 
540 	if (skipv(skipv(ep - 1)) < word)
541 		return 0;
542 	ep[-1] = 't';
543 	return strip(ep, d, a, lev);
544 }
545 
/*
 * Handler for "-ibility": store an 'l' at ep and let y_to_e() try the
 * stem with an 'e' after it.  The 'l' is not undone on failure.
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
553 
/*
 * If the stem ends in 'i', turn that into 'y' and record d as the
 * derivation; otherwise leave the stem alone and record a.  The
 * result goes to strip().  The 'y' is not changed back on failure.
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return strip(ep, "", how, lev);
}
564 
565 static int
es(char * ep,const char * d,const char * a,size_t lev)566 es(char *ep, const char *d, const char *a, size_t lev)
567 {
568 
569 	if (lev > DLEV)
570 		return 0;
571 
572 	switch (ep[-1]) {
573 	default:
574 		return 0;
575 	case 'i':
576 		return i_to_y(ep, d, a, lev);
577 	case 's':
578 	case 'h':
579 	case 'z':
580 	case 'x':
581 		return strip(ep, d, a, lev);
582 	}
583 }
584 
/*
 * Handler for "-metry": overwrite the two characters before ep with
 * "er" and hand the result to strip().
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{

	memcpy(ep - 2, "er", 2);
	return strip(ep, d, a, lev);
}
593 
/*
 * Handler for "-tion", "-tive" and "-ator".  The decision rests on
 * the character two places before ep: 'c' or 'r' means the stem is
 * looked up directly, 'a' means an 'e' is tried via y_to_e(), and
 * anything else is rejected.
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char key = ep[-2];

	if (key == 'c' || key == 'r')
		return trypref(ep, a, lev);
	if (key == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
607 
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem, either try the
 * stem with an 'e' appended (y_to_e) or fall back to VCe().  The
 * cases deliberately fall through into one another.
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char prev = ep[-2];

	switch (ep[-1]) {
	case 'l':
		/* 'l' after a consonant other than l, r or w. */
		if (!vowel(prev) && prev != 'l' && prev != 'r' && prev != 'w')
			return y_to_e(ep, d, a, lev);
		break;
	case 's':
		if (prev == 's')
			break;
		/*FALLTHROUGH*/
	case 'c':
	case 'g':
		if (*ep == 'a')
			return 0;
		/*FALLTHROUGH*/
	case 'v':
	case 'z':
		if (vowel(prev))
			break;
		/*FALLTHROUGH*/
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return 1;
		/* Only an "ng" stem gets a second chance through VCe(). */
		if (ep[-2] != 'n' || ep[-1] != 'g')
			return 0;
		break;
	default:
		break;
	}
	return VCe(ep, d, a, lev);
}
650 
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem that already ends in 'e' is rejected.  When the stem ends in
 * vowel + consonant, it is first tried with an 'e' appended (the
 * overwritten character is restored on failure); finally the stem is
 * tried unchanged through strip().
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	char saved;

	if (last == 'e')
		return 0;

	if (!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		ep[0] = saved;
	}
	return strip(ep, d, a, lev);
}
672 
673 static const char *
lookuppref(char ** wp,char * ep)674 lookuppref(char **wp, char *ep)
675 {
676 	const char **sp, *cp;
677 	char *bp;
678 
679 	for (sp = preftab; *sp; sp++) {
680 		bp = *wp;
681 		for (cp = *sp; *cp; cp++, bp++) {
682 			if (tolower((unsigned char)*bp) != *cp)
683 				goto next;
684 		}
685 		for (cp = bp; cp < ep; cp++) {
686 			if (vowel(*cp)) {
687 				*wp = bp;
688 				return *sp;
689 			}
690 		}
691 next:		;
692 	}
693 	return 0;
694 }
695 
696 /*
697  * If the word is not in the dictionary, try stripping off prefixes
698  * until the word is found or we run out of prefixes to check.
699  */
700 static int
trypref(char * ep,const char * a,size_t lev)701 trypref(char *ep, const char *a, size_t lev)
702 {
703 	const char *cp;
704 	char *bp;
705 	char *pp;
706 	int val = 0;
707 	char space[20];
708 
709 	getderiv(lev + 2);
710 	deriv.buf[lev] = a;
711 	if (tryword(word, ep, lev))
712 		return 1;
713 	bp = word;
714 	pp = space;
715 	deriv.buf[lev + 1] = pp;
716 	while ((cp = lookuppref(&bp, ep)) != NULL) {
717 		*pp++ = '+';
718 		while ((*pp = *cp++))
719 			pp++;
720 		if (tryword(bp, ep, lev + 1)) {
721 			val = 1;
722 			break;
723 		}
724 		if (pp - space >= sizeof(space))
725 			return 0;
726 	}
727 	deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL;
728 	return val;
729 }
730 
731 static int
tryword(char * bp,char * ep,size_t lev)732 tryword(char *bp, char *ep, size_t lev)
733 {
734 	size_t i, j;
735 	char duple[3];
736 
737 	if (ep-bp <= 1)
738 		return 0;
739 	if (vowel(*ep) && monosyl(bp, ep))
740 		return 0;
741 
742 	i = dict(bp, ep);
743 	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
744 	    monosyl(bp, ep - 1)) {
745 		ep--;
746 		getderiv(++lev);
747 		deriv.buf[lev] = duple;
748 		duple[0] = '+';
749 		duple[1] = *ep;
750 		duple[2] = '\0';
751 		i = dict(bp, ep);
752 	}
753 	if (vflag == 0 || i == 0)
754 		return i;
755 
756 	/* Also tack on possible derivations. (XXX - warn on truncation?) */
757 	for (j = lev; j > 0; j--) {
758 		if (deriv.buf[j])
759 			(void)strlcat(affix, deriv.buf[j], sizeof(affix));
760 	}
761 	return i;
762 }
763 
/*
 * Return 1 when the text between bp and ep ends in vowel + consonant,
 * that final consonant is neither 'x' nor 'w', and no other vowel
 * occurs earlier in it; return 0 otherwise (including when the text
 * is shorter than two characters).
 */
static int
monosyl(char *bp, char *ep)
{
	char *p;
	char last;

	if (ep < bp + 2)
		return 0;

	last = ep[-1];
	if (vowel(last))
		return 0;
	if (!vowel(ep[-2]))
		return 0;
	if (last == 'x' || last == 'w')
		return 0;

	/* Everything in front of the final vowel must be consonants. */
	p = ep - 2;
	while (p > bp) {
		p--;
		if (vowel(*p))
			return 0;
	}
	return 1;
}
777 
778 static char *
skipv(char * st)779 skipv(char *st)
780 {
781 
782 	if (st >= word && vowel(*st))
783 		st--;
784 	while (st >= word && !vowel(*st))
785 		st--;
786 	return st;
787 }
788 
/*
 * Return 1 if c is one of a, e, i, o, u or y in either case, and 0
 * otherwise.
 *
 * Callers pass plain char values, which may be negative where char is
 * signed; the value is converted to unsigned char before it reaches
 * tolower(), as the <ctype.h> functions require.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
804 
805 /*
806  * Crummy way to Britishise.
807  */
808 static void
ise(void)809 ise(void)
810 {
811 	struct suftab *tab;
812 	char *cp;
813 
814 	for (tab = suftab; tab->suf; tab++) {
815 		/* Assume that suffix will contain 'z' if a1 or d1 do */
816 		if (strchr(tab->suf, 'z')) {
817 			tab->suf = cp = estrdup(tab->suf);
818 			ztos(cp);
819 			if (strchr(tab->d1, 'z')) {
820 				tab->d1 = cp = estrdup(tab->d1);
821 				ztos(cp);
822 			}
823 			if (strchr(tab->a1, 'z')) {
824 				tab->a1 = cp = estrdup(tab->a1);
825 				ztos(cp);
826 			}
827 		}
828 	}
829 }
830 
/*
 * Replace every 'z' in the NUL-terminated string st with 's', in
 * place.
 */
static void
ztos(char *st)
{
	char *hit;

	for (hit = strchr(st, 'z'); hit != NULL; hit = strchr(hit + 1, 'z'))
		*hit = 's';
}
839 
840 /*
841  * Look up a word in the dictionary.
842  * Returns 1 if found, 0 if not.
843  */
844 static int
dict(char * bp,char * ep)845 dict(char *bp, char *ep)
846 {
847 	char c;
848 	int i, rval;
849 
850 	c = *ep;
851 	*ep = '\0';
852 	if (xflag)
853 		printf("=%s\n", bp);
854 	for (i = rval = 0; wlists[i].fd != -1; i++) {
855 		if ((rval = look((unsigned char *)bp, wlists[i].front,
856 		    wlists[i].back)) == 1)
857 			break;
858 	}
859 	*ep = c;
860 	return rval;
861 }
862 
863 static void
getderiv(size_t lev)864 getderiv(size_t lev)
865 {
866 	if (deriv.maxlev < lev) {
867 		if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0)
868 			err(1, "Cannot grow array");
869 		deriv.maxlev = lev;
870 	}
871 }
872 
873 
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n", prog);
	exit(1);
}
882