xref: /netbsd-src/usr.bin/spell/spellprog/spellprog.c (revision bfb6cb13d599546df69c7e4d20d70e22e15a549d)
1 /*	$NetBSD: spellprog.c,v 1.6 2006/10/22 16:36:44 christos Exp $	*/
2 
3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
4 
5 /*
6  * Copyright (c) 1991, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)spell.h	8.1 (Berkeley) 6/6/93
34  */
35 /*
36  * Copyright (C) Caldera International Inc.  2001-2002.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code and documentation must retain the above
43  *    copyright notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *	This product includes software developed or owned by Caldera
50  *	International, Inc.
51  * 4. Neither the name of Caldera International, Inc. nor the names of other
52  *    contributors may be used to endorse or promote products derived from
53  *    this software without specific prior written permission.
54  *
55  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
56  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
57  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
58  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
60  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
65  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  */
68 
#ifndef lint
/* Copyright notice kept in the object file. */
static const char copyright[] =
    "@(#) Copyright (c) 1991, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static const char sccsid[] = "@(#)spell.c	8.1 (Berkeley) 6/6/93";
#endif
/* Identifier of the OpenBSD revision this file was derived from. */
static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
82 
83 #include <sys/param.h>
84 #include <sys/mman.h>
85 #include <sys/stat.h>
86 
87 #include <ctype.h>
88 #include <err.h>
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <limits.h>
92 #include <locale.h>
93 #include <stdio.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <unistd.h>
97 #include <util.h>
98 
99 #include "extern.h"
100 
/* Amount suffix() adds to the derivation level each time it is entered. */
#define DLEV 2

/* Lookup drivers and small helpers. */
static int	 dict(char *bp, char *ep);
static int	 trypref(char *ep, const char *a, size_t lev);
static int	 tryword(char *bp, char *ep, size_t lev);
static int	 suffix(char *ep, size_t lev);
static int	 vowel(int c);
static const char *lookuppref(char **wp, char *ep);
static char	*skipv(char *st);
static void	 ise(void);
static void	 print_word(FILE *f);
static void	 ztos(char *st);
static int	 monosyl(char *bp, char *ep);
static void 	 usage(void) __attribute__((__noreturn__));
static void	 getderiv(size_t lev);

/*
 * Suffix handlers referenced from suftab.  Each receives the end of the
 * candidate stem (ep), the two derivation strings of the table entry
 * (d and a) and the current derivation level (lev).
 */
static int	 an(char *ep, const char *d, const char *a, size_t lev);
static int	 bility(char *ep, const char *d, const char *a, size_t lev);
static int	 es(char *ep, const char *d, const char *a, size_t lev);
static int	 i_to_y(char *ep, const char *d, const char *a, size_t lev);
static int	 ily(char *ep, const char *d, const char *a, size_t lev);
static int	 ize(char *ep, const char *d, const char *a, size_t lev);
static int	 metry(char *ep, const char *d, const char *a, size_t lev);
static int	 ncy(char *ep, const char *d, const char *a, size_t lev);
static int	 nop(char *ep, const char *d, const char *a, size_t lev);
static int	 s(char *ep, const char *d, const char *a, size_t lev);
static int	 strip(char *ep, const char *d, const char *a, size_t lev);
static int	 tion(char *ep, const char *d, const char *a, size_t lev);
static int	 y_to_e(char *ep, const char *d, const char *a, size_t lev);
static int	 CCe(char *ep, const char *d, const char *a, size_t lev);
static int	 VCe(char *ep, const char *d, const char *a, size_t lev);
132 
133 /*
134  * This cannot be const because we modify it when we choose british
135  * spelling.
136  */
137 static struct suftab {
138 	const char *suf;
139 	int (*p1)(char *, const char *, const char *, size_t);
140 	int n1;
141 	const char *d1;
142 	const char *a1;
143 	int (*p2)(char *, const char *, const char *, size_t);
144 	int n2;
145 	const char *d2;
146 	const char *a2;
147 } suftab[] = {
148 	{ .suf = "ssen",	.p1 = ily,	.n1 = 4,
149 	  .d1 = "-y+iness", 	.a1 = "+ness" },
150 	{ .suf = "ssel",	.p1 = ily,	.n1 = 4,
151 	  .d1 = "-y+i+less", 	.a1 = "+less" },
152 	{ .suf = "se",		.p1 = s,	.n1 = 1,
153 	  .d1 = "", 		.a1 = "+s",	.p2 = es,
154 	  .n2 = 2,		.d2 = "-y+ies",	.a2 = "+es" },
155 	{ .suf = "s'",		.p1 = s,	.n1 = 2,
156 	  .d1 = "", 		.a1 = "+'s" },
157 	{ .suf = "s",		.p1 = s,	.n1 = 1,
158 	  .d1 = "", 		.a1 = "+s" },
159 	{ .suf = "ecn",		.p1 = ncy,	.n1 = 1,
160 	  .d1 = "", 		.a1 = "-t+ce" },
161 	{ .suf = "ycn",		.p1 = ncy,	.n1 = 1,
162 	  .d1 = "", 		.a1 = "-cy+t" },
163 	{ .suf = "ytilb",	.p1 = nop,	.n1 = 0,
164 	  .d1 = "", 		.a1 = "" },
165 	{ .suf = "ytilib",	.p1 = bility,	.n1 = 5,
166 	  .d1 = "-le+ility", 	.a1 = "" },
167 	{ .suf = "elbaif",	.p1 = i_to_y,	.n1 = 4,
168 	  .d1 = "-y+iable", 	.a1 = "" },
169 	{ .suf = "elba",	.p1 = CCe,	.n1 = 4,
170 	  .d1 = "-e+able", 	.a1 = "+able" },
171 	{ .suf = "yti",		.p1 = CCe,	.n1 = 3,
172 	  .d1 = "-e+ity", 	.a1 = "+ity" },
173 	{ .suf = "ylb",		.p1 = y_to_e,	.n1 = 1,
174 	  .d1 = "-e+y", 	.a1 = "" },
175 	{ .suf = "yl",		.p1 = ily,	.n1 = 2,
176 	  .d1 = "-y+ily", 	.a1 = "+ly" },
177 	{ .suf = "laci",	.p1 = strip,	.n1 = 2,
178 	  .d1 = "", 		.a1 = "+al" },
179 	{ .suf = "latnem",	.p1 = strip,	.n1 = 2,
180 	  .d1 = "", 		.a1 = "+al" },
181 	{ .suf = "lanoi",	.p1 = strip,	.n1 = 2,
182 	  .d1 = "", 		.a1 = "+al" },
183 	{ .suf = "tnem",	.p1 = strip,	.n1 = 4,
184 	  .d1 = "", 		.a1 = "+ment" },
185 	{ .suf = "gni",		.p1 = CCe,	.n1 = 3,
186 	  .d1 = "-e+ing", 	.a1 = "+ing" },
187 	{ .suf = "reta",	.p1 = nop,	.n1 = 0,
188 	  .d1 = "", 		.a1 = "" },
189 	{ .suf = "re",		.p1 = strip,	.n1 = 1,
190 	  .d1 = "", 		.a1 = "+r",	.p2 = i_to_y,
191 	  .n2 = 2,		.d2 = "-y+ier",	.a2 = "+er" },
192 	{ .suf = "de",		.p1 = strip,	.n1 = 1,
193 	  .d1 = "", 		.a1 = "+d",	.p2 = i_to_y,
194 	  .n2 = 2,		.d2 = "-y+ied",	.a2 = "+ed" },
195 	{ .suf = "citsi",	.p1 = strip,	.n1 = 2,
196 	  .d1 = "", 		.a1 = "+ic" },
197 	{ .suf = "cihparg",	.p1 = i_to_y,	.n1 = 1,
198 	  .d1 = "-y+ic", 	.a1 = "" },
199 	{ .suf = "tse",		.p1 = strip,	.n1 = 2,
200 	  .d1 = "", 		.a1 = "+st",	.p2 = i_to_y,
201 	  .n2 = 3,		.d2 = "-y+iest",.a2 = "+est" },
202 	{ .suf = "cirtem",	.p1 = i_to_y,	.n1 = 1,
203 	  .d1 = "-y+ic", 	.a1 = "" },
204 	{ .suf = "yrtem",	.p1 = metry,	.n1 = 0,
205 	  .d1 = "-ry+er", 	.a1 = "" },
206 	{ .suf = "cigol",	.p1 = i_to_y,	.n1 = 1,
207 	  .d1 = "-y+ic", 	.a1 = "" },
208 	{ .suf = "tsigol",	.p1 = i_to_y,	.n1 = 2,
209 	  .d1 = "-y+ist", 	.a1 = "" },
210 	{ .suf = "tsi",		.p1 = VCe,	.n1 = 3,
211 	  .d1 = "-e+ist", 	.a1 = "+ist" },
212 	{ .suf = "msi",		.p1 = VCe,	.n1 = 3,
213 	  .d1 = "-e+ism", 	.a1 = "+ist" },
214 	{ .suf = "noitacif",	.p1 = i_to_y,	.n1 = 6,
215 	  .d1 = "-y+ication", 	.a1 = "" },
216 	{ .suf = "noitazi",	.p1 = ize,	.n1 = 5,
217 	  .d1 = "-e+ation", 	.a1 = "" },
218 	{ .suf = "rota",	.p1 = tion,	.n1 = 2,
219 	  .d1 = "-e+or", 	.a1 = "" },
220 	{ .suf = "noit",	.p1 = tion,	.n1 = 3,
221 	  .d1 = "-e+ion", 	.a1 = "+ion" },
222 	{ .suf = "naino",	.p1 = an,	.n1 = 3,
223 	  .d1 = "", 		.a1 = "+ian" },
224 	{ .suf = "na",		.p1 = an,	.n1 = 1,
225 	  .d1 = "", 		.a1 = "+n" },
226 	{ .suf = "evit",	.p1 = tion,	.n1 = 3,
227 	  .d1 = "-e+ive", 	.a1 = "+ive" },
228 	{ .suf = "ezi",		.p1 = CCe,	.n1 = 3,
229 	  .d1 = "-e+ize", 	.a1 = "+ize" },
230 	{ .suf = "pihs",	.p1 = strip,	.n1 = 4,
231 	  .d1 = "", 		.a1 = "+ship" },
232 	{ .suf = "dooh",	.p1 = ily,	.n1 = 4,
233 	  .d1 = "-y+hood", 	.a1 = "+hood" },
234 	{ .suf = "ekil",	.p1 = strip,	.n1 = 4,
235 	  .d1 = "", 		.a1 = "+like" },
236 	{ .suf = NULL, }
237 };
238 
/*
 * Prefixes lookuppref() may strip.  They are tried in this order and the
 * first match wins, so a prefix that starts with another entry has to be
 * listed before it; the list ends with NULL.
 */
static const char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
276 
/*
 * One memory-mapped word list.  main() builds an array of these and ends
 * it with an entry whose fd is -1.
 */
struct wlist {
	int fd;			/* descriptor of the opened list */
	unsigned char *front;	/* first byte of the mapping */
	unsigned char *back;	/* one past the last byte of the mapping */
};
static struct wlist *wlists;

static int vflag;		/* -v: record derivations with found words */
static int xflag;		/* -x: print every stem that is looked up */

static char word[LINE_MAX];	/* working copy, rewritten while stripping */
static char original[LINE_MAX];	/* the word exactly as it was read */
static char affix[LINE_MAX];	/* derivation collected by tryword() */

/* Derivation strings indexed by level; grown on demand by getderiv(). */
static struct {
	const char **buf;
	size_t maxlev;		/* number of entries buf can hold */
} deriv;
292 
/*
 * The spellprog utility accepts a newline-delimited list of words
 * on stdin.  For arguments it expects the path to one or more word
 * lists and, with the -o flag, the path to a file in which to store
 * found words.
 *
 * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
 * remaining words are then passed to spell again, this time with
 * the dictionary file as the first (non-flag) argument.
 *
 * Unlike historic versions of spellprog, this one does not use
 * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
 *
 * Note that spellprog should be called via the spell shell script
 * and is not meant to be invoked directly by the user.
 */
310 
311 int
312 main(int argc, char **argv)
313 {
314 	char *ep, *cp, *dp;
315 	char *outfile;
316 	int ch, fold, i;
317 	struct stat sb;
318 	FILE *file, *found;
319 
320 	setlocale(LC_ALL, "");
321 
322 	outfile = NULL;
323 	while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
324 		switch (ch) {
325 		case 'b':
326 			/* Use British dictionary and convert ize -> ise. */
327 			ise();
328 			break;
329 		case 'o':
330 			outfile = optarg;
331 			break;
332 		case 'v':
333 			/* Also write derivations to "found" file. */
334 			vflag++;
335 			break;
336 		case 'x':
337 			/* Print plausible stems to stdout. */
338 			xflag++;
339 			break;
340 		default:
341 			usage();
342 		}
343 
344 	}
345 	argc -= optind;
346 	argv += optind;
347 	if (argc < 1)
348 		usage();
349 
350 	/* Open and mmap the word/stop lists. */
351 	if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
352 		err(1, "malloc");
353 
354 	for (i = 0; argc--; i++) {
355 		wlists[i].fd = open(argv[i], O_RDONLY, 0);
356 		if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
357 			err(1, "%s", argv[i]);
358 		if (sb.st_size > SIZE_T_MAX)
359 			errx(1, "%s: %s", argv[i], strerror(EFBIG));
360 		wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
361 		    MAP_PRIVATE, wlists[i].fd, (off_t)0);
362 		if (wlists[i].front == MAP_FAILED)
363 			err(1, "%s", argv[i]);
364 		wlists[i].back = wlists[i].front + (size_t)sb.st_size;
365 	}
366 	wlists[i].fd = -1;
367 
368 	/* Open file where found words are to be saved. */
369 	if (outfile == NULL)
370 		found = NULL;
371 	else if ((found = fopen(outfile, "w")) == NULL)
372 		err(1, "cannot open %s", outfile);
373 
374 	for (;; print_word(file)) {
375 		affix[0] = '\0';
376 		file = found;
377 		for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
378 			if (ep - word == sizeof(word) - 1) {
379 				*ep = '\0';
380 				warnx("word too long (%s)", word);
381 				while ((ch = getchar()) != '\n')
382 					;	/* slurp until EOL */
383 			}
384 			if (ch == EOF) {
385 				if (found != NULL)
386 					fclose(found);
387 				exit(0);
388 			}
389 		}
390 		for (cp = word, dp = original; cp < ep; )
391 			*dp++ = *cp++;
392 		*dp = '\0';
393 		fold = 0;
394 		for (cp = word; cp < ep; cp++)
395 			if (islower((unsigned char)*cp))
396 				goto lcase;
397 		if (trypref(ep, ".", 0))
398 			continue;
399 		++fold;
400 		for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
401 			*dp = tolower((unsigned char)*cp);
402 lcase:
403 		if (trypref(ep, ".", 0) || suffix(ep, 0))
404 			continue;
405 		if (isupper((unsigned char)word[0])) {
406 			for (cp = original, dp = word; (*dp = *cp++); dp++) {
407 				if (fold)
408 					*dp = tolower((unsigned char)*dp);
409 			}
410 			word[0] = tolower((unsigned char)word[0]);
411 			goto lcase;
412 		}
413 		file = stdout;
414 	}
415 }
416 
417 static void
418 print_word(FILE *f)
419 {
420 
421 	if (f != NULL) {
422 		if (vflag && affix[0] != '\0' && affix[0] != '.')
423 			fprintf(f, "%s\t%s\n", affix, original);
424 		else
425 			fprintf(f, "%s\n", original);
426 	}
427 }
428 
429 /*
430  * For each matching suffix in suftab, call the function associated
431  * with that suffix (p1 and p2).
432  */
433 static int
434 suffix(char *ep, size_t lev)
435 {
436 	const struct suftab *t;
437 	char *cp;
438 	const char *sp;
439 
440 	lev += DLEV;
441 	getderiv(lev + 1);
442 	deriv.buf[lev] = deriv.buf[lev - 1] = 0;
443 	for (t = suftab; (sp = t->suf) != NULL; t++) {
444 		cp = ep;
445 		while (*sp) {
446 			if (*--cp != *sp++)
447 				goto next;
448 		}
449 		for (sp = cp; --sp >= word && !vowel(*sp);)
450 			;	/* nothing */
451 		if (sp < word)
452 			return 0;
453 		if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
454 			return 1;
455 		if (t->p2 != NULL) {
456 			deriv.buf[lev] = deriv.buf[lev + 1] = '\0';
457 			return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
458 		}
459 		return 0;
460 next:		;
461 	}
462 	return 0;
463 }
464 
/*
 * Handler for suffixes that must not be stripped: always reports
 * failure and touches none of its arguments.
 */
static int
/*ARGSUSED*/
nop(char *ep, const char *d, const char *a, size_t lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;
	return 0;
}
472 
/*
 * Look the stem word..ep up with derivation a (trying prefixes as well);
 * if that fails, try to strip a further suffix from it.  d is unused.
 * Returns 1 on success, 0 otherwise.
 */
static int
/*ARGSUSED*/
strip(char *ep, const char *d, const char *a, size_t lev)
{

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
480 
481 static int
482 s(char *ep, const char *d, const char *a, const size_t lev)
483 {
484 
485 	if (lev > DLEV + 1)
486 		return 0;
487 	if (*ep == 's' && ep[-1] == 's')
488 		return 0;
489 	return strip(ep, d, a, lev);
490 }
491 
492 static int
493 /*ARGSUSED*/
494 an(char *ep, const char *d, const char *a, size_t lev)
495 {
496 
497 	if (!isupper((unsigned char)*word))	/* must be proper name */
498 		return 0;
499 	return trypref(ep, a, lev);
500 }
501 
/*
 * Handler for "ization": put an 'e' at ep and hand the stem that now
 * ends just after it to strip(), with d as the derivation.  The
 * overwritten character is not restored.  a is unused.
 */
static int
/*ARGSUSED*/
ize(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'e';
	return strip(ep + 1, "", d, lev);
}
510 
/*
 * Replace the character at ep with 'e' and try the stem that ends just
 * after it, using d as the derivation.  On failure the replaced
 * character is put back.  a is unused.  Returns 1 on success, else 0.
 */
static int
/*ARGSUSED*/
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (strip(ep + 1, "", d, lev))
		return 1;
	ep[0] = saved;
	return 0;
}
523 
/*
 * A stem ending in 'i' goes through i_to_y(); any other stem is passed
 * to strip() unchanged.
 */
static int
ily(char *ep, const char *d, const char *a, size_t lev)
{

	return (ep[-1] == 'i') ? i_to_y(ep, d, a, lev) : strip(ep, d, a, lev);
}
533 
534 static int
535 ncy(char *ep, const char *d, const char *a, size_t lev)
536 {
537 
538 	if (skipv(skipv(ep - 1)) < word)
539 		return 0;
540 	ep[-1] = 't';
541 	return strip(ep, d, a, lev);
542 }
543 
/*
 * Handler for "ibility": store 'l' at ep and let y_to_e() put the 'e'
 * right after it (giving a stem in "-le").
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{

	ep[0] = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
551 
/*
 * If the stem ends in 'i', turn that 'i' into 'y' and use d as the
 * derivation; otherwise keep the stem and use a.  The stem is then
 * handed to strip().
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return strip(ep, "", how, lev);
}
562 
563 static int
564 es(char *ep, const char *d, const char *a, size_t lev)
565 {
566 
567 	if (lev > DLEV)
568 		return 0;
569 
570 	switch (ep[-1]) {
571 	default:
572 		return 0;
573 	case 'i':
574 		return i_to_y(ep, d, a, lev);
575 	case 's':
576 	case 'h':
577 	case 'z':
578 	case 'x':
579 		return strip(ep, d, a, lev);
580 	}
581 }
582 
/*
 * Handler for "metry": overwrite the two characters before ep with
 * "er" and hand the stem to strip().
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return strip(ep, d, a, lev);
}
591 
/*
 * Handler for the "tion"/"tive"/"ator" endings; the choice depends on
 * the character two places before ep: 'c' or 'r' looks the stem up
 * with trypref(), 'a' goes through y_to_e(), anything else fails.
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char before = ep[-2];

	if (before == 'c' || before == 'r')
		return trypref(ep, a, lev);
	if (before == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
605 
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem, either try the stem
 * with an 'e' put back (y_to_e()) or fall back to VCe().
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{
	const char last = ep[-1];
	int restore_e;

	if (last == 'l') {
		const char prev = ep[-2];

		/* Consonant other than l, r, w before the 'l': "-le" only. */
		if (!vowel(prev) && prev != 'l' && prev != 'r' && prev != 'w')
			return y_to_e(ep, d, a, lev);
		restore_e = 0;
	} else if (last == 'u') {
		restore_e = 1;
	} else if (last == 's' || last == 'c' || last == 'g' ||
	    last == 'v' || last == 'z') {
		if (last == 's' && ep[-2] == 's') {
			restore_e = 0;
		} else {
			/* s, c and g are rejected when the suffix starts with 'a'. */
			if (last != 'v' && last != 'z' && *ep == 'a')
				return 0;
			restore_e = !vowel(ep[-2]);
		}
	} else {
		restore_e = 0;
	}

	if (restore_e) {
		if (y_to_e(ep, d, a, lev))
			return 1;
		/*
		 * Read the stem again only now: handlers reached through
		 * y_to_e() write into word[].
		 */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return 0;
	}
	return VCe(ep, d, a, lev);
}
648 
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' fails.  When the stem ends in a vowel followed by
 * a consonant, it is first tried with an 'e' added (derivation d); if
 * that fails the overwritten character is put back.  Finally the
 * unchanged stem is handed to strip().
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{

	if (ep[-1] == 'e')
		return 0;

	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		const char saved = ep[0];

		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		ep[0] = saved;
	}
	return strip(ep, d, a, lev);
}
670 
671 static const char *
672 lookuppref(char **wp, char *ep)
673 {
674 	const char **sp, *cp;
675 	char *bp;
676 
677 	for (sp = preftab; *sp; sp++) {
678 		bp = *wp;
679 		for (cp = *sp; *cp; cp++, bp++) {
680 			if (tolower((unsigned char)*bp) != *cp)
681 				goto next;
682 		}
683 		for (cp = bp; cp < ep; cp++) {
684 			if (vowel(*cp)) {
685 				*wp = bp;
686 				return *sp;
687 			}
688 		}
689 next:		;
690 	}
691 	return 0;
692 }
693 
694 /*
695  * If the word is not in the dictionary, try stripping off prefixes
696  * until the word is found or we run out of prefixes to check.
697  */
698 static int
699 trypref(char *ep, const char *a, size_t lev)
700 {
701 	const char *cp;
702 	char *bp;
703 	char *pp;
704 	int val = 0;
705 	char space[20];
706 
707 	getderiv(lev + 2);
708 	deriv.buf[lev] = a;
709 	if (tryword(word, ep, lev))
710 		return 1;
711 	bp = word;
712 	pp = space;
713 	deriv.buf[lev + 1] = pp;
714 	while ((cp = lookuppref(&bp, ep)) != NULL) {
715 		*pp++ = '+';
716 		while ((*pp = *cp++))
717 			pp++;
718 		if (tryword(bp, ep, lev + 1)) {
719 			val = 1;
720 			break;
721 		}
722 		if (pp - space >= sizeof(space))
723 			return 0;
724 	}
725 	deriv.buf[lev + 1] = deriv.buf[lev + 2] = '\0';
726 	return val;
727 }
728 
729 static int
730 tryword(char *bp, char *ep, size_t lev)
731 {
732 	size_t i, j;
733 	char duple[3];
734 
735 	if (ep-bp <= 1)
736 		return 0;
737 	if (vowel(*ep) && monosyl(bp, ep))
738 		return 0;
739 
740 	i = dict(bp, ep);
741 	if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
742 	    monosyl(bp, ep - 1)) {
743 		ep--;
744 		getderiv(++lev);
745 		deriv.buf[lev] = duple;
746 		duple[0] = '+';
747 		duple[1] = *ep;
748 		duple[2] = '\0';
749 		i = dict(bp, ep);
750 	}
751 	if (vflag == 0 || i == 0)
752 		return i;
753 
754 	/* Also tack on possible derivations. (XXX - warn on truncation?) */
755 	for (j = lev; j > 0; j--) {
756 		if (deriv.buf[j])
757 			(void)strlcat(affix, deriv.buf[j], sizeof(affix));
758 	}
759 	return i;
760 }
761 
/*
 * Return 1 when bp..ep ends in a single vowel followed by one consonant
 * other than 'x' or 'w' and contains no other vowel; 0 otherwise
 * (including when fewer than two characters are given).
 */
static int
monosyl(char *bp, char *ep)
{
	char *last, *nucleus, *p;

	if (ep - bp < 2)
		return 0;

	last = ep - 1;
	if (vowel(*last))
		return 0;
	nucleus = ep - 2;
	if (!vowel(*nucleus))
		return 0;
	if (*last == 'x' || *last == 'w')
		return 0;

	/* Nothing in front of that vowel may be a vowel. */
	for (p = nucleus; p > bp; p--) {
		if (vowel(p[-1]))
			return 0;
	}
	return 1;
}
775 
776 static char *
777 skipv(char *st)
778 {
779 
780 	if (st >= word && vowel(*st))
781 		st--;
782 	while (st >= word && !vowel(*st))
783 		st--;
784 	return st;
785 }
786 
/*
 * Return 1 if c is one of a, e, i, o, u or y in either case, else 0.
 *
 * Callers pass plain char values, which are negative for bytes above
 * 0x7f where char is signed; c is therefore reduced to unsigned char
 * before it reaches tolower(), as the rest of this file does.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
802 
803 /*
804  * Crummy way to Britishise.
805  */
806 static void
807 ise(void)
808 {
809 	struct suftab *tab;
810 	char *cp;
811 
812 	for (tab = suftab; tab->suf; tab++) {
813 		/* Assume that suffix will contain 'z' if a1 or d1 do */
814 		if (strchr(tab->suf, 'z')) {
815 			tab->suf = cp = estrdup(tab->suf);
816 			ztos(cp);
817 			if (strchr(tab->d1, 'z')) {
818 				tab->d1 = cp = estrdup(tab->d1);
819 				ztos(cp);
820 			}
821 			if (strchr(tab->a1, 'z')) {
822 				tab->a1 = cp = estrdup(tab->a1);
823 				ztos(cp);
824 			}
825 		}
826 	}
827 }
828 
/*
 * Replace every lower-case 'z' in the string st with 's', in place.
 */
static void
ztos(char *st)
{
	char *z;

	for (z = strchr(st, 'z'); z != NULL; z = strchr(z + 1, 'z'))
		*z = 's';
}
837 
838 /*
839  * Look up a word in the dictionary.
840  * Returns 1 if found, 0 if not.
841  */
842 static int
843 dict(char *bp, char *ep)
844 {
845 	char c;
846 	int i, rval;
847 
848 	c = *ep;
849 	*ep = '\0';
850 	if (xflag)
851 		printf("=%s\n", bp);
852 	for (i = rval = 0; wlists[i].fd != -1; i++) {
853 		if ((rval = look((unsigned char *)bp, wlists[i].front,
854 		    wlists[i].back)) == 1)
855 			break;
856 	}
857 	*ep = c;
858 	return rval;
859 }
860 
861 static void
862 getderiv(size_t lev)
863 {
864 	if (deriv.maxlev < lev) {
865 		void *p = realloc(deriv.buf, sizeof(*deriv.buf) * lev);
866 		if (p == NULL)
867 			err(1, "Cannot grow array");
868 		deriv.buf = p;
869 		deriv.maxlev = lev;
870 	}
871 }
872 
873 
/*
 * Print the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n", prog);
	exit(1);
}
882