1 /* $NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $ */
2
3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */
4
5 /*
6 * Copyright (c) 1991, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)spell.h 8.1 (Berkeley) 6/6/93
34 */
35 /*
36 * Copyright (C) Caldera International Inc. 2001-2002.
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code and documentation must retain the above
43 * copyright notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. All advertising materials mentioning features or use of this software
48 * must display the following acknowledgement:
49 * This product includes software developed or owned by Caldera
50 * International, Inc.
51 * 4. Neither the name of Caldera International, Inc. nor the names of other
52 * contributors may be used to endorse or promote products derived from
53 * this software without specific prior written permission.
54 *
55 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
56 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
58 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
60 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
65 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 * POSSIBILITY OF SUCH DAMAGE.
67 */
68
69 #include <sys/cdefs.h>
70
#ifndef lint
/* Identification strings embedded in the object file (skipped for lint). */
static const char copyright[] =
"@(#) Copyright (c) 1991, 1993\n\
The Regents of the University of California. All rights reserved.\n";

#if 0
static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93";
#endif
static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $";
#endif /* not lint */
84
85 #include <sys/param.h>
86 #include <sys/mman.h>
87 #include <sys/stat.h>
88
89 #include <ctype.h>
90 #include <err.h>
91 #include <errno.h>
92 #include <fcntl.h>
93 #include <limits.h>
94 #include <locale.h>
95 #include <stdio.h>
96 #include <stdlib.h>
97 #include <string.h>
98 #include <unistd.h>
99 #include <util.h>
100
101 #include "extern.h"
102
103 #define DLEV 2
104
105 static int dict(char *, char *);
106 static int trypref(char *, const char *, size_t);
107 static int tryword(char *, char *, size_t);
108 static int suffix(char *, size_t);
109 static int vowel(int);
110 static const char *lookuppref(char **, char *);
111 static char *skipv(char *);
112 static void ise(void);
113 static void print_word(FILE *);
114 static void ztos(char *);
115 static int monosyl(char *, char *);
116 static void usage(void) __dead;
117 static void getderiv(size_t);
118
119 static int an(char *, const char *, const char *, size_t);
120 static int bility(char *, const char *, const char *, size_t);
121 static int es(char *, const char *, const char *, size_t);
122 static int i_to_y(char *, const char *, const char *, size_t);
123 static int ily(char *, const char *, const char *, size_t);
124 static int ize(char *, const char *, const char *, size_t);
125 static int metry(char *, const char *, const char *, size_t);
126 static int ncy(char *, const char *, const char *, size_t);
127 static int nop(char *, const char *, const char *, size_t);
128 static int s(char *, const char *, const char *, size_t);
129 static int strip(char *, const char *, const char *, size_t);
130 static int tion(char *, const char *, const char *, size_t);
131 static int y_to_e(char *, const char *, const char *, size_t);
132 static int CCe(char *, const char *, const char *, size_t);
133 static int VCe(char *, const char *, const char *, size_t);
134
135 /*
136 * This cannot be const because we modify it when we choose british
137 * spelling.
138 */
139 static struct suftab {
140 const char *suf;
141 int (*p1)(char *, const char *, const char *, size_t);
142 int n1;
143 const char *d1;
144 const char *a1;
145 int (*p2)(char *, const char *, const char *, size_t);
146 int n2;
147 const char *d2;
148 const char *a2;
149 } suftab[] = {
150 { .suf = "ssen", .p1 = ily, .n1 = 4,
151 .d1 = "-y+iness", .a1 = "+ness" },
152 { .suf = "ssel", .p1 = ily, .n1 = 4,
153 .d1 = "-y+i+less", .a1 = "+less" },
154 { .suf = "se", .p1 = s, .n1 = 1,
155 .d1 = "", .a1 = "+s", .p2 = es,
156 .n2 = 2, .d2 = "-y+ies", .a2 = "+es" },
157 { .suf = "s'", .p1 = s, .n1 = 2,
158 .d1 = "", .a1 = "+'s" },
159 { .suf = "s", .p1 = s, .n1 = 1,
160 .d1 = "", .a1 = "+s" },
161 { .suf = "ecn", .p1 = ncy, .n1 = 1,
162 .d1 = "", .a1 = "-t+ce" },
163 { .suf = "ycn", .p1 = ncy, .n1 = 1,
164 .d1 = "", .a1 = "-cy+t" },
165 { .suf = "ytilb", .p1 = nop, .n1 = 0,
166 .d1 = "", .a1 = "" },
167 { .suf = "ytilib", .p1 = bility, .n1 = 5,
168 .d1 = "-le+ility", .a1 = "" },
169 { .suf = "elbaif", .p1 = i_to_y, .n1 = 4,
170 .d1 = "-y+iable", .a1 = "" },
171 { .suf = "elba", .p1 = CCe, .n1 = 4,
172 .d1 = "-e+able", .a1 = "+able" },
173 { .suf = "yti", .p1 = CCe, .n1 = 3,
174 .d1 = "-e+ity", .a1 = "+ity" },
175 { .suf = "ylb", .p1 = y_to_e, .n1 = 1,
176 .d1 = "-e+y", .a1 = "" },
177 { .suf = "yl", .p1 = ily, .n1 = 2,
178 .d1 = "-y+ily", .a1 = "+ly" },
179 { .suf = "laci", .p1 = strip, .n1 = 2,
180 .d1 = "", .a1 = "+al" },
181 { .suf = "latnem", .p1 = strip, .n1 = 2,
182 .d1 = "", .a1 = "+al" },
183 { .suf = "lanoi", .p1 = strip, .n1 = 2,
184 .d1 = "", .a1 = "+al" },
185 { .suf = "tnem", .p1 = strip, .n1 = 4,
186 .d1 = "", .a1 = "+ment" },
187 { .suf = "gni", .p1 = CCe, .n1 = 3,
188 .d1 = "-e+ing", .a1 = "+ing" },
189 { .suf = "reta", .p1 = nop, .n1 = 0,
190 .d1 = "", .a1 = "" },
191 { .suf = "re", .p1 = strip, .n1 = 1,
192 .d1 = "", .a1 = "+r", .p2 = i_to_y,
193 .n2 = 2, .d2 = "-y+ier", .a2 = "+er" },
194 { .suf = "de", .p1 = strip, .n1 = 1,
195 .d1 = "", .a1 = "+d", .p2 = i_to_y,
196 .n2 = 2, .d2 = "-y+ied", .a2 = "+ed" },
197 { .suf = "citsi", .p1 = strip, .n1 = 2,
198 .d1 = "", .a1 = "+ic" },
199 { .suf = "cihparg", .p1 = i_to_y, .n1 = 1,
200 .d1 = "-y+ic", .a1 = "" },
201 { .suf = "tse", .p1 = strip, .n1 = 2,
202 .d1 = "", .a1 = "+st", .p2 = i_to_y,
203 .n2 = 3, .d2 = "-y+iest",.a2 = "+est" },
204 { .suf = "cirtem", .p1 = i_to_y, .n1 = 1,
205 .d1 = "-y+ic", .a1 = "" },
206 { .suf = "yrtem", .p1 = metry, .n1 = 0,
207 .d1 = "-ry+er", .a1 = "" },
208 { .suf = "cigol", .p1 = i_to_y, .n1 = 1,
209 .d1 = "-y+ic", .a1 = "" },
210 { .suf = "tsigol", .p1 = i_to_y, .n1 = 2,
211 .d1 = "-y+ist", .a1 = "" },
212 { .suf = "tsi", .p1 = VCe, .n1 = 3,
213 .d1 = "-e+ist", .a1 = "+ist" },
214 { .suf = "msi", .p1 = VCe, .n1 = 3,
215 .d1 = "-e+ism", .a1 = "+ist" },
216 { .suf = "noitacif", .p1 = i_to_y, .n1 = 6,
217 .d1 = "-y+ication", .a1 = "" },
218 { .suf = "noitazi", .p1 = ize, .n1 = 5,
219 .d1 = "-e+ation", .a1 = "" },
220 { .suf = "rota", .p1 = tion, .n1 = 2,
221 .d1 = "-e+or", .a1 = "" },
222 { .suf = "noit", .p1 = tion, .n1 = 3,
223 .d1 = "-e+ion", .a1 = "+ion" },
224 { .suf = "naino", .p1 = an, .n1 = 3,
225 .d1 = "", .a1 = "+ian" },
226 { .suf = "na", .p1 = an, .n1 = 1,
227 .d1 = "", .a1 = "+n" },
228 { .suf = "evit", .p1 = tion, .n1 = 3,
229 .d1 = "-e+ive", .a1 = "+ive" },
230 { .suf = "ezi", .p1 = CCe, .n1 = 3,
231 .d1 = "-e+ize", .a1 = "+ize" },
232 { .suf = "pihs", .p1 = strip, .n1 = 4,
233 .d1 = "", .a1 = "+ship" },
234 { .suf = "dooh", .p1 = ily, .n1 = 4,
235 .d1 = "-y+hood", .a1 = "+hood" },
236 { .suf = "ekil", .p1 = strip, .n1 = 4,
237 .d1 = "", .a1 = "+like" },
238 { .suf = NULL, }
239 };
240
/*
 * Prefixes that lookuppref() may strip, NULL-terminated.  The table is
 * searched front to back and the first hit wins, so a prefix has to be
 * listed before any shorter prefix it starts with.
 */
static const char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore", "hyper",
	"intra", "inter", "iso", "kilo", "magneto", "meta", "micro",
	"milli", "mis", "mono", "multi", "non", "out", "over",
	"photo", "poly", "pre", "pseudo", "re", "semi", "stereo",
	"sub", "super", "thermo", "ultra",
	"under",		/* must precede un */
	"un",
	NULL
};
278
279 static struct wlist {
280 int fd;
281 unsigned char *front;
282 unsigned char *back;
283 } *wlists;
284
285 static int vflag;
286 static int xflag;
287 static char word[LINE_MAX];
288 static char original[LINE_MAX];
289 static char affix[LINE_MAX];
290 static struct {
291 const char **buf;
292 size_t maxlev;
293 } deriv;
294
/*
 * The spellprog utility accepts a newline-delimited list of words
 * on stdin. For arguments it expects the path to a word list and
 * the path to a file in which to store found words.
 *
 * In normal usage, spell is called twice. The first time it is
 * called with a stop list to flag commonly misspelled words. The
 * remaining words are then passed to spell again, this time with
 * the dictionary file as the first (non-flag) argument.
 *
 * Unlike historic versions of spellprog, this one does not use
 * hashed files. Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
 *
 * Note that spellprog should be called via the spell shell script
 * and is not meant to be invoked directly by the user.
 */
312
313 int
main(int argc,char ** argv)314 main(int argc, char **argv)
315 {
316 char *ep, *cp, *dp;
317 char *outfile;
318 int ch, fold, i;
319 struct stat sb;
320 FILE *file, *found;
321
322 setlocale(LC_ALL, "");
323
324 outfile = NULL;
325 while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
326 switch (ch) {
327 case 'b':
328 /* Use British dictionary and convert ize -> ise. */
329 ise();
330 break;
331 case 'o':
332 outfile = optarg;
333 break;
334 case 'v':
335 /* Also write derivations to "found" file. */
336 vflag++;
337 break;
338 case 'x':
339 /* Print plausible stems to stdout. */
340 xflag++;
341 break;
342 default:
343 usage();
344 }
345
346 }
347 argc -= optind;
348 argv += optind;
349 if (argc < 1)
350 usage();
351
352 /* Open and mmap the word/stop lists. */
353 if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL)
354 err(1, "malloc");
355
356 for (i = 0; argc--; i++) {
357 wlists[i].fd = open(argv[i], O_RDONLY, 0);
358 if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
359 err(1, "%s", argv[i]);
360 if (sb.st_size > SIZE_T_MAX)
361 errx(1, "%s: %s", argv[i], strerror(EFBIG));
362 wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
363 MAP_PRIVATE, wlists[i].fd, (off_t)0);
364 if (wlists[i].front == MAP_FAILED)
365 err(1, "%s", argv[i]);
366 wlists[i].back = wlists[i].front + (size_t)sb.st_size;
367 }
368 wlists[i].fd = -1;
369
370 /* Open file where found words are to be saved. */
371 if (outfile == NULL)
372 found = NULL;
373 else if ((found = fopen(outfile, "w")) == NULL)
374 err(1, "cannot open %s", outfile);
375
376 for (;; print_word(file)) {
377 affix[0] = '\0';
378 file = found;
379 for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
380 if (ep - word == sizeof(word) - 1) {
381 *ep = '\0';
382 warnx("word too long (%s)", word);
383 while ((ch = getchar()) != '\n')
384 ; /* slurp until EOL */
385 }
386 if (ch == EOF) {
387 if (found != NULL)
388 fclose(found);
389 exit(0);
390 }
391 }
392 for (cp = word, dp = original; cp < ep; )
393 *dp++ = *cp++;
394 *dp = '\0';
395 fold = 0;
396 for (cp = word; cp < ep; cp++)
397 if (islower((unsigned char)*cp))
398 goto lcase;
399 if (trypref(ep, ".", 0))
400 continue;
401 ++fold;
402 for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
403 *dp = tolower((unsigned char)*cp);
404 lcase:
405 if (trypref(ep, ".", 0) || suffix(ep, 0))
406 continue;
407 if (isupper((unsigned char)word[0])) {
408 for (cp = original, dp = word; (*dp = *cp++); dp++) {
409 if (fold)
410 *dp = tolower((unsigned char)*dp);
411 }
412 word[0] = tolower((unsigned char)word[0]);
413 goto lcase;
414 }
415 file = stdout;
416 }
417 }
418
419 static void
print_word(FILE * f)420 print_word(FILE *f)
421 {
422
423 if (f != NULL) {
424 if (vflag && affix[0] != '\0' && affix[0] != '.')
425 fprintf(f, "%s\t%s\n", affix, original);
426 else
427 fprintf(f, "%s\n", original);
428 }
429 }
430
431 /*
432 * For each matching suffix in suftab, call the function associated
433 * with that suffix (p1 and p2).
434 */
435 static int
suffix(char * ep,size_t lev)436 suffix(char *ep, size_t lev)
437 {
438 const struct suftab *t;
439 char *cp;
440 const char *sp;
441
442 lev += DLEV;
443 getderiv(lev + 1);
444 deriv.buf[lev] = deriv.buf[lev - 1] = 0;
445 for (t = suftab; (sp = t->suf) != NULL; t++) {
446 cp = ep;
447 while (*sp) {
448 if (*--cp != *sp++)
449 goto next;
450 }
451 for (sp = cp; --sp >= word && !vowel(*sp);)
452 ; /* nothing */
453 if (sp < word)
454 return 0;
455 if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1))
456 return 1;
457 if (t->p2 != NULL) {
458 deriv.buf[lev] = deriv.buf[lev + 1] = NULL;
459 return (*t->p2)(ep - t->n2, t->d2, t->a2, lev);
460 }
461 return 0;
462 next: ;
463 }
464 return 0;
465 }
466
/*
 * Suffix handler that never matches; every argument is ignored.
 */
static int
/*ARGSUSED*/
nop(char *ep, const char *d, const char *a, size_t lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return 0;
}
474
/*
 * Try the stem ending at ep as it stands (allowing prefixes), and if
 * that fails try to peel another suffix off it.  d is unused.
 * Returns 1 on success, 0 on failure.
 */
static int
/*ARGSUSED*/
strip(char *ep, const char *d, const char *a, size_t lev)
{

	if (trypref(ep, a, lev))
		return 1;
	return suffix(ep, lev) != 0;
}
482
483 static int
s(char * ep,const char * d,const char * a,const size_t lev)484 s(char *ep, const char *d, const char *a, const size_t lev)
485 {
486
487 if (lev > DLEV + 1)
488 return 0;
489 if (*ep == 's' && ep[-1] == 's')
490 return 0;
491 return strip(ep, d, a, lev);
492 }
493
494 static int
495 /*ARGSUSED*/
an(char * ep,const char * d,const char * a,size_t lev)496 an(char *ep, const char *d, const char *a, size_t lev)
497 {
498
499 if (!isupper((unsigned char)*word)) /* must be proper name */
500 return 0;
501 return trypref(ep, a, lev);
502 }
503
/*
 * Handler for "-ization": put an 'e' at ep, so that the stem ends in
 * "-ize", and hand the lengthened stem to strip() with d as the
 * derivation string.  a is unused.
 */
static int
/*ARGSUSED*/
ize(char *ep, const char *d, const char *a, size_t lev)
{

	*ep = 'e';
	return strip(ep + 1, "", d, lev);
}
512
/*
 * Overwrite the character at ep with 'e' and try the stem that ends
 * just after it.  The overwritten character is put back if the lookup
 * fails.  a is unused.  Returns 1 on success, 0 on failure.
 */
static int
/*ARGSUSED*/
y_to_e(char *ep, const char *d, const char *a, size_t lev)
{
	const char saved = *ep;

	*ep = 'e';
	if (!strip(ep + 1, "", d, lev)) {
		*ep = saved;
		return 0;
	}
	return 1;
}
525
/*
 * Dispatch on the last stem character: a stem ending in 'i' goes
 * through i_to_y(), anything else straight to strip().
 */
static int
ily(char *ep, const char *d, const char *a, size_t lev)
{

	if (ep[-1] != 'i')
		return strip(ep, d, a, lev);
	return i_to_y(ep, d, a, lev);
}
535
536 static int
ncy(char * ep,const char * d,const char * a,size_t lev)537 ncy(char *ep, const char *d, const char *a, size_t lev)
538 {
539
540 if (skipv(skipv(ep - 1)) < word)
541 return 0;
542 ep[-1] = 't';
543 return strip(ep, d, a, lev);
544 }
545
/*
 * Handler for "-bility": store an 'l' at ep and let y_to_e() supply
 * the following 'e', giving a stem that ends in "-ble".
 */
static int
bility(char *ep, const char *d, const char *a, size_t lev)
{

	*ep = 'l';
	return y_to_e(ep + 1, d, a, lev);
}
553
/*
 * If the stem ends in 'i', turn that into 'y' and use d as the
 * derivation string; otherwise leave the stem alone and use a.
 * The stem is then handed to strip().
 */
static int
i_to_y(char *ep, const char *d, const char *a, size_t lev)
{
	const char *desc = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		desc = d;
	}
	return strip(ep, "", desc, lev);
}
564
565 static int
es(char * ep,const char * d,const char * a,size_t lev)566 es(char *ep, const char *d, const char *a, size_t lev)
567 {
568
569 if (lev > DLEV)
570 return 0;
571
572 switch (ep[-1]) {
573 default:
574 return 0;
575 case 'i':
576 return i_to_y(ep, d, a, lev);
577 case 's':
578 case 'h':
579 case 'z':
580 case 'x':
581 return strip(ep, d, a, lev);
582 }
583 }
584
/*
 * Handler for "-metry": overwrite the two characters in front of ep
 * with "er" and hand the result to strip().  The change is not undone
 * on failure.
 */
static int
metry(char *ep, const char *d, const char *a, size_t lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return strip(ep, d, a, lev);
}
593
/*
 * Handler shared by "-tion", "-tive" and "-ator"; the decision depends
 * on the character two positions in front of ep:
 *   'c' or 'r'	look the stem up as it is (trypref)
 *   'a'	try the stem with an 'e' written at ep (y_to_e)
 *   other	no match
 */
static int
tion(char *ep, const char *d, const char *a, size_t lev)
{
	const char key = ep[-2];

	if (key == 'c' || key == 'r')
		return trypref(ep, a, lev);
	if (key == 'a')
		return y_to_e(ep, d, a, lev);
	return 0;
}
607
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two stem characters, the stem is tried with an
 * 'e' written at ep (y_to_e), handed on to VCe(), or rejected.  The
 * cases of the switch deliberately fall into one another.
 */
static int
CCe(char *ep, const char *d, const char *a, size_t lev)
{

	switch (ep[-1]) {
	case 'l':
		if (vowel(ep[-2]))
			break;
		/* After l, r or w go straight to VCe(). */
		if (ep[-2] != 'l' && ep[-2] != 'r' && ep[-2] != 'w')
			return y_to_e(ep, d, a, lev);
		break;
	case 's':
		if (ep[-2] == 's')
			break;
		/*FALLTHROUGH*/
	case 'c':
	case 'g':
		/* Rejected when the character at ep is 'a'. */
		if (*ep == 'a')
			return 0;
		/*FALLTHROUGH*/
	case 'v':
	case 'z':
		if (vowel(ep[-2]))
			break;
		/*FALLTHROUGH*/
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return 1;
		/* Only a stem ending in "ng" gets a further try below. */
		if (!(ep[-2] == 'n' && ep[-1] == 'g'))
			return 0;
	}
	return VCe(ep, d, a, lev);
}
650
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  If the stem ends in a vowel
 * followed by a consonant, it is first tried with an 'e' written at ep
 * (undone on failure); after that the stem is tried as it is.
 */
static int
VCe(char *ep, const char *d, const char *a, size_t lev)
{
	char saved;

	if (ep[-1] == 'e')
		return 0;

	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = *ep;
		*ep = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return 1;
		*ep = saved;
	}
	return strip(ep, d, a, lev);
}
672
673 static const char *
lookuppref(char ** wp,char * ep)674 lookuppref(char **wp, char *ep)
675 {
676 const char **sp, *cp;
677 char *bp;
678
679 for (sp = preftab; *sp; sp++) {
680 bp = *wp;
681 for (cp = *sp; *cp; cp++, bp++) {
682 if (tolower((unsigned char)*bp) != *cp)
683 goto next;
684 }
685 for (cp = bp; cp < ep; cp++) {
686 if (vowel(*cp)) {
687 *wp = bp;
688 return *sp;
689 }
690 }
691 next: ;
692 }
693 return 0;
694 }
695
696 /*
697 * If the word is not in the dictionary, try stripping off prefixes
698 * until the word is found or we run out of prefixes to check.
699 */
700 static int
trypref(char * ep,const char * a,size_t lev)701 trypref(char *ep, const char *a, size_t lev)
702 {
703 const char *cp;
704 char *bp;
705 char *pp;
706 int val = 0;
707 char space[20];
708
709 getderiv(lev + 2);
710 deriv.buf[lev] = a;
711 if (tryword(word, ep, lev))
712 return 1;
713 bp = word;
714 pp = space;
715 deriv.buf[lev + 1] = pp;
716 while ((cp = lookuppref(&bp, ep)) != NULL) {
717 *pp++ = '+';
718 while ((*pp = *cp++))
719 pp++;
720 if (tryword(bp, ep, lev + 1)) {
721 val = 1;
722 break;
723 }
724 if (pp - space >= sizeof(space))
725 return 0;
726 }
727 deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL;
728 return val;
729 }
730
731 static int
tryword(char * bp,char * ep,size_t lev)732 tryword(char *bp, char *ep, size_t lev)
733 {
734 size_t i, j;
735 char duple[3];
736
737 if (ep-bp <= 1)
738 return 0;
739 if (vowel(*ep) && monosyl(bp, ep))
740 return 0;
741
742 i = dict(bp, ep);
743 if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] &&
744 monosyl(bp, ep - 1)) {
745 ep--;
746 getderiv(++lev);
747 deriv.buf[lev] = duple;
748 duple[0] = '+';
749 duple[1] = *ep;
750 duple[2] = '\0';
751 i = dict(bp, ep);
752 }
753 if (vflag == 0 || i == 0)
754 return i;
755
756 /* Also tack on possible derivations. (XXX - warn on truncation?) */
757 for (j = lev; j > 0; j--) {
758 if (deriv.buf[j])
759 (void)strlcat(affix, deriv.buf[j], sizeof(affix));
760 }
761 return i;
762 }
763
/*
 * Return 1 if the text [bp, ep) is at least two characters long, ends
 * in a vowel followed by one consonant other than 'x' or 'w', and has
 * no other vowel before that; return 0 otherwise.
 */
static int
monosyl(char *bp, char *ep)
{
	char *cp;

	if (ep < bp + 2)
		return 0;
	if (vowel(ep[-1]) || !vowel(ep[-2]) ||
	    ep[-1] == 'x' || ep[-1] == 'w')
		return 0;

	/* Nothing in front of the final vowel may be a vowel. */
	for (cp = ep - 2; cp > bp; cp--) {
		if (vowel(cp[-1]))
			return 0;
	}
	return 1;
}
777
778 static char *
skipv(char * st)779 skipv(char *st)
780 {
781
782 if (st >= word && vowel(*st))
783 st--;
784 while (st >= word && !vowel(*st))
785 st--;
786 return st;
787 }
788
/*
 * Return 1 if c is one of the letters a, e, i, o, u or y in either
 * case, 0 otherwise.
 *
 * Callers pass plain char values, which are negative for bytes with
 * the high bit set where char is signed; the value is reduced to
 * unsigned char first because tolower() is only defined for EOF and
 * values representable as unsigned char.
 */
static int
vowel(int c)
{

	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return 1;
	default:
		return 0;
	}
}
804
805 /*
806 * Crummy way to Britishise.
807 */
808 static void
ise(void)809 ise(void)
810 {
811 struct suftab *tab;
812 char *cp;
813
814 for (tab = suftab; tab->suf; tab++) {
815 /* Assume that suffix will contain 'z' if a1 or d1 do */
816 if (strchr(tab->suf, 'z')) {
817 tab->suf = cp = estrdup(tab->suf);
818 ztos(cp);
819 if (strchr(tab->d1, 'z')) {
820 tab->d1 = cp = estrdup(tab->d1);
821 ztos(cp);
822 }
823 if (strchr(tab->a1, 'z')) {
824 tab->a1 = cp = estrdup(tab->a1);
825 ztos(cp);
826 }
827 }
828 }
829 }
830
/*
 * Replace every 'z' in the NUL-terminated string st with 's', in place.
 */
static void
ztos(char *st)
{

	while (*st != '\0') {
		if (*st == 'z')
			*st = 's';
		st++;
	}
}
839
840 /*
841 * Look up a word in the dictionary.
842 * Returns 1 if found, 0 if not.
843 */
844 static int
dict(char * bp,char * ep)845 dict(char *bp, char *ep)
846 {
847 char c;
848 int i, rval;
849
850 c = *ep;
851 *ep = '\0';
852 if (xflag)
853 printf("=%s\n", bp);
854 for (i = rval = 0; wlists[i].fd != -1; i++) {
855 if ((rval = look((unsigned char *)bp, wlists[i].front,
856 wlists[i].back)) == 1)
857 break;
858 }
859 *ep = c;
860 return rval;
861 }
862
863 static void
getderiv(size_t lev)864 getderiv(size_t lev)
865 {
866 if (deriv.maxlev < lev) {
867 if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0)
868 err(1, "Cannot grow array");
869 deriv.maxlev = lev;
870 }
871 }
872
873
/*
 * Print the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-bvx] [-o found-words] word-list ...\n",
	    progname);
	exit(1);
}
882