1 /* $NetBSD: spellprog.c,v 1.10 2021/11/09 09:41:05 nia Exp $ */ 2 3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */ 4 5 /* 6 * Copyright (c) 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)spell.h 8.1 (Berkeley) 6/6/93 34 */ 35 /* 36 * Copyright (C) Caldera International Inc. 2001-2002. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code and documentation must retain the above 43 * copyright notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed or owned by Caldera 50 * International, Inc. 51 * 4. Neither the name of Caldera International, Inc. nor the names of other 52 * contributors may be used to endorse or promote products derived from 53 * this software without specific prior written permission. 54 * 55 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 56 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 58 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 60 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 65 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 #include <sys/cdefs.h> 70 71 #ifndef lint 72 static const char copyright[] = 73 "@(#) Copyright (c) 1991, 1993\n\ 74 The Regents of the University of California. All rights reserved.\n"; 75 #endif /* not lint */ 76 77 #ifndef lint 78 #if 0 79 static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93"; 80 #else 81 #endif 82 static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $"; 83 #endif /* not lint */ 84 85 #include <sys/param.h> 86 #include <sys/mman.h> 87 #include <sys/stat.h> 88 89 #include <ctype.h> 90 #include <err.h> 91 #include <errno.h> 92 #include <fcntl.h> 93 #include <limits.h> 94 #include <locale.h> 95 #include <stdio.h> 96 #include <stdlib.h> 97 #include <string.h> 98 #include <unistd.h> 99 #include <util.h> 100 101 #include "extern.h" 102 103 #define DLEV 2 104 105 static int dict(char *, char *); 106 static int trypref(char *, const char *, size_t); 107 static int tryword(char *, char *, size_t); 108 static int suffix(char *, size_t); 109 static int vowel(int); 110 static const char *lookuppref(char **, char *); 111 static char *skipv(char *); 112 static void ise(void); 113 static void print_word(FILE *); 114 static void ztos(char *); 115 static int monosyl(char *, char *); 116 static void usage(void) __dead; 117 static void getderiv(size_t); 118 119 static int an(char *, const char *, const char *, size_t); 120 static int bility(char *, const char *, const char *, size_t); 121 static int es(char *, const char *, const char *, size_t); 122 static int i_to_y(char *, const char *, const char *, size_t); 123 static int ily(char *, const char *, const char *, size_t); 124 static int ize(char *, const char *, const char *, size_t); 125 static int metry(char *, const char *, const char *, size_t); 126 static int ncy(char *, const char *, const char *, size_t); 127 static int nop(char *, const char *, const char *, size_t); 128 static int s(char *, const char *, const char *, size_t); 129 static int strip(char *, const char *, const char *, size_t); 130 static int tion(char *, const char *, const char *, size_t); 131 static int y_to_e(char *, const char *, const char *, size_t); 132 static int CCe(char *, const char *, const char *, size_t); 133 static int VCe(char *, const char *, const char *, size_t); 134 135 /* 136 * This cannot be const because we modify it when we choose british 137 * spelling. 138 */ 139 static struct suftab { 140 const char *suf; 141 int (*p1)(char *, const char *, const char *, size_t); 142 int n1; 143 const char *d1; 144 const char *a1; 145 int (*p2)(char *, const char *, const char *, size_t); 146 int n2; 147 const char *d2; 148 const char *a2; 149 } suftab[] = { 150 { .suf = "ssen", .p1 = ily, .n1 = 4, 151 .d1 = "-y+iness", .a1 = "+ness" }, 152 { .suf = "ssel", .p1 = ily, .n1 = 4, 153 .d1 = "-y+i+less", .a1 = "+less" }, 154 { .suf = "se", .p1 = s, .n1 = 1, 155 .d1 = "", .a1 = "+s", .p2 = es, 156 .n2 = 2, .d2 = "-y+ies", .a2 = "+es" }, 157 { .suf = "s'", .p1 = s, .n1 = 2, 158 .d1 = "", .a1 = "+'s" }, 159 { .suf = "s", .p1 = s, .n1 = 1, 160 .d1 = "", .a1 = "+s" }, 161 { .suf = "ecn", .p1 = ncy, .n1 = 1, 162 .d1 = "", .a1 = "-t+ce" }, 163 { .suf = "ycn", .p1 = ncy, .n1 = 1, 164 .d1 = "", .a1 = "-cy+t" }, 165 { .suf = "ytilb", .p1 = nop, .n1 = 0, 166 .d1 = "", .a1 = "" }, 167 { .suf = "ytilib", .p1 = bility, .n1 = 5, 168 .d1 = "-le+ility", .a1 = "" }, 169 { .suf = "elbaif", .p1 = i_to_y, .n1 = 4, 170 .d1 = "-y+iable", .a1 = "" }, 171 { .suf = "elba", .p1 = CCe, .n1 = 4, 172 .d1 = "-e+able", .a1 = "+able" }, 173 { .suf = "yti", .p1 = CCe, .n1 = 3, 174 .d1 = "-e+ity", .a1 = "+ity" }, 175 { .suf = "ylb", .p1 = y_to_e, .n1 = 1, 176 .d1 = "-e+y", .a1 = "" }, 177 { .suf = "yl", .p1 = ily, .n1 = 2, 178 .d1 = "-y+ily", .a1 = "+ly" }, 179 { .suf = "laci", .p1 = strip, .n1 = 2, 180 .d1 = "", .a1 = "+al" }, 181 { .suf = "latnem", .p1 = strip, .n1 = 2, 182 .d1 = "", .a1 = "+al" }, 183 { .suf = "lanoi", .p1 = strip, .n1 = 2, 184 .d1 = "", .a1 = "+al" }, 185 { .suf = "tnem", .p1 = strip, .n1 = 4, 186 .d1 = "", .a1 = "+ment" }, 187 { .suf = "gni", .p1 = CCe, .n1 = 3, 188 .d1 = "-e+ing", .a1 = "+ing" }, 189 { .suf = "reta", .p1 = nop, .n1 = 0, 190 .d1 = "", .a1 = "" }, 191 { .suf = "re", .p1 = strip, .n1 = 1, 192 .d1 = "", .a1 = "+r", .p2 = i_to_y, 193 .n2 = 2, .d2 = "-y+ier", .a2 = "+er" }, 194 { .suf = "de", .p1 = strip, .n1 = 1, 195 .d1 = "", .a1 = "+d", .p2 = i_to_y, 196 .n2 = 2, .d2 = "-y+ied", .a2 = "+ed" }, 197 { .suf = "citsi", .p1 = strip, .n1 = 2, 198 .d1 = "", .a1 = "+ic" }, 199 { .suf = "cihparg", .p1 = i_to_y, .n1 = 1, 200 .d1 = "-y+ic", .a1 = "" }, 201 { .suf = "tse", .p1 = strip, .n1 = 2, 202 .d1 = "", .a1 = "+st", .p2 = i_to_y, 203 .n2 = 3, .d2 = "-y+iest",.a2 = "+est" }, 204 { .suf = "cirtem", .p1 = i_to_y, .n1 = 1, 205 .d1 = "-y+ic", .a1 = "" }, 206 { .suf = "yrtem", .p1 = metry, .n1 = 0, 207 .d1 = "-ry+er", .a1 = "" }, 208 { .suf = "cigol", .p1 = i_to_y, .n1 = 1, 209 .d1 = "-y+ic", .a1 = "" }, 210 { .suf = "tsigol", .p1 = i_to_y, .n1 = 2, 211 .d1 = "-y+ist", .a1 = "" }, 212 { .suf = "tsi", .p1 = VCe, .n1 = 3, 213 .d1 = "-e+ist", .a1 = "+ist" }, 214 { .suf = "msi", .p1 = VCe, .n1 = 3, 215 .d1 = "-e+ism", .a1 = "+ist" }, 216 { .suf = "noitacif", .p1 = i_to_y, .n1 = 6, 217 .d1 = "-y+ication", .a1 = "" }, 218 { .suf = "noitazi", .p1 = ize, .n1 = 5, 219 .d1 = "-e+ation", .a1 = "" }, 220 { .suf = "rota", .p1 = tion, .n1 = 2, 221 .d1 = "-e+or", .a1 = "" }, 222 { .suf = "noit", .p1 = tion, .n1 = 3, 223 .d1 = "-e+ion", .a1 = "+ion" }, 224 { .suf = "naino", .p1 = an, .n1 = 3, 225 .d1 = "", .a1 = "+ian" }, 226 { .suf = "na", .p1 = an, .n1 = 1, 227 .d1 = "", .a1 = "+n" }, 228 { .suf = "evit", .p1 = tion, .n1 = 3, 229 .d1 = "-e+ive", .a1 = "+ive" }, 230 { .suf = "ezi", .p1 = CCe, .n1 = 3, 231 .d1 = "-e+ize", .a1 = "+ize" }, 232 { .suf = "pihs", .p1 = strip, .n1 = 4, 233 .d1 = "", .a1 = "+ship" }, 234 { .suf = "dooh", .p1 = ily, .n1 = 4, 235 .d1 = "-y+hood", .a1 = "+hood" }, 236 { .suf = "ekil", .p1 = strip, .n1 = 4, 237 .d1 = "", .a1 = "+like" }, 238 { .suf = NULL, } 239 }; 240 241 static const char *preftab[] = { 242 "anti", 243 "bio", 244 "dis", 245 "electro", 246 "en", 247 "fore", 248 "hyper", 249 "intra", 250 "inter", 251 "iso", 252 "kilo", 253 "magneto", 254 "meta", 255 "micro", 256 "milli", 257 "mis", 258 "mono", 259 "multi", 260 "non", 261 "out", 262 "over", 263 "photo", 264 "poly", 265 "pre", 266 "pseudo", 267 "re", 268 "semi", 269 "stereo", 270 "sub", 271 "super", 272 "thermo", 273 "ultra", 274 "under", /* must precede un */ 275 "un", 276 NULL 277 }; 278 279 static struct wlist { 280 int fd; 281 unsigned char *front; 282 unsigned char *back; 283 } *wlists; 284 285 static int vflag; 286 static int xflag; 287 static char word[LINE_MAX]; 288 static char original[LINE_MAX]; 289 static char affix[LINE_MAX]; 290 static struct { 291 const char **buf; 292 size_t maxlev; 293 } deriv; 294 295 /* 296 * The spellprog utility accepts a newline-delimited list of words 297 * on stdin. For arguments it expects the path to a word list and 298 * the path to a file in which to store found words. 299 * 300 * In normal usage, spell is called twice. The first time it is 301 * called with a stop list to flag commonly mispelled words. The 302 * remaining words are then passed to spell again, this time with 303 * the dictionary file as the first (non-flag) argument. 304 * 305 * Unlike historic versions of spellprog, this one does not use 306 * hashed files. Instead it simply requires that files be sorted 307 * lexigraphically and uses the same algorithm as the look utility. 308 * 309 * Note that spellprog should be called via the spell shell script 310 * and is not meant to be invoked directly by the user. 311 */ 312 313 int 314 main(int argc, char **argv) 315 { 316 char *ep, *cp, *dp; 317 char *outfile; 318 int ch, fold, i; 319 struct stat sb; 320 FILE *file, *found; 321 322 setlocale(LC_ALL, ""); 323 324 outfile = NULL; 325 while ((ch = getopt(argc, argv, "bvxo:")) != -1) { 326 switch (ch) { 327 case 'b': 328 /* Use British dictionary and convert ize -> ise. */ 329 ise(); 330 break; 331 case 'o': 332 outfile = optarg; 333 break; 334 case 'v': 335 /* Also write derivations to "found" file. */ 336 vflag++; 337 break; 338 case 'x': 339 /* Print plausible stems to stdout. */ 340 xflag++; 341 break; 342 default: 343 usage(); 344 } 345 346 } 347 argc -= optind; 348 argv += optind; 349 if (argc < 1) 350 usage(); 351 352 /* Open and mmap the word/stop lists. */ 353 if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL) 354 err(1, "malloc"); 355 356 for (i = 0; argc--; i++) { 357 wlists[i].fd = open(argv[i], O_RDONLY, 0); 358 if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) 359 err(1, "%s", argv[i]); 360 if (sb.st_size > SIZE_T_MAX) 361 errx(1, "%s: %s", argv[i], strerror(EFBIG)); 362 wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, 363 MAP_PRIVATE, wlists[i].fd, (off_t)0); 364 if (wlists[i].front == MAP_FAILED) 365 err(1, "%s", argv[i]); 366 wlists[i].back = wlists[i].front + (size_t)sb.st_size; 367 } 368 wlists[i].fd = -1; 369 370 /* Open file where found words are to be saved. */ 371 if (outfile == NULL) 372 found = NULL; 373 else if ((found = fopen(outfile, "w")) == NULL) 374 err(1, "cannot open %s", outfile); 375 376 for (;; print_word(file)) { 377 affix[0] = '\0'; 378 file = found; 379 for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { 380 if (ep - word == sizeof(word) - 1) { 381 *ep = '\0'; 382 warnx("word too long (%s)", word); 383 while ((ch = getchar()) != '\n') 384 ; /* slurp until EOL */ 385 } 386 if (ch == EOF) { 387 if (found != NULL) 388 fclose(found); 389 exit(0); 390 } 391 } 392 for (cp = word, dp = original; cp < ep; ) 393 *dp++ = *cp++; 394 *dp = '\0'; 395 fold = 0; 396 for (cp = word; cp < ep; cp++) 397 if (islower((unsigned char)*cp)) 398 goto lcase; 399 if (trypref(ep, ".", 0)) 400 continue; 401 ++fold; 402 for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) 403 *dp = tolower((unsigned char)*cp); 404 lcase: 405 if (trypref(ep, ".", 0) || suffix(ep, 0)) 406 continue; 407 if (isupper((unsigned char)word[0])) { 408 for (cp = original, dp = word; (*dp = *cp++); dp++) { 409 if (fold) 410 *dp = tolower((unsigned char)*dp); 411 } 412 word[0] = tolower((unsigned char)word[0]); 413 goto lcase; 414 } 415 file = stdout; 416 } 417 } 418 419 static void 420 print_word(FILE *f) 421 { 422 423 if (f != NULL) { 424 if (vflag && affix[0] != '\0' && affix[0] != '.') 425 fprintf(f, "%s\t%s\n", affix, original); 426 else 427 fprintf(f, "%s\n", original); 428 } 429 } 430 431 /* 432 * For each matching suffix in suftab, call the function associated 433 * with that suffix (p1 and p2). 434 */ 435 static int 436 suffix(char *ep, size_t lev) 437 { 438 const struct suftab *t; 439 char *cp; 440 const char *sp; 441 442 lev += DLEV; 443 getderiv(lev + 1); 444 deriv.buf[lev] = deriv.buf[lev - 1] = 0; 445 for (t = suftab; (sp = t->suf) != NULL; t++) { 446 cp = ep; 447 while (*sp) { 448 if (*--cp != *sp++) 449 goto next; 450 } 451 for (sp = cp; --sp >= word && !vowel(*sp);) 452 ; /* nothing */ 453 if (sp < word) 454 return 0; 455 if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1)) 456 return 1; 457 if (t->p2 != NULL) { 458 deriv.buf[lev] = deriv.buf[lev + 1] = NULL; 459 return (*t->p2)(ep - t->n2, t->d2, t->a2, lev); 460 } 461 return 0; 462 next: ; 463 } 464 return 0; 465 } 466 467 static int 468 /*ARGSUSED*/ 469 nop(char *ep, const char *d, const char *a, size_t lev) 470 { 471 472 return 0; 473 } 474 475 static int 476 /*ARGSUSED*/ 477 strip(char *ep, const char *d, const char *a, size_t lev) 478 { 479 480 return trypref(ep, a, lev) || suffix(ep, lev); 481 } 482 483 static int 484 s(char *ep, const char *d, const char *a, const size_t lev) 485 { 486 487 if (lev > DLEV + 1) 488 return 0; 489 if (*ep == 's' && ep[-1] == 's') 490 return 0; 491 return strip(ep, d, a, lev); 492 } 493 494 static int 495 /*ARGSUSED*/ 496 an(char *ep, const char *d, const char *a, size_t lev) 497 { 498 499 if (!isupper((unsigned char)*word)) /* must be proper name */ 500 return 0; 501 return trypref(ep, a, lev); 502 } 503 504 static int 505 /*ARGSUSED*/ 506 ize(char *ep, const char *d, const char *a, size_t lev) 507 { 508 509 *ep++ = 'e'; 510 return strip(ep ,"", d, lev); 511 } 512 513 static int 514 /*ARGSUSED*/ 515 y_to_e(char *ep, const char *d, const char *a, size_t lev) 516 { 517 char c = *ep; 518 519 *ep++ = 'e'; 520 if (strip(ep, "", d, lev)) 521 return 1; 522 ep[-1] = c; 523 return 0; 524 } 525 526 static int 527 ily(char *ep, const char *d, const char *a, size_t lev) 528 { 529 530 if (ep[-1] == 'i') 531 return i_to_y(ep, d, a, lev); 532 else 533 return strip(ep, d, a, lev); 534 } 535 536 static int 537 ncy(char *ep, const char *d, const char *a, size_t lev) 538 { 539 540 if (skipv(skipv(ep - 1)) < word) 541 return 0; 542 ep[-1] = 't'; 543 return strip(ep, d, a, lev); 544 } 545 546 static int 547 bility(char *ep, const char *d, const char *a, size_t lev) 548 { 549 550 *ep++ = 'l'; 551 return y_to_e(ep, d, a, lev); 552 } 553 554 static int 555 i_to_y(char *ep, const char *d, const char *a, size_t lev) 556 { 557 558 if (ep[-1] == 'i') { 559 ep[-1] = 'y'; 560 a = d; 561 } 562 return strip(ep, "", a, lev); 563 } 564 565 static int 566 es(char *ep, const char *d, const char *a, size_t lev) 567 { 568 569 if (lev > DLEV) 570 return 0; 571 572 switch (ep[-1]) { 573 default: 574 return 0; 575 case 'i': 576 return i_to_y(ep, d, a, lev); 577 case 's': 578 case 'h': 579 case 'z': 580 case 'x': 581 return strip(ep, d, a, lev); 582 } 583 } 584 585 static int 586 metry(char *ep, const char *d, const char *a, size_t lev) 587 { 588 589 ep[-2] = 'e'; 590 ep[-1] = 'r'; 591 return strip(ep, d, a, lev); 592 } 593 594 static int 595 tion(char *ep, const char *d, const char *a, size_t lev) 596 { 597 598 switch (ep[-2]) { 599 case 'c': 600 case 'r': 601 return trypref(ep, a, lev); 602 case 'a': 603 return y_to_e(ep, d, a, lev); 604 } 605 return 0; 606 } 607 608 /* 609 * Possible consonant-consonant-e ending. 610 */ 611 static int 612 CCe(char *ep, const char *d, const char *a, size_t lev) 613 { 614 615 switch (ep[-1]) { 616 case 'l': 617 if (vowel(ep[-2])) 618 break; 619 switch (ep[-2]) { 620 case 'l': 621 case 'r': 622 case 'w': 623 break; 624 default: 625 return y_to_e(ep, d, a, lev); 626 } 627 break; 628 case 's': 629 if (ep[-2] == 's') 630 break; 631 /*FALLTHROUGH*/ 632 case 'c': 633 case 'g': 634 if (*ep == 'a') 635 return 0; 636 /*FALLTHROUGH*/ 637 case 'v': 638 case 'z': 639 if (vowel(ep[-2])) 640 break; 641 /*FALLTHROUGH*/ 642 case 'u': 643 if (y_to_e(ep, d, a, lev)) 644 return 1; 645 if (!(ep[-2] == 'n' && ep[-1] == 'g')) 646 return 0; 647 } 648 return VCe(ep, d, a, lev); 649 } 650 651 /* 652 * Possible consonant-vowel-consonant-e ending. 653 */ 654 static int 655 VCe(char *ep, const char *d, const char *a, size_t lev) 656 { 657 char c; 658 659 c = ep[-1]; 660 if (c == 'e') 661 return 0; 662 if (!vowel(c) && vowel(ep[-2])) { 663 c = *ep; 664 *ep++ = 'e'; 665 if (trypref(ep, d, lev) || suffix(ep, lev)) 666 return 1; 667 ep--; 668 *ep = c; 669 } 670 return strip(ep, d, a, lev); 671 } 672 673 static const char * 674 lookuppref(char **wp, char *ep) 675 { 676 const char **sp, *cp; 677 char *bp; 678 679 for (sp = preftab; *sp; sp++) { 680 bp = *wp; 681 for (cp = *sp; *cp; cp++, bp++) { 682 if (tolower((unsigned char)*bp) != *cp) 683 goto next; 684 } 685 for (cp = bp; cp < ep; cp++) { 686 if (vowel(*cp)) { 687 *wp = bp; 688 return *sp; 689 } 690 } 691 next: ; 692 } 693 return 0; 694 } 695 696 /* 697 * If the word is not in the dictionary, try stripping off prefixes 698 * until the word is found or we run out of prefixes to check. 699 */ 700 static int 701 trypref(char *ep, const char *a, size_t lev) 702 { 703 const char *cp; 704 char *bp; 705 char *pp; 706 int val = 0; 707 char space[20]; 708 709 getderiv(lev + 2); 710 deriv.buf[lev] = a; 711 if (tryword(word, ep, lev)) 712 return 1; 713 bp = word; 714 pp = space; 715 deriv.buf[lev + 1] = pp; 716 while ((cp = lookuppref(&bp, ep)) != NULL) { 717 *pp++ = '+'; 718 while ((*pp = *cp++)) 719 pp++; 720 if (tryword(bp, ep, lev + 1)) { 721 val = 1; 722 break; 723 } 724 if (pp - space >= sizeof(space)) 725 return 0; 726 } 727 deriv.buf[lev + 1] = deriv.buf[lev + 2] = NULL; 728 return val; 729 } 730 731 static int 732 tryword(char *bp, char *ep, size_t lev) 733 { 734 size_t i, j; 735 char duple[3]; 736 737 if (ep-bp <= 1) 738 return 0; 739 if (vowel(*ep) && monosyl(bp, ep)) 740 return 0; 741 742 i = dict(bp, ep); 743 if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && 744 monosyl(bp, ep - 1)) { 745 ep--; 746 getderiv(++lev); 747 deriv.buf[lev] = duple; 748 duple[0] = '+'; 749 duple[1] = *ep; 750 duple[2] = '\0'; 751 i = dict(bp, ep); 752 } 753 if (vflag == 0 || i == 0) 754 return i; 755 756 /* Also tack on possible derivations. (XXX - warn on truncation?) */ 757 for (j = lev; j > 0; j--) { 758 if (deriv.buf[j]) 759 (void)strlcat(affix, deriv.buf[j], sizeof(affix)); 760 } 761 return i; 762 } 763 764 static int 765 monosyl(char *bp, char *ep) 766 { 767 768 if (ep < bp + 2) 769 return 0; 770 if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 771 return 0; 772 while (--ep >= bp) 773 if (vowel(*ep)) 774 return 0; 775 return 1; 776 } 777 778 static char * 779 skipv(char *st) 780 { 781 782 if (st >= word && vowel(*st)) 783 st--; 784 while (st >= word && !vowel(*st)) 785 st--; 786 return st; 787 } 788 789 static int 790 vowel(int c) 791 { 792 793 switch (tolower(c)) { 794 case 'a': 795 case 'e': 796 case 'i': 797 case 'o': 798 case 'u': 799 case 'y': 800 return 1; 801 } 802 return 0; 803 } 804 805 /* 806 * Crummy way to Britishise. 807 */ 808 static void 809 ise(void) 810 { 811 struct suftab *tab; 812 char *cp; 813 814 for (tab = suftab; tab->suf; tab++) { 815 /* Assume that suffix will contain 'z' if a1 or d1 do */ 816 if (strchr(tab->suf, 'z')) { 817 tab->suf = cp = estrdup(tab->suf); 818 ztos(cp); 819 if (strchr(tab->d1, 'z')) { 820 tab->d1 = cp = estrdup(tab->d1); 821 ztos(cp); 822 } 823 if (strchr(tab->a1, 'z')) { 824 tab->a1 = cp = estrdup(tab->a1); 825 ztos(cp); 826 } 827 } 828 } 829 } 830 831 static void 832 ztos(char *st) 833 { 834 835 for (; *st; st++) 836 if (*st == 'z') 837 *st = 's'; 838 } 839 840 /* 841 * Look up a word in the dictionary. 842 * Returns 1 if found, 0 if not. 843 */ 844 static int 845 dict(char *bp, char *ep) 846 { 847 char c; 848 int i, rval; 849 850 c = *ep; 851 *ep = '\0'; 852 if (xflag) 853 printf("=%s\n", bp); 854 for (i = rval = 0; wlists[i].fd != -1; i++) { 855 if ((rval = look((unsigned char *)bp, wlists[i].front, 856 wlists[i].back)) == 1) 857 break; 858 } 859 *ep = c; 860 return rval; 861 } 862 863 static void 864 getderiv(size_t lev) 865 { 866 if (deriv.maxlev < lev) { 867 if (reallocarr(&deriv.buf, lev, sizeof(*deriv.buf)) != 0) 868 err(1, "Cannot grow array"); 869 deriv.maxlev = lev; 870 } 871 } 872 873 874 static void 875 usage(void) 876 { 877 (void)fprintf(stderr, 878 "Usage: %s [-bvx] [-o found-words] word-list ...\n", 879 getprogname()); 880 exit(1); 881 } 882