1 /* $NetBSD: spellprog.c,v 1.6 2006/10/22 16:36:44 christos Exp $ */ 2 3 /* derived from OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp */ 4 5 /* 6 * Copyright (c) 1991, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)spell.h 8.1 (Berkeley) 6/6/93 34 */ 35 /* 36 * Copyright (C) Caldera International Inc. 2001-2002. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code and documentation must retain the above 43 * copyright notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed or owned by Caldera 50 * International, Inc. 51 * 4. Neither the name of Caldera International, Inc. nor the names of other 52 * contributors may be used to endorse or promote products derived from 53 * this software without specific prior written permission. 54 * 55 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 56 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 57 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 58 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 60 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 64 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 65 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 * POSSIBILITY OF SUCH DAMAGE. 67 */ 68 69 #ifndef lint 70 static const char copyright[] = 71 "@(#) Copyright (c) 1991, 1993\n\ 72 The Regents of the University of California. All rights reserved.\n"; 73 #endif /* not lint */ 74 75 #ifndef lint 76 #if 0 77 static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93"; 78 #else 79 #endif 80 static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.4 2003/06/03 02:56:16 millert Exp $"; 81 #endif /* not lint */ 82 83 #include <sys/param.h> 84 #include <sys/mman.h> 85 #include <sys/stat.h> 86 87 #include <ctype.h> 88 #include <err.h> 89 #include <errno.h> 90 #include <fcntl.h> 91 #include <limits.h> 92 #include <locale.h> 93 #include <stdio.h> 94 #include <stdlib.h> 95 #include <string.h> 96 #include <unistd.h> 97 #include <util.h> 98 99 #include "extern.h" 100 101 #define DLEV 2 102 103 static int dict(char *, char *); 104 static int trypref(char *, const char *, size_t); 105 static int tryword(char *, char *, size_t); 106 static int suffix(char *, size_t); 107 static int vowel(int); 108 static const char *lookuppref(char **, char *); 109 static char *skipv(char *); 110 static void ise(void); 111 static void print_word(FILE *); 112 static void ztos(char *); 113 static int monosyl(char *, char *); 114 static void usage(void) __attribute__((__noreturn__)); 115 static void getderiv(size_t); 116 117 static int an(char *, const char *, const char *, size_t); 118 static int bility(char *, const char *, const char *, size_t); 119 static int es(char *, const char *, const char *, size_t); 120 static int i_to_y(char *, const char *, const char *, size_t); 121 static int ily(char *, const char *, const char *, size_t); 122 static int ize(char *, const char *, const char *, size_t); 123 static int metry(char *, const char *, const char *, size_t); 124 static int ncy(char *, const char *, const char *, size_t); 125 static int nop(char *, const char *, const char *, size_t); 126 static int s(char *, const char *, const char *, size_t); 127 static int strip(char *, const char *, const char *, size_t); 128 static int tion(char *, const char *, const char *, size_t); 129 static int y_to_e(char *, const char *, const char *, size_t); 130 static int CCe(char *, const char *, const char *, size_t); 131 static int VCe(char *, const char *, const char *, size_t); 132 133 /* 134 * This cannot be const because we modify it when we choose british 135 * spelling. 136 */ 137 static struct suftab { 138 const char *suf; 139 int (*p1)(char *, const char *, const char *, size_t); 140 int n1; 141 const char *d1; 142 const char *a1; 143 int (*p2)(char *, const char *, const char *, size_t); 144 int n2; 145 const char *d2; 146 const char *a2; 147 } suftab[] = { 148 { .suf = "ssen", .p1 = ily, .n1 = 4, 149 .d1 = "-y+iness", .a1 = "+ness" }, 150 { .suf = "ssel", .p1 = ily, .n1 = 4, 151 .d1 = "-y+i+less", .a1 = "+less" }, 152 { .suf = "se", .p1 = s, .n1 = 1, 153 .d1 = "", .a1 = "+s", .p2 = es, 154 .n2 = 2, .d2 = "-y+ies", .a2 = "+es" }, 155 { .suf = "s'", .p1 = s, .n1 = 2, 156 .d1 = "", .a1 = "+'s" }, 157 { .suf = "s", .p1 = s, .n1 = 1, 158 .d1 = "", .a1 = "+s" }, 159 { .suf = "ecn", .p1 = ncy, .n1 = 1, 160 .d1 = "", .a1 = "-t+ce" }, 161 { .suf = "ycn", .p1 = ncy, .n1 = 1, 162 .d1 = "", .a1 = "-cy+t" }, 163 { .suf = "ytilb", .p1 = nop, .n1 = 0, 164 .d1 = "", .a1 = "" }, 165 { .suf = "ytilib", .p1 = bility, .n1 = 5, 166 .d1 = "-le+ility", .a1 = "" }, 167 { .suf = "elbaif", .p1 = i_to_y, .n1 = 4, 168 .d1 = "-y+iable", .a1 = "" }, 169 { .suf = "elba", .p1 = CCe, .n1 = 4, 170 .d1 = "-e+able", .a1 = "+able" }, 171 { .suf = "yti", .p1 = CCe, .n1 = 3, 172 .d1 = "-e+ity", .a1 = "+ity" }, 173 { .suf = "ylb", .p1 = y_to_e, .n1 = 1, 174 .d1 = "-e+y", .a1 = "" }, 175 { .suf = "yl", .p1 = ily, .n1 = 2, 176 .d1 = "-y+ily", .a1 = "+ly" }, 177 { .suf = "laci", .p1 = strip, .n1 = 2, 178 .d1 = "", .a1 = "+al" }, 179 { .suf = "latnem", .p1 = strip, .n1 = 2, 180 .d1 = "", .a1 = "+al" }, 181 { .suf = "lanoi", .p1 = strip, .n1 = 2, 182 .d1 = "", .a1 = "+al" }, 183 { .suf = "tnem", .p1 = strip, .n1 = 4, 184 .d1 = "", .a1 = "+ment" }, 185 { .suf = "gni", .p1 = CCe, .n1 = 3, 186 .d1 = "-e+ing", .a1 = "+ing" }, 187 { .suf = "reta", .p1 = nop, .n1 = 0, 188 .d1 = "", .a1 = "" }, 189 { .suf = "re", .p1 = strip, .n1 = 1, 190 .d1 = "", .a1 = "+r", .p2 = i_to_y, 191 .n2 = 2, .d2 = "-y+ier", .a2 = "+er" }, 192 { .suf = "de", .p1 = strip, .n1 = 1, 193 .d1 = "", .a1 = "+d", .p2 = i_to_y, 194 .n2 = 2, .d2 = "-y+ied", .a2 = "+ed" }, 195 { .suf = "citsi", .p1 = strip, .n1 = 2, 196 .d1 = "", .a1 = "+ic" }, 197 { .suf = "cihparg", .p1 = i_to_y, .n1 = 1, 198 .d1 = "-y+ic", .a1 = "" }, 199 { .suf = "tse", .p1 = strip, .n1 = 2, 200 .d1 = "", .a1 = "+st", .p2 = i_to_y, 201 .n2 = 3, .d2 = "-y+iest",.a2 = "+est" }, 202 { .suf = "cirtem", .p1 = i_to_y, .n1 = 1, 203 .d1 = "-y+ic", .a1 = "" }, 204 { .suf = "yrtem", .p1 = metry, .n1 = 0, 205 .d1 = "-ry+er", .a1 = "" }, 206 { .suf = "cigol", .p1 = i_to_y, .n1 = 1, 207 .d1 = "-y+ic", .a1 = "" }, 208 { .suf = "tsigol", .p1 = i_to_y, .n1 = 2, 209 .d1 = "-y+ist", .a1 = "" }, 210 { .suf = "tsi", .p1 = VCe, .n1 = 3, 211 .d1 = "-e+ist", .a1 = "+ist" }, 212 { .suf = "msi", .p1 = VCe, .n1 = 3, 213 .d1 = "-e+ism", .a1 = "+ist" }, 214 { .suf = "noitacif", .p1 = i_to_y, .n1 = 6, 215 .d1 = "-y+ication", .a1 = "" }, 216 { .suf = "noitazi", .p1 = ize, .n1 = 5, 217 .d1 = "-e+ation", .a1 = "" }, 218 { .suf = "rota", .p1 = tion, .n1 = 2, 219 .d1 = "-e+or", .a1 = "" }, 220 { .suf = "noit", .p1 = tion, .n1 = 3, 221 .d1 = "-e+ion", .a1 = "+ion" }, 222 { .suf = "naino", .p1 = an, .n1 = 3, 223 .d1 = "", .a1 = "+ian" }, 224 { .suf = "na", .p1 = an, .n1 = 1, 225 .d1 = "", .a1 = "+n" }, 226 { .suf = "evit", .p1 = tion, .n1 = 3, 227 .d1 = "-e+ive", .a1 = "+ive" }, 228 { .suf = "ezi", .p1 = CCe, .n1 = 3, 229 .d1 = "-e+ize", .a1 = "+ize" }, 230 { .suf = "pihs", .p1 = strip, .n1 = 4, 231 .d1 = "", .a1 = "+ship" }, 232 { .suf = "dooh", .p1 = ily, .n1 = 4, 233 .d1 = "-y+hood", .a1 = "+hood" }, 234 { .suf = "ekil", .p1 = strip, .n1 = 4, 235 .d1 = "", .a1 = "+like" }, 236 { .suf = NULL, } 237 }; 238 239 static const char *preftab[] = { 240 "anti", 241 "bio", 242 "dis", 243 "electro", 244 "en", 245 "fore", 246 "hyper", 247 "intra", 248 "inter", 249 "iso", 250 "kilo", 251 "magneto", 252 "meta", 253 "micro", 254 "milli", 255 "mis", 256 "mono", 257 "multi", 258 "non", 259 "out", 260 "over", 261 "photo", 262 "poly", 263 "pre", 264 "pseudo", 265 "re", 266 "semi", 267 "stereo", 268 "sub", 269 "super", 270 "thermo", 271 "ultra", 272 "under", /* must precede un */ 273 "un", 274 NULL 275 }; 276 277 static struct wlist { 278 int fd; 279 unsigned char *front; 280 unsigned char *back; 281 } *wlists; 282 283 static int vflag; 284 static int xflag; 285 static char word[LINE_MAX]; 286 static char original[LINE_MAX]; 287 static char affix[LINE_MAX]; 288 static struct { 289 const char **buf; 290 size_t maxlev; 291 } deriv; 292 293 /* 294 * The spellprog utility accepts a newline-delimited list of words 295 * on stdin. For arguments it expects the path to a word list and 296 * the path to a file in which to store found words. 297 * 298 * In normal usage, spell is called twice. The first time it is 299 * called with a stop list to flag commonly mispelled words. The 300 * remaining words are then passed to spell again, this time with 301 * the dictionary file as the first (non-flag) argument. 302 * 303 * Unlike historic versions of spellprog, this one does not use 304 * hashed files. Instead it simply requires that files be sorted 305 * lexigraphically and uses the same algorithm as the look utility. 306 * 307 * Note that spellprog should be called via the spell shell script 308 * and is not meant to be invoked directly by the user. 309 */ 310 311 int 312 main(int argc, char **argv) 313 { 314 char *ep, *cp, *dp; 315 char *outfile; 316 int ch, fold, i; 317 struct stat sb; 318 FILE *file, *found; 319 320 setlocale(LC_ALL, ""); 321 322 outfile = NULL; 323 while ((ch = getopt(argc, argv, "bvxo:")) != -1) { 324 switch (ch) { 325 case 'b': 326 /* Use British dictionary and convert ize -> ise. */ 327 ise(); 328 break; 329 case 'o': 330 outfile = optarg; 331 break; 332 case 'v': 333 /* Also write derivations to "found" file. */ 334 vflag++; 335 break; 336 case 'x': 337 /* Print plausible stems to stdout. */ 338 xflag++; 339 break; 340 default: 341 usage(); 342 } 343 344 } 345 argc -= optind; 346 argv += optind; 347 if (argc < 1) 348 usage(); 349 350 /* Open and mmap the word/stop lists. */ 351 if ((wlists = malloc(sizeof(struct wlist) * (argc + 1))) == NULL) 352 err(1, "malloc"); 353 354 for (i = 0; argc--; i++) { 355 wlists[i].fd = open(argv[i], O_RDONLY, 0); 356 if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) 357 err(1, "%s", argv[i]); 358 if (sb.st_size > SIZE_T_MAX) 359 errx(1, "%s: %s", argv[i], strerror(EFBIG)); 360 wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, 361 MAP_PRIVATE, wlists[i].fd, (off_t)0); 362 if (wlists[i].front == MAP_FAILED) 363 err(1, "%s", argv[i]); 364 wlists[i].back = wlists[i].front + (size_t)sb.st_size; 365 } 366 wlists[i].fd = -1; 367 368 /* Open file where found words are to be saved. */ 369 if (outfile == NULL) 370 found = NULL; 371 else if ((found = fopen(outfile, "w")) == NULL) 372 err(1, "cannot open %s", outfile); 373 374 for (;; print_word(file)) { 375 affix[0] = '\0'; 376 file = found; 377 for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { 378 if (ep - word == sizeof(word) - 1) { 379 *ep = '\0'; 380 warnx("word too long (%s)", word); 381 while ((ch = getchar()) != '\n') 382 ; /* slurp until EOL */ 383 } 384 if (ch == EOF) { 385 if (found != NULL) 386 fclose(found); 387 exit(0); 388 } 389 } 390 for (cp = word, dp = original; cp < ep; ) 391 *dp++ = *cp++; 392 *dp = '\0'; 393 fold = 0; 394 for (cp = word; cp < ep; cp++) 395 if (islower((unsigned char)*cp)) 396 goto lcase; 397 if (trypref(ep, ".", 0)) 398 continue; 399 ++fold; 400 for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) 401 *dp = tolower((unsigned char)*cp); 402 lcase: 403 if (trypref(ep, ".", 0) || suffix(ep, 0)) 404 continue; 405 if (isupper((unsigned char)word[0])) { 406 for (cp = original, dp = word; (*dp = *cp++); dp++) { 407 if (fold) 408 *dp = tolower((unsigned char)*dp); 409 } 410 word[0] = tolower((unsigned char)word[0]); 411 goto lcase; 412 } 413 file = stdout; 414 } 415 } 416 417 static void 418 print_word(FILE *f) 419 { 420 421 if (f != NULL) { 422 if (vflag && affix[0] != '\0' && affix[0] != '.') 423 fprintf(f, "%s\t%s\n", affix, original); 424 else 425 fprintf(f, "%s\n", original); 426 } 427 } 428 429 /* 430 * For each matching suffix in suftab, call the function associated 431 * with that suffix (p1 and p2). 432 */ 433 static int 434 suffix(char *ep, size_t lev) 435 { 436 const struct suftab *t; 437 char *cp; 438 const char *sp; 439 440 lev += DLEV; 441 getderiv(lev + 1); 442 deriv.buf[lev] = deriv.buf[lev - 1] = 0; 443 for (t = suftab; (sp = t->suf) != NULL; t++) { 444 cp = ep; 445 while (*sp) { 446 if (*--cp != *sp++) 447 goto next; 448 } 449 for (sp = cp; --sp >= word && !vowel(*sp);) 450 ; /* nothing */ 451 if (sp < word) 452 return 0; 453 if ((*t->p1)(ep - t->n1, t->d1, t->a1, lev + 1)) 454 return 1; 455 if (t->p2 != NULL) { 456 deriv.buf[lev] = deriv.buf[lev + 1] = '\0'; 457 return (*t->p2)(ep - t->n2, t->d2, t->a2, lev); 458 } 459 return 0; 460 next: ; 461 } 462 return 0; 463 } 464 465 static int 466 /*ARGSUSED*/ 467 nop(char *ep, const char *d, const char *a, size_t lev) 468 { 469 470 return 0; 471 } 472 473 static int 474 /*ARGSUSED*/ 475 strip(char *ep, const char *d, const char *a, size_t lev) 476 { 477 478 return trypref(ep, a, lev) || suffix(ep, lev); 479 } 480 481 static int 482 s(char *ep, const char *d, const char *a, const size_t lev) 483 { 484 485 if (lev > DLEV + 1) 486 return 0; 487 if (*ep == 's' && ep[-1] == 's') 488 return 0; 489 return strip(ep, d, a, lev); 490 } 491 492 static int 493 /*ARGSUSED*/ 494 an(char *ep, const char *d, const char *a, size_t lev) 495 { 496 497 if (!isupper((unsigned char)*word)) /* must be proper name */ 498 return 0; 499 return trypref(ep, a, lev); 500 } 501 502 static int 503 /*ARGSUSED*/ 504 ize(char *ep, const char *d, const char *a, size_t lev) 505 { 506 507 *ep++ = 'e'; 508 return strip(ep ,"", d, lev); 509 } 510 511 static int 512 /*ARGSUSED*/ 513 y_to_e(char *ep, const char *d, const char *a, size_t lev) 514 { 515 char c = *ep; 516 517 *ep++ = 'e'; 518 if (strip(ep, "", d, lev)) 519 return 1; 520 ep[-1] = c; 521 return 0; 522 } 523 524 static int 525 ily(char *ep, const char *d, const char *a, size_t lev) 526 { 527 528 if (ep[-1] == 'i') 529 return i_to_y(ep, d, a, lev); 530 else 531 return strip(ep, d, a, lev); 532 } 533 534 static int 535 ncy(char *ep, const char *d, const char *a, size_t lev) 536 { 537 538 if (skipv(skipv(ep - 1)) < word) 539 return 0; 540 ep[-1] = 't'; 541 return strip(ep, d, a, lev); 542 } 543 544 static int 545 bility(char *ep, const char *d, const char *a, size_t lev) 546 { 547 548 *ep++ = 'l'; 549 return y_to_e(ep, d, a, lev); 550 } 551 552 static int 553 i_to_y(char *ep, const char *d, const char *a, size_t lev) 554 { 555 556 if (ep[-1] == 'i') { 557 ep[-1] = 'y'; 558 a = d; 559 } 560 return strip(ep, "", a, lev); 561 } 562 563 static int 564 es(char *ep, const char *d, const char *a, size_t lev) 565 { 566 567 if (lev > DLEV) 568 return 0; 569 570 switch (ep[-1]) { 571 default: 572 return 0; 573 case 'i': 574 return i_to_y(ep, d, a, lev); 575 case 's': 576 case 'h': 577 case 'z': 578 case 'x': 579 return strip(ep, d, a, lev); 580 } 581 } 582 583 static int 584 metry(char *ep, const char *d, const char *a, size_t lev) 585 { 586 587 ep[-2] = 'e'; 588 ep[-1] = 'r'; 589 return strip(ep, d, a, lev); 590 } 591 592 static int 593 tion(char *ep, const char *d, const char *a, size_t lev) 594 { 595 596 switch (ep[-2]) { 597 case 'c': 598 case 'r': 599 return trypref(ep, a, lev); 600 case 'a': 601 return y_to_e(ep, d, a, lev); 602 } 603 return 0; 604 } 605 606 /* 607 * Possible consonant-consonant-e ending. 608 */ 609 static int 610 CCe(char *ep, const char *d, const char *a, size_t lev) 611 { 612 613 switch (ep[-1]) { 614 case 'l': 615 if (vowel(ep[-2])) 616 break; 617 switch (ep[-2]) { 618 case 'l': 619 case 'r': 620 case 'w': 621 break; 622 default: 623 return y_to_e(ep, d, a, lev); 624 } 625 break; 626 case 's': 627 if (ep[-2] == 's') 628 break; 629 /*FALLTHROUGH*/ 630 case 'c': 631 case 'g': 632 if (*ep == 'a') 633 return 0; 634 /*FALLTHROUGH*/ 635 case 'v': 636 case 'z': 637 if (vowel(ep[-2])) 638 break; 639 /*FALLTHROUGH*/ 640 case 'u': 641 if (y_to_e(ep, d, a, lev)) 642 return 1; 643 if (!(ep[-2] == 'n' && ep[-1] == 'g')) 644 return 0; 645 } 646 return VCe(ep, d, a, lev); 647 } 648 649 /* 650 * Possible consonant-vowel-consonant-e ending. 651 */ 652 static int 653 VCe(char *ep, const char *d, const char *a, size_t lev) 654 { 655 char c; 656 657 c = ep[-1]; 658 if (c == 'e') 659 return 0; 660 if (!vowel(c) && vowel(ep[-2])) { 661 c = *ep; 662 *ep++ = 'e'; 663 if (trypref(ep, d, lev) || suffix(ep, lev)) 664 return 1; 665 ep--; 666 *ep = c; 667 } 668 return strip(ep, d, a, lev); 669 } 670 671 static const char * 672 lookuppref(char **wp, char *ep) 673 { 674 const char **sp, *cp; 675 char *bp; 676 677 for (sp = preftab; *sp; sp++) { 678 bp = *wp; 679 for (cp = *sp; *cp; cp++, bp++) { 680 if (tolower((unsigned char)*bp) != *cp) 681 goto next; 682 } 683 for (cp = bp; cp < ep; cp++) { 684 if (vowel(*cp)) { 685 *wp = bp; 686 return *sp; 687 } 688 } 689 next: ; 690 } 691 return 0; 692 } 693 694 /* 695 * If the word is not in the dictionary, try stripping off prefixes 696 * until the word is found or we run out of prefixes to check. 697 */ 698 static int 699 trypref(char *ep, const char *a, size_t lev) 700 { 701 const char *cp; 702 char *bp; 703 char *pp; 704 int val = 0; 705 char space[20]; 706 707 getderiv(lev + 2); 708 deriv.buf[lev] = a; 709 if (tryword(word, ep, lev)) 710 return 1; 711 bp = word; 712 pp = space; 713 deriv.buf[lev + 1] = pp; 714 while ((cp = lookuppref(&bp, ep)) != NULL) { 715 *pp++ = '+'; 716 while ((*pp = *cp++)) 717 pp++; 718 if (tryword(bp, ep, lev + 1)) { 719 val = 1; 720 break; 721 } 722 if (pp - space >= sizeof(space)) 723 return 0; 724 } 725 deriv.buf[lev + 1] = deriv.buf[lev + 2] = '\0'; 726 return val; 727 } 728 729 static int 730 tryword(char *bp, char *ep, size_t lev) 731 { 732 size_t i, j; 733 char duple[3]; 734 735 if (ep-bp <= 1) 736 return 0; 737 if (vowel(*ep) && monosyl(bp, ep)) 738 return 0; 739 740 i = dict(bp, ep); 741 if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && 742 monosyl(bp, ep - 1)) { 743 ep--; 744 getderiv(++lev); 745 deriv.buf[lev] = duple; 746 duple[0] = '+'; 747 duple[1] = *ep; 748 duple[2] = '\0'; 749 i = dict(bp, ep); 750 } 751 if (vflag == 0 || i == 0) 752 return i; 753 754 /* Also tack on possible derivations. (XXX - warn on truncation?) */ 755 for (j = lev; j > 0; j--) { 756 if (deriv.buf[j]) 757 (void)strlcat(affix, deriv.buf[j], sizeof(affix)); 758 } 759 return i; 760 } 761 762 static int 763 monosyl(char *bp, char *ep) 764 { 765 766 if (ep < bp + 2) 767 return 0; 768 if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 769 return 0; 770 while (--ep >= bp) 771 if (vowel(*ep)) 772 return 0; 773 return 1; 774 } 775 776 static char * 777 skipv(char *st) 778 { 779 780 if (st >= word && vowel(*st)) 781 st--; 782 while (st >= word && !vowel(*st)) 783 st--; 784 return st; 785 } 786 787 static int 788 vowel(int c) 789 { 790 791 switch (tolower(c)) { 792 case 'a': 793 case 'e': 794 case 'i': 795 case 'o': 796 case 'u': 797 case 'y': 798 return 1; 799 } 800 return 0; 801 } 802 803 /* 804 * Crummy way to Britishise. 805 */ 806 static void 807 ise(void) 808 { 809 struct suftab *tab; 810 char *cp; 811 812 for (tab = suftab; tab->suf; tab++) { 813 /* Assume that suffix will contain 'z' if a1 or d1 do */ 814 if (strchr(tab->suf, 'z')) { 815 tab->suf = cp = estrdup(tab->suf); 816 ztos(cp); 817 if (strchr(tab->d1, 'z')) { 818 tab->d1 = cp = estrdup(tab->d1); 819 ztos(cp); 820 } 821 if (strchr(tab->a1, 'z')) { 822 tab->a1 = cp = estrdup(tab->a1); 823 ztos(cp); 824 } 825 } 826 } 827 } 828 829 static void 830 ztos(char *st) 831 { 832 833 for (; *st; st++) 834 if (*st == 'z') 835 *st = 's'; 836 } 837 838 /* 839 * Look up a word in the dictionary. 840 * Returns 1 if found, 0 if not. 841 */ 842 static int 843 dict(char *bp, char *ep) 844 { 845 char c; 846 int i, rval; 847 848 c = *ep; 849 *ep = '\0'; 850 if (xflag) 851 printf("=%s\n", bp); 852 for (i = rval = 0; wlists[i].fd != -1; i++) { 853 if ((rval = look((unsigned char *)bp, wlists[i].front, 854 wlists[i].back)) == 1) 855 break; 856 } 857 *ep = c; 858 return rval; 859 } 860 861 static void 862 getderiv(size_t lev) 863 { 864 if (deriv.maxlev < lev) { 865 void *p = realloc(deriv.buf, sizeof(*deriv.buf) * lev); 866 if (p == NULL) 867 err(1, "Cannot grow array"); 868 deriv.buf = p; 869 deriv.maxlev = lev; 870 } 871 } 872 873 874 static void 875 usage(void) 876 { 877 (void)fprintf(stderr, 878 "Usage: %s [-bvx] [-o found-words] word-list ...\n", 879 getprogname()); 880 exit(1); 881 } 882