1*1ed98fdfSderaadt /* $OpenBSD: spellprog.c,v 1.5 2007/09/02 15:19:34 deraadt Exp $ */ 29175dedbSmillert 39175dedbSmillert /* 49175dedbSmillert * Copyright (c) 1991, 1993 59175dedbSmillert * The Regents of the University of California. All rights reserved. 69175dedbSmillert * 79175dedbSmillert * Redistribution and use in source and binary forms, with or without 89175dedbSmillert * modification, are permitted provided that the following conditions 99175dedbSmillert * are met: 109175dedbSmillert * 1. Redistributions of source code must retain the above copyright 119175dedbSmillert * notice, this list of conditions and the following disclaimer. 129175dedbSmillert * 2. Redistributions in binary form must reproduce the above copyright 139175dedbSmillert * notice, this list of conditions and the following disclaimer in the 149175dedbSmillert * documentation and/or other materials provided with the distribution. 15f75387cbSmillert * 3. Neither the name of the University nor the names of its contributors 169175dedbSmillert * may be used to endorse or promote products derived from this software 179175dedbSmillert * without specific prior written permission. 189175dedbSmillert * 199175dedbSmillert * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 209175dedbSmillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 219175dedbSmillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 229175dedbSmillert * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 239175dedbSmillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 249175dedbSmillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 259175dedbSmillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 269175dedbSmillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 279175dedbSmillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 289175dedbSmillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 299175dedbSmillert * SUCH DAMAGE. 309175dedbSmillert * 319175dedbSmillert * @(#)spell.h 8.1 (Berkeley) 6/6/93 329175dedbSmillert */ 339175dedbSmillert /* 349175dedbSmillert * Copyright (C) Caldera International Inc. 2001-2002. 359175dedbSmillert * All rights reserved. 369175dedbSmillert * 379175dedbSmillert * Redistribution and use in source and binary forms, with or without 389175dedbSmillert * modification, are permitted provided that the following conditions 399175dedbSmillert * are met: 409175dedbSmillert * 1. Redistributions of source code and documentation must retain the above 419175dedbSmillert * copyright notice, this list of conditions and the following disclaimer. 429175dedbSmillert * 2. Redistributions in binary form must reproduce the above copyright 439175dedbSmillert * notice, this list of conditions and the following disclaimer in the 449175dedbSmillert * documentation and/or other materials provided with the distribution. 459175dedbSmillert * 3. All advertising materials mentioning features or use of this software 469175dedbSmillert * must display the following acknowledgement: 479175dedbSmillert * This product includes software developed or owned by Caldera 489175dedbSmillert * International, Inc. 499175dedbSmillert * 4. Neither the name of Caldera International, Inc. nor the names of other 509175dedbSmillert * contributors may be used to endorse or promote products derived from 519175dedbSmillert * this software without specific prior written permission. 529175dedbSmillert * 539175dedbSmillert * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 549175dedbSmillert * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 559175dedbSmillert * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 569175dedbSmillert * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 579175dedbSmillert * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 589175dedbSmillert * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 599175dedbSmillert * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 609175dedbSmillert * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 619175dedbSmillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 629175dedbSmillert * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 639175dedbSmillert * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 649175dedbSmillert * POSSIBILITY OF SUCH DAMAGE. 659175dedbSmillert */ 669175dedbSmillert 679175dedbSmillert #ifndef lint 689175dedbSmillert static const char copyright[] = 699175dedbSmillert "@(#) Copyright (c) 1991, 1993\n\ 709175dedbSmillert The Regents of the University of California. All rights reserved.\n"; 719175dedbSmillert #endif /* not lint */ 729175dedbSmillert 739175dedbSmillert #ifndef lint 749175dedbSmillert #if 0 759175dedbSmillert static const char sccsid[] = "@(#)spell.c 8.1 (Berkeley) 6/6/93"; 769175dedbSmillert #else 779175dedbSmillert #endif 78*1ed98fdfSderaadt static const char rcsid[] = "$OpenBSD: spellprog.c,v 1.5 2007/09/02 15:19:34 deraadt Exp $"; 799175dedbSmillert #endif /* not lint */ 809175dedbSmillert 819175dedbSmillert #include <sys/param.h> 829175dedbSmillert #include <sys/mman.h> 839175dedbSmillert #include <sys/stat.h> 849175dedbSmillert 859175dedbSmillert #include <ctype.h> 869175dedbSmillert #include <err.h> 879175dedbSmillert #include <errno.h> 889175dedbSmillert #include <fcntl.h> 899175dedbSmillert #include <limits.h> 909175dedbSmillert #include <locale.h> 919175dedbSmillert #include <stdio.h> 929175dedbSmillert #include <stdlib.h> 939175dedbSmillert #include <string.h> 949175dedbSmillert #include <unistd.h> 959175dedbSmillert 969175dedbSmillert #define DLEV 2 979175dedbSmillert 989175dedbSmillert int an(char *, char *, char *, int); 999175dedbSmillert int bility(char *, char *, char *, int); 1009175dedbSmillert int es(char *, char *, char *, int); 1019175dedbSmillert int dict(char *, char *); 1029175dedbSmillert int i_to_y(char *, char *, char *, int); 1039175dedbSmillert int ily(char *, char *, char *, int); 1049175dedbSmillert int ize(char *, char *, char *, int); 1059175dedbSmillert int metry(char *, char *, char *, int); 1069175dedbSmillert int monosyl(char *, char *); 1079175dedbSmillert int ncy(char *, char *, char *, int); 1089175dedbSmillert int nop(void); 1099175dedbSmillert int trypref(char *, char *, int); 1109175dedbSmillert int tryword(char *, char *, int); 1119175dedbSmillert int s(char *, char *, char *, int); 1129175dedbSmillert int strip(char *, char *, char *, int); 1139175dedbSmillert int suffix(char *, int); 1149175dedbSmillert int tion(char *, char *, char *, int); 1159175dedbSmillert int vowel(int); 1169175dedbSmillert int y_to_e(char *, char *, char *, int); 1179175dedbSmillert int CCe(char *, char *, char *, int); 1189175dedbSmillert int VCe(char *, char *, char *, int); 1199175dedbSmillert char *lookuppref(char **, char *); 1209175dedbSmillert char *skipv(char *); 121f05c4e53Smillert char *estrdup(const char *); 1229175dedbSmillert void ise(void); 1239175dedbSmillert void print_word(FILE *); 1249175dedbSmillert void ztos(char *); 1259175dedbSmillert __dead void usage(void); 1269175dedbSmillert 1279175dedbSmillert /* from look.c */ 1289175dedbSmillert int look(unsigned char *, unsigned char *, unsigned char *); 1299175dedbSmillert 1309175dedbSmillert struct suftab { 131f05c4e53Smillert char *suf; 1329175dedbSmillert int (*p1)(); /* XXX - variable args */ 1339175dedbSmillert int n1; 1349175dedbSmillert char *d1; 1359175dedbSmillert char *a1; 1369175dedbSmillert int (*p2)(); /* XXX - variable args */ 1379175dedbSmillert int n2; 1389175dedbSmillert char *d2; 1399175dedbSmillert char *a2; 1409175dedbSmillert } suftab[] = { 1419175dedbSmillert {"ssen", ily, 4, "-y+iness", "+ness" }, 1429175dedbSmillert {"ssel", ily, 4, "-y+i+less", "+less" }, 1439175dedbSmillert {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" }, 1449175dedbSmillert {"s'", s, 2, "", "+'s"}, 1459175dedbSmillert {"s", s, 1, "", "+s"}, 1469175dedbSmillert {"ecn", ncy, 1, "", "-t+ce"}, 1479175dedbSmillert {"ycn", ncy, 1, "", "-cy+t"}, 1489175dedbSmillert {"ytilb", nop, 0, "", ""}, 1499175dedbSmillert {"ytilib", bility, 5, "-le+ility", ""}, 1509175dedbSmillert {"elbaif", i_to_y, 4, "-y+iable", ""}, 1519175dedbSmillert {"elba", CCe, 4, "-e+able", "+able"}, 1529175dedbSmillert {"yti", CCe, 3, "-e+ity", "+ity"}, 1539175dedbSmillert {"ylb", y_to_e, 1, "-e+y", ""}, 1549175dedbSmillert {"yl", ily, 2, "-y+ily", "+ly"}, 1559175dedbSmillert {"laci", strip, 2, "", "+al"}, 1569175dedbSmillert {"latnem", strip, 2, "", "+al"}, 1579175dedbSmillert {"lanoi", strip, 2, "", "+al"}, 1589175dedbSmillert {"tnem", strip, 4, "", "+ment"}, 1599175dedbSmillert {"gni", CCe, 3, "-e+ing", "+ing"}, 1609175dedbSmillert {"reta", nop, 0, "", ""}, 1619175dedbSmillert {"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"}, 1629175dedbSmillert {"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"}, 1639175dedbSmillert {"citsi", strip, 2, "", "+ic"}, 1649175dedbSmillert {"cihparg", i_to_y, 1, "-y+ic", ""}, 1659175dedbSmillert {"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"}, 1669175dedbSmillert {"cirtem", i_to_y, 1, "-y+ic", ""}, 1679175dedbSmillert {"yrtem", metry, 0, "-ry+er", ""}, 1689175dedbSmillert {"cigol", i_to_y, 1, "-y+ic", ""}, 1699175dedbSmillert {"tsigol", i_to_y, 2, "-y+ist", ""}, 1709175dedbSmillert {"tsi", VCe, 3, "-e+ist", "+ist"}, 1719175dedbSmillert {"msi", VCe, 3, "-e+ism", "+ist"}, 1729175dedbSmillert {"noitacif", i_to_y, 6, "-y+ication", ""}, 1739175dedbSmillert {"noitazi", ize, 5, "-e+ation", ""}, 1749175dedbSmillert {"rota", tion, 2, "-e+or", ""}, 1759175dedbSmillert {"noit", tion, 3, "-e+ion", "+ion"}, 1769175dedbSmillert {"naino", an, 3, "", "+ian"}, 1779175dedbSmillert {"na", an, 1, "", "+n"}, 1789175dedbSmillert {"evit", tion, 3, "-e+ive", "+ive"}, 1799175dedbSmillert {"ezi", CCe, 3, "-e+ize", "+ize"}, 1809175dedbSmillert {"pihs", strip, 4, "", "+ship"}, 1819175dedbSmillert {"dooh", ily, 4, "-y+hood", "+hood"}, 1829175dedbSmillert {"ekil", strip, 4, "", "+like"}, 1839175dedbSmillert { NULL } 1849175dedbSmillert }; 1859175dedbSmillert 1869175dedbSmillert char *preftab[] = { 1879175dedbSmillert "anti", 1889175dedbSmillert "bio", 1899175dedbSmillert "dis", 1909175dedbSmillert "electro", 1919175dedbSmillert "en", 1929175dedbSmillert "fore", 1939175dedbSmillert "hyper", 1949175dedbSmillert "intra", 1959175dedbSmillert "inter", 1969175dedbSmillert "iso", 1979175dedbSmillert "kilo", 1989175dedbSmillert "magneto", 1999175dedbSmillert "meta", 2009175dedbSmillert "micro", 2019175dedbSmillert "milli", 2029175dedbSmillert "mis", 2039175dedbSmillert "mono", 2049175dedbSmillert "multi", 2059175dedbSmillert "non", 2069175dedbSmillert "out", 2079175dedbSmillert "over", 2089175dedbSmillert "photo", 2099175dedbSmillert "poly", 2109175dedbSmillert "pre", 2119175dedbSmillert "pseudo", 2129175dedbSmillert "re", 2139175dedbSmillert "semi", 2149175dedbSmillert "stereo", 2159175dedbSmillert "sub", 2169175dedbSmillert "super", 2179175dedbSmillert "thermo", 2189175dedbSmillert "ultra", 2199175dedbSmillert "under", /* must precede un */ 2209175dedbSmillert "un", 2219175dedbSmillert NULL 2229175dedbSmillert }; 2239175dedbSmillert 2249175dedbSmillert struct wlist { 2259175dedbSmillert int fd; 2269175dedbSmillert unsigned char *front; 2279175dedbSmillert unsigned char *back; 2289175dedbSmillert } *wlists; 2299175dedbSmillert 2309175dedbSmillert int vflag; 2319175dedbSmillert int xflag; 2329175dedbSmillert char word[LINE_MAX]; 2339175dedbSmillert char original[LINE_MAX]; 2349175dedbSmillert char *deriv[40]; 2359175dedbSmillert char affix[40]; 2369175dedbSmillert 2379175dedbSmillert /* 2389175dedbSmillert * The spellprog utility accepts a newline-delimited list of words 2399175dedbSmillert * on stdin. For arguments it expects the path to a word list and 2409175dedbSmillert * the path to a file in which to store found words. 2419175dedbSmillert * 2429175dedbSmillert * In normal usage, spell is called twice. The first time it is 2439175dedbSmillert * called with a stop list to flag commonly mispelled words. The 2449175dedbSmillert * remaining words are then passed to spell again, this time with 2459175dedbSmillert * the dictionary file as the first (non-flag) argument. 2469175dedbSmillert * 2479175dedbSmillert * Unlike historic versions of spellprog, this one does not use 2489175dedbSmillert * hashed files. Instead it simply requires that files be sorted 2499175dedbSmillert * lexigraphically and uses the same algorithm as the look utility. 2509175dedbSmillert * 2519175dedbSmillert * Note that spellprog should be called via the spell shell script 2529175dedbSmillert * and is not meant to be invoked directly by the user. 2539175dedbSmillert */ 2549175dedbSmillert 2559175dedbSmillert int 2569175dedbSmillert main(int argc, char **argv) 2579175dedbSmillert { 2589175dedbSmillert char *ep, *cp, *dp; 2599175dedbSmillert char *outfile; 2609175dedbSmillert int ch, fold, i; 2619175dedbSmillert struct stat sb; 2629175dedbSmillert FILE *file, *found; 2639175dedbSmillert 2649175dedbSmillert setlocale(LC_ALL, ""); 2659175dedbSmillert 2669175dedbSmillert outfile = NULL; 2679175dedbSmillert while ((ch = getopt(argc, argv, "bvxo:")) != -1) { 2689175dedbSmillert switch (ch) { 2699175dedbSmillert case 'b': 2709175dedbSmillert /* Use British dictionary and convert ize -> ise. */ 2719175dedbSmillert ise(); 2729175dedbSmillert break; 2739175dedbSmillert case 'o': 2749175dedbSmillert outfile = optarg; 2759175dedbSmillert break; 2769175dedbSmillert case 'v': 2779175dedbSmillert /* Also write derivations to "found" file. */ 2789175dedbSmillert vflag++; 2799175dedbSmillert break; 2809175dedbSmillert case 'x': 2819175dedbSmillert /* Print plausible stems to stdout. */ 2829175dedbSmillert xflag++; 2839175dedbSmillert break; 2849175dedbSmillert default: 2859175dedbSmillert usage(); 2869175dedbSmillert } 2879175dedbSmillert 2889175dedbSmillert } 2899175dedbSmillert argc -= optind; 2909175dedbSmillert argv += optind; 2919175dedbSmillert if (argc < 1) 2929175dedbSmillert usage(); 2939175dedbSmillert 2949175dedbSmillert /* Open and mmap the word/stop lists. */ 295*1ed98fdfSderaadt if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL) 2969175dedbSmillert err(1, "malloc"); 2979175dedbSmillert for (i = 0; argc--; i++) { 2989175dedbSmillert wlists[i].fd = open(argv[i], O_RDONLY, 0); 2999175dedbSmillert if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) 3009175dedbSmillert err(1, "%s", argv[i]); 3019175dedbSmillert if (sb.st_size > SIZE_T_MAX) 3029175dedbSmillert errx(1, "%s: %s", argv[i], strerror(EFBIG)); 3039175dedbSmillert wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, 3049175dedbSmillert MAP_PRIVATE, wlists[i].fd, (off_t)0); 3059175dedbSmillert if (wlists[i].front == MAP_FAILED) 3069175dedbSmillert err(1, "%s", argv[i]); 3079175dedbSmillert wlists[i].back = wlists[i].front + sb.st_size; 3089175dedbSmillert } 3099175dedbSmillert wlists[i].fd = -1; 3109175dedbSmillert 3119175dedbSmillert /* Open file where found words are to be saved. */ 3129175dedbSmillert if (outfile == NULL) 3139175dedbSmillert found = NULL; 3149175dedbSmillert else if ((found = fopen(outfile, "w")) == NULL) 3159175dedbSmillert err(1, "cannot open %s", outfile); 3169175dedbSmillert 3179175dedbSmillert for (;; print_word(file)) { 3189175dedbSmillert affix[0] = '\0'; 3199175dedbSmillert file = found; 3209175dedbSmillert for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { 3219175dedbSmillert if (ep - word == sizeof(word) - 1) { 3229175dedbSmillert *ep = '\0'; 3239175dedbSmillert warnx("word too long (%s)", word); 3249175dedbSmillert while ((ch = getchar()) != '\n') 3259175dedbSmillert ; /* slurp until EOL */ 3269175dedbSmillert } 3279175dedbSmillert if (ch == EOF) { 3289175dedbSmillert if (found != NULL) 3299175dedbSmillert fclose(found); 3309175dedbSmillert exit(0); 3319175dedbSmillert } 3329175dedbSmillert } 3339175dedbSmillert for (cp = word, dp = original; cp < ep; ) 3349175dedbSmillert *dp++ = *cp++; 3359175dedbSmillert *dp = '\0'; 3369175dedbSmillert fold = 0; 3379175dedbSmillert for (cp = word; cp < ep; cp++) 3389175dedbSmillert if (islower(*cp)) 3399175dedbSmillert goto lcase; 3409175dedbSmillert if (trypref(ep, ".", 0)) 3419175dedbSmillert continue; 3429175dedbSmillert ++fold; 3439175dedbSmillert for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) 3449175dedbSmillert *dp = tolower(*cp); 3459175dedbSmillert lcase: 3469175dedbSmillert if (trypref(ep, ".", 0) || suffix(ep, 0)) 3479175dedbSmillert continue; 3489175dedbSmillert if (isupper(word[0])) { 3499175dedbSmillert for (cp = original, dp = word; (*dp = *cp++); dp++) { 3509175dedbSmillert if (fold) 3519175dedbSmillert *dp = tolower(*dp); 3529175dedbSmillert } 3539175dedbSmillert word[0] = tolower(word[0]); 3549175dedbSmillert goto lcase; 3559175dedbSmillert } 3569175dedbSmillert file = stdout; 3579175dedbSmillert } 3589175dedbSmillert 3599175dedbSmillert exit(0); 3609175dedbSmillert } 3619175dedbSmillert 3629175dedbSmillert void 3639175dedbSmillert print_word(FILE *f) 3649175dedbSmillert { 3659175dedbSmillert 3669175dedbSmillert if (f != NULL) { 3679175dedbSmillert if (vflag && affix[0] != '\0' && affix[0] != '.') 3689175dedbSmillert fprintf(f, "%s\t%s\n", affix, original); 3699175dedbSmillert else 3709175dedbSmillert fprintf(f, "%s\n", original); 3719175dedbSmillert } 3729175dedbSmillert } 3739175dedbSmillert 3749175dedbSmillert /* 3759175dedbSmillert * For each matching suffix in suftab, call the function associated 3769175dedbSmillert * with that suffix (p1 and p2). 3779175dedbSmillert */ 3789175dedbSmillert int 3799175dedbSmillert suffix(char *ep, int lev) 3809175dedbSmillert { 3819175dedbSmillert struct suftab *t; 3829175dedbSmillert char *cp, *sp; 3839175dedbSmillert 3849175dedbSmillert lev += DLEV; 3859175dedbSmillert deriv[lev] = deriv[lev-1] = 0; 3869175dedbSmillert for (t = suftab; (sp = t->suf); t++) { 3879175dedbSmillert cp = ep; 3889175dedbSmillert while (*sp) { 3899175dedbSmillert if (*--cp != *sp++) 3909175dedbSmillert goto next; 3919175dedbSmillert } 3929175dedbSmillert for (sp = cp; --sp >= word && !vowel(*sp);) 3939175dedbSmillert ; /* nothing */ 3949175dedbSmillert if (sp < word) 3959175dedbSmillert return (0); 3969175dedbSmillert if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1)) 3979175dedbSmillert return (1); 3989175dedbSmillert if (t->p2 != NULL) { 3999175dedbSmillert deriv[lev] = deriv[lev+1] = '\0'; 4009175dedbSmillert return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev)); 4019175dedbSmillert } 4029175dedbSmillert return (0); 4039175dedbSmillert next: ; 4049175dedbSmillert } 4059175dedbSmillert return (0); 4069175dedbSmillert } 4079175dedbSmillert 4089175dedbSmillert int 4099175dedbSmillert nop(void) 4109175dedbSmillert { 4119175dedbSmillert 4129175dedbSmillert return (0); 4139175dedbSmillert } 4149175dedbSmillert 4159175dedbSmillert int 4169175dedbSmillert strip(char *ep, char *d, char *a, int lev) 4179175dedbSmillert { 4189175dedbSmillert 4199175dedbSmillert return (trypref(ep, a, lev) || suffix(ep, lev)); 4209175dedbSmillert } 4219175dedbSmillert 4229175dedbSmillert int 4239175dedbSmillert s(char *ep, char *d, char *a, int lev) 4249175dedbSmillert { 4259175dedbSmillert 4269175dedbSmillert if (lev > DLEV + 1) 4279175dedbSmillert return (0); 4289175dedbSmillert if (*ep == 's' && ep[-1] == 's') 4299175dedbSmillert return (0); 4309175dedbSmillert return (strip(ep, d, a, lev)); 4319175dedbSmillert } 4329175dedbSmillert 4339175dedbSmillert int 4349175dedbSmillert an(char *ep, char *d, char *a, int lev) 4359175dedbSmillert { 4369175dedbSmillert 4379175dedbSmillert if (!isupper(*word)) /* must be proper name */ 4389175dedbSmillert return (0); 4399175dedbSmillert return (trypref(ep,a,lev)); 4409175dedbSmillert } 4419175dedbSmillert 4429175dedbSmillert int 4439175dedbSmillert ize(char *ep, char *d, char *a, int lev) 4449175dedbSmillert { 4459175dedbSmillert 4469175dedbSmillert *ep++ = 'e'; 4479175dedbSmillert return (strip(ep ,"", d, lev)); 4489175dedbSmillert } 4499175dedbSmillert 4509175dedbSmillert int 4519175dedbSmillert y_to_e(char *ep, char *d, char *a, int lev) 4529175dedbSmillert { 4539175dedbSmillert char c = *ep; 4549175dedbSmillert 4559175dedbSmillert *ep++ = 'e'; 4569175dedbSmillert if (strip(ep, "", d, lev)) 4579175dedbSmillert return (1); 4589175dedbSmillert ep[-1] = c; 4599175dedbSmillert return (0); 4609175dedbSmillert } 4619175dedbSmillert 4629175dedbSmillert int 4639175dedbSmillert ily(char *ep, char *d, char *a, int lev) 4649175dedbSmillert { 4659175dedbSmillert 4669175dedbSmillert if (ep[-1] == 'i') 4679175dedbSmillert return (i_to_y(ep, d, a, lev)); 4689175dedbSmillert else 4699175dedbSmillert return (strip(ep, d, a, lev)); 4709175dedbSmillert } 4719175dedbSmillert 4729175dedbSmillert int 4739175dedbSmillert ncy(char *ep, char *d, char *a, int lev) 4749175dedbSmillert { 4759175dedbSmillert 4769175dedbSmillert if (skipv(skipv(ep-1)) < word) 4779175dedbSmillert return (0); 4789175dedbSmillert ep[-1] = 't'; 4799175dedbSmillert return (strip(ep, d, a, lev)); 4809175dedbSmillert } 4819175dedbSmillert 4829175dedbSmillert int 4839175dedbSmillert bility(char *ep, char *d, char *a, int lev) 4849175dedbSmillert { 4859175dedbSmillert 4869175dedbSmillert *ep++ = 'l'; 4879175dedbSmillert return (y_to_e(ep, d, a, lev)); 4889175dedbSmillert } 4899175dedbSmillert 4909175dedbSmillert int 4919175dedbSmillert i_to_y(char *ep, char *d, char *a, int lev) 4929175dedbSmillert { 4939175dedbSmillert 4949175dedbSmillert if (ep[-1] == 'i') { 4959175dedbSmillert ep[-1] = 'y'; 4969175dedbSmillert a = d; 4979175dedbSmillert } 4989175dedbSmillert return (strip(ep, "", a, lev)); 4999175dedbSmillert } 5009175dedbSmillert 5019175dedbSmillert int 5029175dedbSmillert es(char *ep, char *d, char *a, int lev) 5039175dedbSmillert { 5049175dedbSmillert 5059175dedbSmillert if (lev > DLEV) 5069175dedbSmillert return (0); 5079175dedbSmillert 5089175dedbSmillert switch (ep[-1]) { 5099175dedbSmillert default: 5109175dedbSmillert return (0); 5119175dedbSmillert case 'i': 5129175dedbSmillert return (i_to_y(ep, d, a, lev)); 5139175dedbSmillert case 's': 5149175dedbSmillert case 'h': 5159175dedbSmillert case 'z': 5169175dedbSmillert case 'x': 5179175dedbSmillert return (strip(ep, d, a, lev)); 5189175dedbSmillert } 5199175dedbSmillert } 5209175dedbSmillert 5219175dedbSmillert int 5229175dedbSmillert metry(char *ep, char *d, char *a, int lev) 5239175dedbSmillert { 5249175dedbSmillert 5259175dedbSmillert ep[-2] = 'e'; 5269175dedbSmillert ep[-1] = 'r'; 5279175dedbSmillert return (strip(ep, d, a, lev)); 5289175dedbSmillert } 5299175dedbSmillert 5309175dedbSmillert int 5319175dedbSmillert tion(char *ep, char *d, char *a, int lev) 5329175dedbSmillert { 5339175dedbSmillert 5349175dedbSmillert switch (ep[-2]) { 5359175dedbSmillert case 'c': 5369175dedbSmillert case 'r': 5379175dedbSmillert return (trypref(ep, a, lev)); 5389175dedbSmillert case 'a': 5399175dedbSmillert return (y_to_e(ep, d, a, lev)); 5409175dedbSmillert } 5419175dedbSmillert return (0); 5429175dedbSmillert } 5439175dedbSmillert 5449175dedbSmillert /* 5459175dedbSmillert * Possible consonant-consonant-e ending. 5469175dedbSmillert */ 5479175dedbSmillert int 5489175dedbSmillert CCe(char *ep, char *d, char *a, int lev) 5499175dedbSmillert { 5509175dedbSmillert 5519175dedbSmillert switch (ep[-1]) { 5529175dedbSmillert case 'l': 5539175dedbSmillert if (vowel(ep[-2])) 5549175dedbSmillert break; 5559175dedbSmillert switch (ep[-2]) { 5569175dedbSmillert case 'l': 5579175dedbSmillert case 'r': 5589175dedbSmillert case 'w': 5599175dedbSmillert break; 5609175dedbSmillert default: 5619175dedbSmillert return (y_to_e(ep, d, a, lev)); 5629175dedbSmillert } 5639175dedbSmillert break; 5649175dedbSmillert case 's': 5659175dedbSmillert if (ep[-2] == 's') 5669175dedbSmillert break; 5679175dedbSmillert case 'c': 5689175dedbSmillert case 'g': 5699175dedbSmillert if (*ep == 'a') 5709175dedbSmillert return (0); 5719175dedbSmillert case 'v': 5729175dedbSmillert case 'z': 5739175dedbSmillert if (vowel(ep[-2])) 5749175dedbSmillert break; 5759175dedbSmillert case 'u': 5769175dedbSmillert if (y_to_e(ep, d, a, lev)) 5779175dedbSmillert return (1); 5789175dedbSmillert if (!(ep[-2] == 'n' && ep[-1] == 'g')) 5799175dedbSmillert return (0); 5809175dedbSmillert } 5819175dedbSmillert return (VCe(ep, d, a, lev)); 5829175dedbSmillert } 5839175dedbSmillert 5849175dedbSmillert /* 5859175dedbSmillert * Possible consonant-vowel-consonant-e ending. 5869175dedbSmillert */ 5879175dedbSmillert int 5889175dedbSmillert VCe(char *ep, char *d, char *a, int lev) 5899175dedbSmillert { 5909175dedbSmillert char c; 5919175dedbSmillert 5929175dedbSmillert c = ep[-1]; 5939175dedbSmillert if (c == 'e') 5949175dedbSmillert return (0); 5959175dedbSmillert if (!vowel(c) && vowel(ep[-2])) { 5969175dedbSmillert c = *ep; 5979175dedbSmillert *ep++ = 'e'; 5989175dedbSmillert if (trypref(ep, d, lev) || suffix(ep, lev)) 5999175dedbSmillert return (1); 6009175dedbSmillert ep--; 6019175dedbSmillert *ep = c; 6029175dedbSmillert } 6039175dedbSmillert return (strip(ep, d, a, lev)); 6049175dedbSmillert } 6059175dedbSmillert 6069175dedbSmillert char * 6079175dedbSmillert lookuppref(char **wp, char *ep) 6089175dedbSmillert { 6099175dedbSmillert char **sp; 6109175dedbSmillert char *bp,*cp; 6119175dedbSmillert 6129175dedbSmillert for (sp = preftab; *sp; sp++) { 6139175dedbSmillert bp = *wp; 6149175dedbSmillert for (cp = *sp; *cp; cp++, bp++) { 6159175dedbSmillert if (tolower(*bp) != *cp) 6169175dedbSmillert goto next; 6179175dedbSmillert } 6189175dedbSmillert for (cp = bp; cp < ep; cp++) { 6199175dedbSmillert if (vowel(*cp)) { 6209175dedbSmillert *wp = bp; 6219175dedbSmillert return (*sp); 6229175dedbSmillert } 6239175dedbSmillert } 6249175dedbSmillert next: ; 6259175dedbSmillert } 6269175dedbSmillert return (0); 6279175dedbSmillert } 6289175dedbSmillert 6299175dedbSmillert /* 6309175dedbSmillert * If the word is not in the dictionary, try stripping off prefixes 6319175dedbSmillert * until the word is found or we run out of prefixes to check. 6329175dedbSmillert */ 6339175dedbSmillert int 6349175dedbSmillert trypref(char *ep, char *a, int lev) 6359175dedbSmillert { 6369175dedbSmillert char *cp; 6379175dedbSmillert char *bp; 6389175dedbSmillert char *pp; 6399175dedbSmillert int val = 0; 6409175dedbSmillert char space[20]; 6419175dedbSmillert 6429175dedbSmillert deriv[lev] = a; 6439175dedbSmillert if (tryword(word, ep, lev)) 6449175dedbSmillert return (1); 6459175dedbSmillert bp = word; 6469175dedbSmillert pp = space; 6479175dedbSmillert deriv[lev+1] = pp; 6489175dedbSmillert while ((cp = lookuppref(&bp, ep))) { 6499175dedbSmillert *pp++ = '+'; 6509175dedbSmillert while ((*pp = *cp++)) 6519175dedbSmillert pp++; 6529175dedbSmillert if (tryword(bp, ep, lev+1)) { 6539175dedbSmillert val = 1; 6549175dedbSmillert break; 6559175dedbSmillert } 6569175dedbSmillert if (pp - space >= sizeof(space)) 6579175dedbSmillert return (0); 6589175dedbSmillert } 6599175dedbSmillert deriv[lev+1] = deriv[lev+2] = '\0'; 6609175dedbSmillert return (val); 6619175dedbSmillert } 6629175dedbSmillert 6639175dedbSmillert int 6649175dedbSmillert tryword(char *bp, char *ep, int lev) 6659175dedbSmillert { 6669175dedbSmillert int i, j; 6679175dedbSmillert char duple[3]; 6689175dedbSmillert 6699175dedbSmillert if (ep-bp <= 1) 6709175dedbSmillert return (0); 6719175dedbSmillert if (vowel(*ep) && monosyl(bp, ep)) 6729175dedbSmillert return (0); 6739175dedbSmillert 6749175dedbSmillert i = dict(bp, ep); 6759175dedbSmillert if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) { 6769175dedbSmillert ep--; 6779175dedbSmillert deriv[++lev] = duple; 6789175dedbSmillert duple[0] = '+'; 6799175dedbSmillert duple[1] = *ep; 6809175dedbSmillert duple[2] = '\0'; 6819175dedbSmillert i = dict(bp, ep); 6829175dedbSmillert } 6839175dedbSmillert if (vflag == 0 || i == 0) 6849175dedbSmillert return (i); 6859175dedbSmillert 6869175dedbSmillert /* Also tack on possible derivations. (XXX - warn on truncation?) */ 6879175dedbSmillert for (j = lev; j > 0; j--) { 6889175dedbSmillert if (deriv[j]) 6899175dedbSmillert strlcat(affix, deriv[j], sizeof(affix)); 6909175dedbSmillert } 6919175dedbSmillert return (i); 6929175dedbSmillert } 6939175dedbSmillert 6949175dedbSmillert int 6959175dedbSmillert monosyl(char *bp, char *ep) 6969175dedbSmillert { 6979175dedbSmillert 6989175dedbSmillert if (ep < bp + 2) 6999175dedbSmillert return (0); 7009175dedbSmillert if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 7019175dedbSmillert return (0); 7029175dedbSmillert while (--ep >= bp) 7039175dedbSmillert if (vowel(*ep)) 7049175dedbSmillert return (0); 7059175dedbSmillert return (1); 7069175dedbSmillert } 7079175dedbSmillert 7089175dedbSmillert char * 7099175dedbSmillert skipv(char *s) 7109175dedbSmillert { 7119175dedbSmillert 7129175dedbSmillert if (s >= word && vowel(*s)) 7139175dedbSmillert s--; 7149175dedbSmillert while (s >= word && !vowel(*s)) 7159175dedbSmillert s--; 7169175dedbSmillert return (s); 7179175dedbSmillert } 7189175dedbSmillert 7199175dedbSmillert int 7209175dedbSmillert vowel(int c) 7219175dedbSmillert { 7229175dedbSmillert 7239175dedbSmillert switch (tolower(c)) { 7249175dedbSmillert case 'a': 7259175dedbSmillert case 'e': 7269175dedbSmillert case 'i': 7279175dedbSmillert case 'o': 7289175dedbSmillert case 'u': 7299175dedbSmillert case 'y': 7309175dedbSmillert return (1); 7319175dedbSmillert } 7329175dedbSmillert return (0); 7339175dedbSmillert } 7349175dedbSmillert 7359175dedbSmillert /* 7369175dedbSmillert * Crummy way to Britishise. 7379175dedbSmillert */ 7389175dedbSmillert void 7399175dedbSmillert ise(void) 7409175dedbSmillert { 741f05c4e53Smillert struct suftab *tab; 7429175dedbSmillert 743f05c4e53Smillert for (tab = suftab; tab->suf; tab++) { 744f05c4e53Smillert /* Assume that suffix will contain 'z' if a1 or d1 do */ 745f05c4e53Smillert if (strchr(tab->suf, 'z')) { 746f05c4e53Smillert tab->suf = estrdup(tab->suf); 747f05c4e53Smillert ztos(tab->suf); 748f05c4e53Smillert if (strchr(tab->d1, 'z')) { 749f05c4e53Smillert tab->d1 = estrdup(tab->d1); 750f05c4e53Smillert ztos(tab->d1); 751f05c4e53Smillert } 752f05c4e53Smillert if (strchr(tab->a1, 'z')) { 753f05c4e53Smillert tab->a1 = estrdup(tab->a1); 754f05c4e53Smillert ztos(tab->a1); 755f05c4e53Smillert } 756f05c4e53Smillert } 7579175dedbSmillert } 7589175dedbSmillert } 7599175dedbSmillert 7609175dedbSmillert void 7619175dedbSmillert ztos(char *s) 7629175dedbSmillert { 7639175dedbSmillert 7649175dedbSmillert for (; *s; s++) 7659175dedbSmillert if (*s == 'z') 7669175dedbSmillert *s = 's'; 7679175dedbSmillert } 7689175dedbSmillert 769f05c4e53Smillert char * 770f05c4e53Smillert estrdup(const char *s) 771f05c4e53Smillert { 772f05c4e53Smillert char *d; 773f05c4e53Smillert 774f05c4e53Smillert if ((d = strdup(s)) == NULL) 775f05c4e53Smillert err(1, "strdup"); 776f05c4e53Smillert return (d); 777f05c4e53Smillert } 778f05c4e53Smillert 7799175dedbSmillert /* 7809175dedbSmillert * Look up a word in the dictionary. 7819175dedbSmillert * Returns 1 if found, 0 if not. 7829175dedbSmillert */ 7839175dedbSmillert int 7849175dedbSmillert dict(char *bp, char *ep) 7859175dedbSmillert { 7869175dedbSmillert char c; 7879175dedbSmillert int i, rval; 7889175dedbSmillert 7899175dedbSmillert c = *ep; 7909175dedbSmillert *ep = '\0'; 7919175dedbSmillert if (xflag) 7929175dedbSmillert printf("=%s\n", bp); 7939175dedbSmillert for (i = rval = 0; wlists[i].fd != -1; i++) { 7949175dedbSmillert if ((rval = look((unsigned char *)bp, wlists[i].front, 7959175dedbSmillert wlists[i].back)) == 1) 7969175dedbSmillert break; 7979175dedbSmillert } 7989175dedbSmillert *ep = c; 7999175dedbSmillert return (rval); 8009175dedbSmillert } 8019175dedbSmillert 8029175dedbSmillert __dead void 8039175dedbSmillert usage(void) 8049175dedbSmillert { 8059175dedbSmillert extern char *__progname; 8069175dedbSmillert 8079175dedbSmillert fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n", 8089175dedbSmillert __progname); 8099175dedbSmillert exit(1); 8109175dedbSmillert } 811