1*d9a51c35Sjmc /* $OpenBSD: spellprog.c,v 1.16 2022/12/26 19:16:03 jmc Exp $ */
29175dedbSmillert
39175dedbSmillert /*
49175dedbSmillert * Copyright (c) 1991, 1993
59175dedbSmillert * The Regents of the University of California. All rights reserved.
69175dedbSmillert *
79175dedbSmillert * Redistribution and use in source and binary forms, with or without
89175dedbSmillert * modification, are permitted provided that the following conditions
99175dedbSmillert * are met:
109175dedbSmillert * 1. Redistributions of source code must retain the above copyright
119175dedbSmillert * notice, this list of conditions and the following disclaimer.
129175dedbSmillert * 2. Redistributions in binary form must reproduce the above copyright
139175dedbSmillert * notice, this list of conditions and the following disclaimer in the
149175dedbSmillert * documentation and/or other materials provided with the distribution.
15f75387cbSmillert * 3. Neither the name of the University nor the names of its contributors
169175dedbSmillert * may be used to endorse or promote products derived from this software
179175dedbSmillert * without specific prior written permission.
189175dedbSmillert *
199175dedbSmillert * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
209175dedbSmillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
219175dedbSmillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
229175dedbSmillert * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
239175dedbSmillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
249175dedbSmillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
259175dedbSmillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
269175dedbSmillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
279175dedbSmillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
289175dedbSmillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
299175dedbSmillert * SUCH DAMAGE.
309175dedbSmillert *
319175dedbSmillert * @(#)spell.h 8.1 (Berkeley) 6/6/93
329175dedbSmillert */
339175dedbSmillert /*
349175dedbSmillert * Copyright (C) Caldera International Inc. 2001-2002.
359175dedbSmillert * All rights reserved.
369175dedbSmillert *
379175dedbSmillert * Redistribution and use in source and binary forms, with or without
389175dedbSmillert * modification, are permitted provided that the following conditions
399175dedbSmillert * are met:
409175dedbSmillert * 1. Redistributions of source code and documentation must retain the above
419175dedbSmillert * copyright notice, this list of conditions and the following disclaimer.
429175dedbSmillert * 2. Redistributions in binary form must reproduce the above copyright
439175dedbSmillert * notice, this list of conditions and the following disclaimer in the
449175dedbSmillert * documentation and/or other materials provided with the distribution.
459175dedbSmillert * 3. All advertising materials mentioning features or use of this software
469175dedbSmillert * must display the following acknowledgement:
479175dedbSmillert * This product includes software developed or owned by Caldera
489175dedbSmillert * International, Inc.
499175dedbSmillert * 4. Neither the name of Caldera International, Inc. nor the names of other
509175dedbSmillert * contributors may be used to endorse or promote products derived from
519175dedbSmillert * this software without specific prior written permission.
529175dedbSmillert *
539175dedbSmillert * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
549175dedbSmillert * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
559175dedbSmillert * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
569175dedbSmillert * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
579175dedbSmillert * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
589175dedbSmillert * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
599175dedbSmillert * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
609175dedbSmillert * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
619175dedbSmillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
629175dedbSmillert * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
639175dedbSmillert * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
649175dedbSmillert * POSSIBILITY OF SUCH DAMAGE.
659175dedbSmillert */
669175dedbSmillert
679175dedbSmillert #include <sys/mman.h>
689175dedbSmillert #include <sys/stat.h>
699175dedbSmillert
709175dedbSmillert #include <ctype.h>
719175dedbSmillert #include <err.h>
729175dedbSmillert #include <errno.h>
739175dedbSmillert #include <fcntl.h>
749175dedbSmillert #include <limits.h>
754239b822Smillert #include <stdint.h>
769175dedbSmillert #include <stdio.h>
779175dedbSmillert #include <stdlib.h>
789175dedbSmillert #include <string.h>
799175dedbSmillert #include <unistd.h>
809175dedbSmillert
819175dedbSmillert #define DLEV 2
829175dedbSmillert
839175dedbSmillert int an(char *, char *, char *, int);
849175dedbSmillert int bility(char *, char *, char *, int);
859175dedbSmillert int es(char *, char *, char *, int);
869175dedbSmillert int dict(char *, char *);
879175dedbSmillert int i_to_y(char *, char *, char *, int);
889175dedbSmillert int ily(char *, char *, char *, int);
899175dedbSmillert int ize(char *, char *, char *, int);
909175dedbSmillert int metry(char *, char *, char *, int);
919175dedbSmillert int monosyl(char *, char *);
929175dedbSmillert int ncy(char *, char *, char *, int);
93e14aac28Snicm int nop(char *, char *, char *, int);
949175dedbSmillert int trypref(char *, char *, int);
959175dedbSmillert int tryword(char *, char *, int);
969175dedbSmillert int s(char *, char *, char *, int);
979175dedbSmillert int strip(char *, char *, char *, int);
989175dedbSmillert int suffix(char *, int);
999175dedbSmillert int tion(char *, char *, char *, int);
100f69332c7Sderaadt int vowel(unsigned char);
1019175dedbSmillert int y_to_e(char *, char *, char *, int);
1029175dedbSmillert int CCe(char *, char *, char *, int);
1039175dedbSmillert int VCe(char *, char *, char *, int);
1049175dedbSmillert char *lookuppref(char **, char *);
1059175dedbSmillert char *skipv(char *);
106f05c4e53Smillert char *estrdup(const char *);
1079175dedbSmillert void ise(void);
1089175dedbSmillert void print_word(FILE *);
1099175dedbSmillert void ztos(char *);
110a2969341Sschwarze static void __dead usage(void);
1119175dedbSmillert
1129175dedbSmillert /* from look.c */
1139175dedbSmillert int look(unsigned char *, unsigned char *, unsigned char *);
1149175dedbSmillert
1159175dedbSmillert struct suftab {
116f05c4e53Smillert char *suf;
117e14aac28Snicm int (*p1)(char *, char *, char *, int);
1189175dedbSmillert int n1;
1199175dedbSmillert char *d1;
1209175dedbSmillert char *a1;
121e14aac28Snicm int (*p2)(char *, char *, char *, int);
1229175dedbSmillert int n2;
1239175dedbSmillert char *d2;
1249175dedbSmillert char *a2;
1259175dedbSmillert } suftab[] = {
1269175dedbSmillert {"ssen", ily, 4, "-y+iness", "+ness" },
1279175dedbSmillert {"ssel", ily, 4, "-y+i+less", "+less" },
1289175dedbSmillert {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
1299175dedbSmillert {"s'", s, 2, "", "+'s"},
1309175dedbSmillert {"s", s, 1, "", "+s"},
1319175dedbSmillert {"ecn", ncy, 1, "", "-t+ce"},
1329175dedbSmillert {"ycn", ncy, 1, "", "-cy+t"},
1339175dedbSmillert {"ytilb", nop, 0, "", ""},
1349175dedbSmillert {"ytilib", bility, 5, "-le+ility", ""},
1359175dedbSmillert {"elbaif", i_to_y, 4, "-y+iable", ""},
1369175dedbSmillert {"elba", CCe, 4, "-e+able", "+able"},
1379175dedbSmillert {"yti", CCe, 3, "-e+ity", "+ity"},
1389175dedbSmillert {"ylb", y_to_e, 1, "-e+y", ""},
1399175dedbSmillert {"yl", ily, 2, "-y+ily", "+ly"},
1409175dedbSmillert {"laci", strip, 2, "", "+al"},
1419175dedbSmillert {"latnem", strip, 2, "", "+al"},
1429175dedbSmillert {"lanoi", strip, 2, "", "+al"},
1439175dedbSmillert {"tnem", strip, 4, "", "+ment"},
1449175dedbSmillert {"gni", CCe, 3, "-e+ing", "+ing"},
1459175dedbSmillert {"reta", nop, 0, "", ""},
1469175dedbSmillert {"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
1479175dedbSmillert {"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
1489175dedbSmillert {"citsi", strip, 2, "", "+ic"},
1499175dedbSmillert {"cihparg", i_to_y, 1, "-y+ic", ""},
1509175dedbSmillert {"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
1519175dedbSmillert {"cirtem", i_to_y, 1, "-y+ic", ""},
1529175dedbSmillert {"yrtem", metry, 0, "-ry+er", ""},
1539175dedbSmillert {"cigol", i_to_y, 1, "-y+ic", ""},
1549175dedbSmillert {"tsigol", i_to_y, 2, "-y+ist", ""},
1559175dedbSmillert {"tsi", VCe, 3, "-e+ist", "+ist"},
1569175dedbSmillert {"msi", VCe, 3, "-e+ism", "+ist"},
1579175dedbSmillert {"noitacif", i_to_y, 6, "-y+ication", ""},
1589175dedbSmillert {"noitazi", ize, 5, "-e+ation", ""},
1599175dedbSmillert {"rota", tion, 2, "-e+or", ""},
1609175dedbSmillert {"noit", tion, 3, "-e+ion", "+ion"},
1619175dedbSmillert {"naino", an, 3, "", "+ian"},
1629175dedbSmillert {"na", an, 1, "", "+n"},
1639175dedbSmillert {"evit", tion, 3, "-e+ive", "+ive"},
1649175dedbSmillert {"ezi", CCe, 3, "-e+ize", "+ize"},
1659175dedbSmillert {"pihs", strip, 4, "", "+ship"},
1669175dedbSmillert {"dooh", ily, 4, "-y+hood", "+hood"},
1679175dedbSmillert {"ekil", strip, 4, "", "+like"},
1689175dedbSmillert { NULL }
1699175dedbSmillert };
1709175dedbSmillert
/*
 * NULL-terminated list of prefixes that lookuppref() tries, in order,
 * against the front of a word.  The first usable match is taken, so a
 * prefix has to be listed before any shorter prefix it starts with.
 */
char *preftab[] = {
	"anti", "bio", "dis", "electro", "en", "fore",
	"hyper", "intra", "inter", "iso", "kilo", "magneto",
	"meta", "micro", "milli", "mis", "mono", "multi",
	"non", "out", "over", "photo", "poly", "pre",
	"pseudo", "re", "semi", "stereo", "sub", "super",
	"thermo", "ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
2089175dedbSmillert
2099175dedbSmillert struct wlist {
2109175dedbSmillert int fd;
2119175dedbSmillert unsigned char *front;
2129175dedbSmillert unsigned char *back;
2139175dedbSmillert } *wlists;
2149175dedbSmillert
2159175dedbSmillert int vflag;
2169175dedbSmillert int xflag;
2179175dedbSmillert char word[LINE_MAX];
2189175dedbSmillert char original[LINE_MAX];
2199175dedbSmillert char *deriv[40];
2209175dedbSmillert char affix[40];
2219175dedbSmillert
/*
 * The spellprog utility accepts a newline-delimited list of words
 * on stdin.  For arguments it expects the path to a word list and
 * the path to a file in which to store found words.
 *
 * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
 * remaining words are then passed to spell again, this time with
 * the dictionary file as the first (non-flag) argument.
 *
 * Unlike historic versions of spellprog, this one does not use
 * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
 *
 * Note that spellprog should be called via the spell shell script
 * and is not meant to be invoked directly by the user.
 */
2399175dedbSmillert
2409175dedbSmillert int
main(int argc,char ** argv)2419175dedbSmillert main(int argc, char **argv)
2429175dedbSmillert {
2439175dedbSmillert char *ep, *cp, *dp;
2449175dedbSmillert char *outfile;
2459175dedbSmillert int ch, fold, i;
2469175dedbSmillert struct stat sb;
2479175dedbSmillert FILE *file, *found;
2489175dedbSmillert
2496ed9540bSderaadt if (pledge("stdio rpath wpath cpath", NULL) == -1)
2506ed9540bSderaadt err(1, "pledge");
2516ed9540bSderaadt
2529175dedbSmillert outfile = NULL;
2539175dedbSmillert while ((ch = getopt(argc, argv, "bvxo:")) != -1) {
2549175dedbSmillert switch (ch) {
2559175dedbSmillert case 'b':
2569175dedbSmillert /* Use British dictionary and convert ize -> ise. */
2579175dedbSmillert ise();
2589175dedbSmillert break;
2599175dedbSmillert case 'o':
2609175dedbSmillert outfile = optarg;
2619175dedbSmillert break;
2629175dedbSmillert case 'v':
2639175dedbSmillert /* Also write derivations to "found" file. */
264e9f70589Sderaadt vflag = 1;
2659175dedbSmillert break;
2669175dedbSmillert case 'x':
2679175dedbSmillert /* Print plausible stems to stdout. */
268e9f70589Sderaadt xflag = 1;
2699175dedbSmillert break;
2709175dedbSmillert default:
2719175dedbSmillert usage();
2729175dedbSmillert }
2739175dedbSmillert
2749175dedbSmillert }
2759175dedbSmillert argc -= optind;
2769175dedbSmillert argv += optind;
2779175dedbSmillert if (argc < 1)
2789175dedbSmillert usage();
2799175dedbSmillert
2809175dedbSmillert /* Open and mmap the word/stop lists. */
2811ed98fdfSderaadt if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL)
2829175dedbSmillert err(1, "malloc");
2839175dedbSmillert for (i = 0; argc--; i++) {
284b7041c07Sderaadt wlists[i].fd = open(argv[i], O_RDONLY);
2859175dedbSmillert if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0)
2869175dedbSmillert err(1, "%s", argv[i]);
2874239b822Smillert if (sb.st_size > SIZE_MAX)
2885ad04d35Sguenther errc(1, EFBIG, "%s", argv[i]);
2899175dedbSmillert wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ,
2909175dedbSmillert MAP_PRIVATE, wlists[i].fd, (off_t)0);
2919175dedbSmillert if (wlists[i].front == MAP_FAILED)
2929175dedbSmillert err(1, "%s", argv[i]);
2939175dedbSmillert wlists[i].back = wlists[i].front + sb.st_size;
2949175dedbSmillert }
2959175dedbSmillert wlists[i].fd = -1;
2969175dedbSmillert
2979175dedbSmillert /* Open file where found words are to be saved. */
2989175dedbSmillert if (outfile == NULL)
2999175dedbSmillert found = NULL;
3009175dedbSmillert else if ((found = fopen(outfile, "w")) == NULL)
3019175dedbSmillert err(1, "cannot open %s", outfile);
3029175dedbSmillert
3039175dedbSmillert for (;; print_word(file)) {
3049175dedbSmillert affix[0] = '\0';
3059175dedbSmillert file = found;
3069175dedbSmillert for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) {
3079175dedbSmillert if (ep - word == sizeof(word) - 1) {
3089175dedbSmillert *ep = '\0';
3099175dedbSmillert warnx("word too long (%s)", word);
3109175dedbSmillert while ((ch = getchar()) != '\n')
3119175dedbSmillert ; /* slurp until EOL */
3129175dedbSmillert }
3139175dedbSmillert if (ch == EOF) {
3149175dedbSmillert if (found != NULL)
3159175dedbSmillert fclose(found);
316a2969341Sschwarze return (0);
3179175dedbSmillert }
3189175dedbSmillert }
3199175dedbSmillert for (cp = word, dp = original; cp < ep; )
3209175dedbSmillert *dp++ = *cp++;
3219175dedbSmillert *dp = '\0';
3229175dedbSmillert fold = 0;
3239175dedbSmillert for (cp = word; cp < ep; cp++)
324f69332c7Sderaadt if (islower((unsigned char)*cp))
3259175dedbSmillert goto lcase;
3269175dedbSmillert if (trypref(ep, ".", 0))
3279175dedbSmillert continue;
3289175dedbSmillert ++fold;
3299175dedbSmillert for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++)
330f69332c7Sderaadt *dp = tolower((unsigned char)*cp);
3319175dedbSmillert lcase:
3329175dedbSmillert if (trypref(ep, ".", 0) || suffix(ep, 0))
3339175dedbSmillert continue;
334f69332c7Sderaadt if (isupper((unsigned char)word[0])) {
3359175dedbSmillert for (cp = original, dp = word; (*dp = *cp++); dp++) {
3369175dedbSmillert if (fold)
337f69332c7Sderaadt *dp = tolower((unsigned char)*dp);
3389175dedbSmillert }
339f69332c7Sderaadt word[0] = tolower((unsigned char)word[0]);
3409175dedbSmillert goto lcase;
3419175dedbSmillert }
3429175dedbSmillert file = stdout;
3439175dedbSmillert }
3449175dedbSmillert
345a2969341Sschwarze return (0);
3469175dedbSmillert }
3479175dedbSmillert
3489175dedbSmillert void
print_word(FILE * f)3499175dedbSmillert print_word(FILE *f)
3509175dedbSmillert {
3519175dedbSmillert
3529175dedbSmillert if (f != NULL) {
3539175dedbSmillert if (vflag && affix[0] != '\0' && affix[0] != '.')
3549175dedbSmillert fprintf(f, "%s\t%s\n", affix, original);
3559175dedbSmillert else
3569175dedbSmillert fprintf(f, "%s\n", original);
3579175dedbSmillert }
3589175dedbSmillert }
3599175dedbSmillert
3609175dedbSmillert /*
3619175dedbSmillert * For each matching suffix in suftab, call the function associated
3629175dedbSmillert * with that suffix (p1 and p2).
3639175dedbSmillert */
3649175dedbSmillert int
suffix(char * ep,int lev)3659175dedbSmillert suffix(char *ep, int lev)
3669175dedbSmillert {
3679175dedbSmillert struct suftab *t;
3689175dedbSmillert char *cp, *sp;
3699175dedbSmillert
3709175dedbSmillert lev += DLEV;
3719175dedbSmillert deriv[lev] = deriv[lev-1] = 0;
3729175dedbSmillert for (t = suftab; (sp = t->suf); t++) {
3739175dedbSmillert cp = ep;
3749175dedbSmillert while (*sp) {
3759175dedbSmillert if (*--cp != *sp++)
3769175dedbSmillert goto next;
3779175dedbSmillert }
3789175dedbSmillert for (sp = cp; --sp >= word && !vowel(*sp);)
3799175dedbSmillert ; /* nothing */
3809175dedbSmillert if (sp < word)
3819175dedbSmillert return (0);
3829175dedbSmillert if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
3839175dedbSmillert return (1);
3849175dedbSmillert if (t->p2 != NULL) {
385e14aac28Snicm deriv[lev] = deriv[lev+1] = 0;
3869175dedbSmillert return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
3879175dedbSmillert }
3889175dedbSmillert return (0);
3899175dedbSmillert next: ;
3909175dedbSmillert }
3919175dedbSmillert return (0);
3929175dedbSmillert }
3939175dedbSmillert
/*
 * Suffix handler that never accepts: always returns 0 and ignores
 * all of its arguments.
 */
int
nop(char *ep, char *d, char *a, int lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return (0);
}
4009175dedbSmillert
/*
 * Look up the word ending at ep (with prefix stripping), recording a as
 * its derivation; if that fails, try removing a further suffix.
 * Returns 1 on success, 0 on failure.  d is not used.
 */
int
strip(char *ep, char *d, char *a, int lev)
{
	if (trypref(ep, a, lev))
		return (1);
	return (suffix(ep, lev) != 0);
}
4079175dedbSmillert
4089175dedbSmillert int
s(char * ep,char * d,char * a,int lev)4099175dedbSmillert s(char *ep, char *d, char *a, int lev)
4109175dedbSmillert {
4119175dedbSmillert
4129175dedbSmillert if (lev > DLEV + 1)
4139175dedbSmillert return (0);
4149175dedbSmillert if (*ep == 's' && ep[-1] == 's')
4159175dedbSmillert return (0);
4169175dedbSmillert return (strip(ep, d, a, lev));
4179175dedbSmillert }
4189175dedbSmillert
4199175dedbSmillert int
an(char * ep,char * d,char * a,int lev)4209175dedbSmillert an(char *ep, char *d, char *a, int lev)
4219175dedbSmillert {
4229175dedbSmillert
423f69332c7Sderaadt if (!isupper((unsigned char)*word)) /* must be proper name */
4249175dedbSmillert return (0);
4259175dedbSmillert return (trypref(ep,a,lev));
4269175dedbSmillert }
4279175dedbSmillert
/*
 * Overwrite the character at ep with 'e' and try the stem that ends just
 * after it, recording d as the derivation.  The overwritten character is
 * not restored.
 */
int
ize(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'e';

	return (strip(ep + 1, "", d, lev));
}
4359175dedbSmillert
/*
 * Temporarily put an 'e' at ep and try the stem that ends just after
 * it, recording d as the derivation.  On failure the character that
 * was at ep is put back.  Returns 1 on success, 0 on failure.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	char saved;

	saved = ep[0];
	ep[0] = 'e';
	if (strip(ep + 1, "", d, lev))
		return (1);
	ep[0] = saved;
	return (0);
}
4479175dedbSmillert
/*
 * A stem ending in 'i' goes through i_to_y(); any other stem is passed
 * to strip() unchanged.
 */
int
ily(char *ep, char *d, char *a, int lev)
{
	return (ep[-1] == 'i' ? i_to_y(ep, d, a, lev) : strip(ep, d, a, lev));
}
4579175dedbSmillert
4589175dedbSmillert int
ncy(char * ep,char * d,char * a,int lev)4599175dedbSmillert ncy(char *ep, char *d, char *a, int lev)
4609175dedbSmillert {
4619175dedbSmillert
4629175dedbSmillert if (skipv(skipv(ep-1)) < word)
4639175dedbSmillert return (0);
4649175dedbSmillert ep[-1] = 't';
4659175dedbSmillert return (strip(ep, d, a, lev));
4669175dedbSmillert }
4679175dedbSmillert
/*
 * Store 'l' at ep and let y_to_e() continue with the following
 * character position.  The 'l' is left in place afterwards.
 */
int
bility(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'l';

	return (y_to_e(ep + 1, d, a, lev));
}
4759175dedbSmillert
/*
 * If the stem ends in 'i', change that 'i' to 'y' and record d as the
 * derivation; otherwise leave the stem alone and record a.  The stem is
 * then handed to strip().  A changed character is not restored.
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{
	char *how = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		how = d;
	}
	return (strip(ep, "", how, lev));
}
4869175dedbSmillert
4879175dedbSmillert int
es(char * ep,char * d,char * a,int lev)4889175dedbSmillert es(char *ep, char *d, char *a, int lev)
4899175dedbSmillert {
4909175dedbSmillert
4919175dedbSmillert if (lev > DLEV)
4929175dedbSmillert return (0);
4939175dedbSmillert
4949175dedbSmillert switch (ep[-1]) {
4959175dedbSmillert default:
4969175dedbSmillert return (0);
4979175dedbSmillert case 'i':
4989175dedbSmillert return (i_to_y(ep, d, a, lev));
4999175dedbSmillert case 's':
5009175dedbSmillert case 'h':
5019175dedbSmillert case 'z':
5029175dedbSmillert case 'x':
5039175dedbSmillert return (strip(ep, d, a, lev));
5049175dedbSmillert }
5059175dedbSmillert }
5069175dedbSmillert
/*
 * Replace the two characters in front of ep with "er" and hand the
 * result to strip().  The replaced characters are not restored.
 */
int
metry(char *ep, char *d, char *a, int lev)
{
	ep[-1] = 'r';
	ep[-2] = 'e';

	return (strip(ep, d, a, lev));
}
5159175dedbSmillert
/*
 * Dispatch on the character two positions before ep: 'c' or 'r' looks
 * the stem up with trypref(), 'a' tries y_to_e(), anything else is
 * rejected.
 */
int
tion(char *ep, char *d, char *a, int lev)
{
	char c = ep[-2];

	if (c == 'c' || c == 'r')
		return (trypref(ep, a, lev));
	if (c == 'a')
		return (y_to_e(ep, d, a, lev));
	return (0);
}
5299175dedbSmillert
/*
 * Possible consonant-consonant-e ending.
 *
 * Depending on the last two characters of the stem, either try the stem
 * with an 'e' appended (y_to_e()) or fall back to VCe().  The cases
 * below deliberately fall through into one another.
 */
int
CCe(char *ep, char *d, char *a, int lev)
{

	switch (ep[-1]) {
	case 'l':
		if (vowel(ep[-2]))
			break;
		if (ep[-2] != 'l' && ep[-2] != 'r' && ep[-2] != 'w')
			return (y_to_e(ep, d, a, lev));
		break;
	case 's':
		if (ep[-2] == 's')
			break;
		/* FALLTHROUGH */
	case 'c':
	case 'g':
		if (*ep == 'a')
			return (0);
		/* FALLTHROUGH */
	case 'v':
	case 'z':
		if (vowel(ep[-2]))
			break;
		/* FALLTHROUGH */
	case 'u':
		if (y_to_e(ep, d, a, lev))
			return (1);
		/* Only a stem ending in "ng" gets a second chance via VCe(). */
		if (ep[-2] != 'n' || ep[-1] != 'g')
			return (0);
		break;
	default:
		break;
	}
	return (VCe(ep, d, a, lev));
}
5699175dedbSmillert
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem that already ends in 'e' is rejected.  When the stem ends in a
 * vowel followed by a non-vowel, it is first tried with an 'e' appended
 * (derivation d); the character overwritten by that 'e' is restored if
 * the attempt fails.  Finally the bare stem is handed to strip().
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	char saved;

	if (ep[-1] == 'e')
		return (0);

	if (!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		ep[0] = saved;
	}
	return (strip(ep, d, a, lev));
}
5919175dedbSmillert
5929175dedbSmillert char *
lookuppref(char ** wp,char * ep)5939175dedbSmillert lookuppref(char **wp, char *ep)
5949175dedbSmillert {
5959175dedbSmillert char **sp;
5969175dedbSmillert char *bp,*cp;
5979175dedbSmillert
5989175dedbSmillert for (sp = preftab; *sp; sp++) {
5999175dedbSmillert bp = *wp;
6009175dedbSmillert for (cp = *sp; *cp; cp++, bp++) {
601f69332c7Sderaadt if (tolower((unsigned char)*bp) != *cp)
6029175dedbSmillert goto next;
6039175dedbSmillert }
6049175dedbSmillert for (cp = bp; cp < ep; cp++) {
6059175dedbSmillert if (vowel(*cp)) {
6069175dedbSmillert *wp = bp;
6079175dedbSmillert return (*sp);
6089175dedbSmillert }
6099175dedbSmillert }
6109175dedbSmillert next: ;
6119175dedbSmillert }
6129175dedbSmillert return (0);
6139175dedbSmillert }
6149175dedbSmillert
6159175dedbSmillert /*
6169175dedbSmillert * If the word is not in the dictionary, try stripping off prefixes
6179175dedbSmillert * until the word is found or we run out of prefixes to check.
6189175dedbSmillert */
6199175dedbSmillert int
trypref(char * ep,char * a,int lev)6209175dedbSmillert trypref(char *ep, char *a, int lev)
6219175dedbSmillert {
6229175dedbSmillert char *cp;
6239175dedbSmillert char *bp;
6249175dedbSmillert char *pp;
6259175dedbSmillert int val = 0;
6269175dedbSmillert char space[20];
6279175dedbSmillert
6289175dedbSmillert deriv[lev] = a;
6299175dedbSmillert if (tryword(word, ep, lev))
6309175dedbSmillert return (1);
6319175dedbSmillert bp = word;
6329175dedbSmillert pp = space;
6339175dedbSmillert deriv[lev+1] = pp;
6349175dedbSmillert while ((cp = lookuppref(&bp, ep))) {
6359175dedbSmillert *pp++ = '+';
6369175dedbSmillert while ((*pp = *cp++))
6379175dedbSmillert pp++;
6389175dedbSmillert if (tryword(bp, ep, lev+1)) {
6399175dedbSmillert val = 1;
6409175dedbSmillert break;
6419175dedbSmillert }
6429175dedbSmillert if (pp - space >= sizeof(space))
6439175dedbSmillert return (0);
6449175dedbSmillert }
645e14aac28Snicm deriv[lev+1] = deriv[lev+2] = 0;
6469175dedbSmillert return (val);
6479175dedbSmillert }
6489175dedbSmillert
6499175dedbSmillert int
tryword(char * bp,char * ep,int lev)6509175dedbSmillert tryword(char *bp, char *ep, int lev)
6519175dedbSmillert {
6529175dedbSmillert int i, j;
6539175dedbSmillert char duple[3];
6549175dedbSmillert
6559175dedbSmillert if (ep-bp <= 1)
6569175dedbSmillert return (0);
6579175dedbSmillert if (vowel(*ep) && monosyl(bp, ep))
6589175dedbSmillert return (0);
6599175dedbSmillert
6609175dedbSmillert i = dict(bp, ep);
6619175dedbSmillert if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) {
6629175dedbSmillert ep--;
6639175dedbSmillert deriv[++lev] = duple;
6649175dedbSmillert duple[0] = '+';
6659175dedbSmillert duple[1] = *ep;
6669175dedbSmillert duple[2] = '\0';
6679175dedbSmillert i = dict(bp, ep);
6689175dedbSmillert }
6699175dedbSmillert if (vflag == 0 || i == 0)
6709175dedbSmillert return (i);
6719175dedbSmillert
6729175dedbSmillert /* Also tack on possible derivations. (XXX - warn on truncation?) */
6739175dedbSmillert for (j = lev; j > 0; j--) {
6749175dedbSmillert if (deriv[j])
6759175dedbSmillert strlcat(affix, deriv[j], sizeof(affix));
6769175dedbSmillert }
6779175dedbSmillert return (i);
6789175dedbSmillert }
6799175dedbSmillert
/*
 * Return 1 if the text between bp and ep is at least two characters
 * long, ends in a vowel followed by a non-vowel other than 'x' or 'w',
 * and has no vowel before that; return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{
	char *p;

	if (ep - bp < 2)
		return (0);
	if (vowel(ep[-1]) || ep[-1] == 'x' || ep[-1] == 'w')
		return (0);
	if (!vowel(ep[-2]))
		return (0);
	for (p = bp; p < ep - 2; p++) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
6939175dedbSmillert
6949175dedbSmillert char *
skipv(char * s)6959175dedbSmillert skipv(char *s)
6969175dedbSmillert {
6979175dedbSmillert
6989175dedbSmillert if (s >= word && vowel(*s))
6999175dedbSmillert s--;
7009175dedbSmillert while (s >= word && !vowel(*s))
7019175dedbSmillert s--;
7029175dedbSmillert return (s);
7039175dedbSmillert }
7049175dedbSmillert
/*
 * Return 1 if c is one of the letters a, e, i, o, u or y in either
 * case, 0 otherwise.
 */
int
vowel(unsigned char c)
{
	int lc = tolower(c);

	return (lc == 'a' || lc == 'e' || lc == 'i' ||
	    lc == 'o' || lc == 'u' || lc == 'y');
}
7209175dedbSmillert
7219175dedbSmillert /*
7229175dedbSmillert * Crummy way to Britishise.
7239175dedbSmillert */
7249175dedbSmillert void
ise(void)7259175dedbSmillert ise(void)
7269175dedbSmillert {
727f05c4e53Smillert struct suftab *tab;
7289175dedbSmillert
729f05c4e53Smillert for (tab = suftab; tab->suf; tab++) {
730f05c4e53Smillert /* Assume that suffix will contain 'z' if a1 or d1 do */
731f05c4e53Smillert if (strchr(tab->suf, 'z')) {
732f05c4e53Smillert tab->suf = estrdup(tab->suf);
733f05c4e53Smillert ztos(tab->suf);
734f05c4e53Smillert if (strchr(tab->d1, 'z')) {
735f05c4e53Smillert tab->d1 = estrdup(tab->d1);
736f05c4e53Smillert ztos(tab->d1);
737f05c4e53Smillert }
738f05c4e53Smillert if (strchr(tab->a1, 'z')) {
739f05c4e53Smillert tab->a1 = estrdup(tab->a1);
740f05c4e53Smillert ztos(tab->a1);
741f05c4e53Smillert }
742f05c4e53Smillert }
7439175dedbSmillert }
7449175dedbSmillert }
7459175dedbSmillert
/*
 * Replace every 'z' in the NUL-terminated string s with 's', in place.
 */
void
ztos(char *s)
{
	while (*s != '\0') {
		if (*s == 'z')
			*s = 's';
		s++;
	}
}
7549175dedbSmillert
755f05c4e53Smillert char *
estrdup(const char * s)756f05c4e53Smillert estrdup(const char *s)
757f05c4e53Smillert {
758f05c4e53Smillert char *d;
759f05c4e53Smillert
760f05c4e53Smillert if ((d = strdup(s)) == NULL)
761f05c4e53Smillert err(1, "strdup");
762f05c4e53Smillert return (d);
763f05c4e53Smillert }
764f05c4e53Smillert
7659175dedbSmillert /*
7669175dedbSmillert * Look up a word in the dictionary.
7679175dedbSmillert * Returns 1 if found, 0 if not.
7689175dedbSmillert */
7699175dedbSmillert int
dict(char * bp,char * ep)7709175dedbSmillert dict(char *bp, char *ep)
7719175dedbSmillert {
7729175dedbSmillert char c;
7739175dedbSmillert int i, rval;
7749175dedbSmillert
7759175dedbSmillert c = *ep;
7769175dedbSmillert *ep = '\0';
7779175dedbSmillert if (xflag)
7789175dedbSmillert printf("=%s\n", bp);
7799175dedbSmillert for (i = rval = 0; wlists[i].fd != -1; i++) {
7809175dedbSmillert if ((rval = look((unsigned char *)bp, wlists[i].front,
7819175dedbSmillert wlists[i].back)) == 1)
7829175dedbSmillert break;
7839175dedbSmillert }
7849175dedbSmillert *ep = c;
7859175dedbSmillert return (rval);
7869175dedbSmillert }
7879175dedbSmillert
788a2969341Sschwarze static void __dead
usage(void)7899175dedbSmillert usage(void)
7909175dedbSmillert {
7919175dedbSmillert extern char *__progname;
7929175dedbSmillert
7939175dedbSmillert fprintf(stderr, "usage: %s [-bvx] [-o found-words] word-list ...\n",
7949175dedbSmillert __progname);
7959175dedbSmillert exit(1);
7969175dedbSmillert }
797