10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 22*239Sceastha /* 23*239Sceastha * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*239Sceastha * Use is subject to license terms. 25*239Sceastha */ 26*239Sceastha 270Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 280Sstevel@tonic-gate /* All Rights Reserved */ 290Sstevel@tonic-gate 300Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 310Sstevel@tonic-gate 320Sstevel@tonic-gate #include <stdlib.h> 330Sstevel@tonic-gate #include <unistd.h> 340Sstevel@tonic-gate #include <limits.h> 350Sstevel@tonic-gate #include <string.h> 360Sstevel@tonic-gate #include <stdio.h> 370Sstevel@tonic-gate #include <ctype.h> 380Sstevel@tonic-gate #include <locale.h> 390Sstevel@tonic-gate #include "hash.h" 400Sstevel@tonic-gate 410Sstevel@tonic-gate #define Tolower(c) (isupper(c)?tolower(c):c) 420Sstevel@tonic-gate #define DLEV 2 430Sstevel@tonic-gate 440Sstevel@tonic-gate /* 450Sstevel@tonic-gate * ANSI prototypes 460Sstevel@tonic-gate */ 470Sstevel@tonic-gate static int ily(char *, char *, char *, int); 480Sstevel@tonic-gate static int s(char *, char *, char *, int); 490Sstevel@tonic-gate static int es(char *, char *, char *, int); 500Sstevel@tonic-gate static int subst(char *, char *, char *, int); 510Sstevel@tonic-gate static int nop(void); 520Sstevel@tonic-gate static int bility(char *, char *, char *, int); 530Sstevel@tonic-gate static int i_to_y(char *, char *, char *, int); 540Sstevel@tonic-gate static int CCe(char *, char *, char *, int); 550Sstevel@tonic-gate static int y_to_e(char *, char *, char *, int); 560Sstevel@tonic-gate static int strip(char *, char *, char *, int); 570Sstevel@tonic-gate static int ize(char *, char *, char *, int); 580Sstevel@tonic-gate static int tion(char *, char *, char *, int); 590Sstevel@tonic-gate static int an(char *, char *, char *, int); 600Sstevel@tonic-gate int prime(char *); 610Sstevel@tonic-gate static void ise(void); 620Sstevel@tonic-gate static int tryword(char *, char *, int); 630Sstevel@tonic-gate static int trypref(char *, char *, int); 640Sstevel@tonic-gate static int trysuff(char *, int); 650Sstevel@tonic-gate static int vowel(int); 660Sstevel@tonic-gate static int dict(char *, char *); 670Sstevel@tonic-gate static int monosyl(char *, char *); 680Sstevel@tonic-gate static int VCe(char *, char *, char *, int); 690Sstevel@tonic-gate static char *skipv(char *); 700Sstevel@tonic-gate static void ztos(char *); 710Sstevel@tonic-gate 720Sstevel@tonic-gate static struct suftab { 730Sstevel@tonic-gate char *suf; 740Sstevel@tonic-gate int (*p1)(); 750Sstevel@tonic-gate int n1; 760Sstevel@tonic-gate char *d1; 770Sstevel@tonic-gate char *a1; 780Sstevel@tonic-gate int (*p2)(); 790Sstevel@tonic-gate int n2; 800Sstevel@tonic-gate char *d2; 810Sstevel@tonic-gate char *a2; 820Sstevel@tonic-gate } suftab[] = { 830Sstevel@tonic-gate {"ssen", ily, 4, "-y+iness", "+ness" }, 840Sstevel@tonic-gate {"ssel", ily, 4, "-y+i+less", "+less" }, 850Sstevel@tonic-gate {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" }, 860Sstevel@tonic-gate {"s'", s, 2, "", "+'s"}, 870Sstevel@tonic-gate {"s", s, 1, "", "+s"}, 880Sstevel@tonic-gate {"ecn", subst, 1, "-t+ce", ""}, 890Sstevel@tonic-gate {"ycn", subst, 1, "-t+cy", ""}, 900Sstevel@tonic-gate {"ytilb", nop, 0, "", ""}, 910Sstevel@tonic-gate {"ytilib", bility, 5, "-le+ility", ""}, 920Sstevel@tonic-gate {"elbaif", i_to_y, 4, "-y+iable", ""}, 930Sstevel@tonic-gate {"elba", CCe, 4, "-e+able", "+able"}, 940Sstevel@tonic-gate {"yti", CCe, 3, "-e+ity", "+ity"}, 950Sstevel@tonic-gate {"ylb", y_to_e, 1, "-e+y", ""}, 960Sstevel@tonic-gate {"yl", ily, 2, "-y+ily", "+ly"}, 970Sstevel@tonic-gate {"laci", strip, 2, "", "+al"}, 980Sstevel@tonic-gate {"latnem", strip, 2, "", "+al"}, 990Sstevel@tonic-gate {"lanoi", strip, 2, "", "+al"}, 1000Sstevel@tonic-gate {"tnem", strip, 4, "", "+ment"}, 1010Sstevel@tonic-gate {"gni", CCe, 3, "-e+ing", "+ing"}, 1020Sstevel@tonic-gate {"reta", nop, 0, "", ""}, 1030Sstevel@tonic-gate {"retc", nop, 0, "", ""}, 1040Sstevel@tonic-gate {"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"}, 1050Sstevel@tonic-gate {"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"}, 1060Sstevel@tonic-gate {"citsi", strip, 2, "", "+ic"}, 1070Sstevel@tonic-gate {"citi", ize, 1, "-ic+e", ""}, 1080Sstevel@tonic-gate {"cihparg", i_to_y, 1, "-y+ic", ""}, 1090Sstevel@tonic-gate {"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"}, 1100Sstevel@tonic-gate {"cirtem", i_to_y, 1, "-y+ic", ""}, 1110Sstevel@tonic-gate {"yrtem", subst, 0, "-er+ry", ""}, 1120Sstevel@tonic-gate {"cigol", i_to_y, 1, "-y+ic", ""}, 1130Sstevel@tonic-gate {"tsigol", i_to_y, 2, "-y+ist", ""}, 1140Sstevel@tonic-gate {"tsi", CCe, 3, "-e+ist", "+ist"}, 1150Sstevel@tonic-gate {"msi", CCe, 3, "-e+ism", "+ist"}, 1160Sstevel@tonic-gate {"noitacifi", i_to_y, 6, "-y+ication", ""}, 1170Sstevel@tonic-gate {"noitazi", ize, 4, "-e+ation", ""}, 1180Sstevel@tonic-gate {"rota", tion, 2, "-e+or", ""}, 1190Sstevel@tonic-gate {"rotc", tion, 2, "", "+or"}, 1200Sstevel@tonic-gate {"noit", tion, 3, "-e+ion", "+ion"}, 1210Sstevel@tonic-gate {"naino", an, 3, "", "+ian"}, 1220Sstevel@tonic-gate {"na", an, 1, "", "+n"}, 1230Sstevel@tonic-gate {"evi", subst, 0, "-ion+ive", ""}, 1240Sstevel@tonic-gate {"ezi", CCe, 3, "-e+ize", "+ize"}, 1250Sstevel@tonic-gate {"pihs", strip, 4, "", "+ship"}, 1260Sstevel@tonic-gate {"dooh", ily, 4, "-y+ihood", "+hood"}, 1270Sstevel@tonic-gate {"luf", ily, 3, "-y+iful", "+ful"}, 1280Sstevel@tonic-gate {"ekil", strip, 4, "", "+like"}, 1290Sstevel@tonic-gate 0 1300Sstevel@tonic-gate }; 1310Sstevel@tonic-gate 1320Sstevel@tonic-gate static char *preftab[] = { 1330Sstevel@tonic-gate "anti", 1340Sstevel@tonic-gate "auto", 1350Sstevel@tonic-gate "bio", 1360Sstevel@tonic-gate "counter", 1370Sstevel@tonic-gate "dis", 1380Sstevel@tonic-gate "electro", 1390Sstevel@tonic-gate "en", 1400Sstevel@tonic-gate "fore", 1410Sstevel@tonic-gate "geo", 1420Sstevel@tonic-gate "hyper", 1430Sstevel@tonic-gate "intra", 1440Sstevel@tonic-gate "inter", 1450Sstevel@tonic-gate "iso", 1460Sstevel@tonic-gate "kilo", 1470Sstevel@tonic-gate "magneto", 1480Sstevel@tonic-gate "meta", 1490Sstevel@tonic-gate "micro", 1500Sstevel@tonic-gate "mid", 1510Sstevel@tonic-gate "milli", 1520Sstevel@tonic-gate "mis", 1530Sstevel@tonic-gate "mono", 1540Sstevel@tonic-gate "multi", 1550Sstevel@tonic-gate "non", 1560Sstevel@tonic-gate "out", 1570Sstevel@tonic-gate "over", 1580Sstevel@tonic-gate "photo", 1590Sstevel@tonic-gate "poly", 1600Sstevel@tonic-gate "pre", 1610Sstevel@tonic-gate "pseudo", 1620Sstevel@tonic-gate "psycho", 1630Sstevel@tonic-gate "re", 1640Sstevel@tonic-gate "semi", 1650Sstevel@tonic-gate "stereo", 1660Sstevel@tonic-gate "sub", 1670Sstevel@tonic-gate "super", 1680Sstevel@tonic-gate "tele", 1690Sstevel@tonic-gate "thermo", 1700Sstevel@tonic-gate "ultra", 1710Sstevel@tonic-gate "under", /* must precede un */ 1720Sstevel@tonic-gate "un", 1730Sstevel@tonic-gate 0 1740Sstevel@tonic-gate }; 1750Sstevel@tonic-gate 1760Sstevel@tonic-gate static int vflag; 1770Sstevel@tonic-gate static int xflag; 1780Sstevel@tonic-gate static char *prog; 1790Sstevel@tonic-gate static char word[LINE_MAX]; 1800Sstevel@tonic-gate static char original[LINE_MAX]; 1810Sstevel@tonic-gate static char *deriv[LINE_MAX]; 1820Sstevel@tonic-gate static char affix[LINE_MAX]; 1830Sstevel@tonic-gate static FILE *file, *found; 1840Sstevel@tonic-gate /* 1850Sstevel@tonic-gate * deriv is stack of pointers to notes like +micro +ed 1860Sstevel@tonic-gate * affix is concatenated string of notes 1870Sstevel@tonic-gate * the buffer size 141 stems from the sizes of original and affix. 1880Sstevel@tonic-gate */ 1890Sstevel@tonic-gate 1900Sstevel@tonic-gate /* 1910Sstevel@tonic-gate * in an attempt to defray future maintenance misunderstandings, here is 1920Sstevel@tonic-gate * an attempt to describe the input/output expectations of the spell 1930Sstevel@tonic-gate * program. 1940Sstevel@tonic-gate * 1950Sstevel@tonic-gate * spellprog is intended to be called from the shell file spell. 1960Sstevel@tonic-gate * because of this, there is little error checking (this is historical, not 1970Sstevel@tonic-gate * necessarily advisable). 1980Sstevel@tonic-gate * 1990Sstevel@tonic-gate * spellprog options hashed-list pass 2000Sstevel@tonic-gate * 2010Sstevel@tonic-gate * the hashed-list is a list of the form made by spellin. 2020Sstevel@tonic-gate * there are 2 types of hashed lists: 2030Sstevel@tonic-gate * 1. a stop list: this specifies words that by the rules embodied 2040Sstevel@tonic-gate * in spellprog would be recognized as correct, BUT are really 2050Sstevel@tonic-gate * errors. 2060Sstevel@tonic-gate * 2. a dictionary of correctly spelled words. 2070Sstevel@tonic-gate * the pass number determines how the words found in the specified 2080Sstevel@tonic-gate * hashed-list are treated. If the pass number is 1, the hashed-list is 2090Sstevel@tonic-gate * treated as the stop-list, otherwise, it is treated as the regular 2100Sstevel@tonic-gate * dictionary list. in this case, the value of "pass" is a filename. Found 2110Sstevel@tonic-gate * words are written to this file. 2120Sstevel@tonic-gate * 2130Sstevel@tonic-gate * In the normal case, the filename = /dev/null. However, if the v option 2140Sstevel@tonic-gate * is specified, the derivations are written to this file. 2150Sstevel@tonic-gate * The spellprog looks up words in the hashed-list; if a word is found, it 2160Sstevel@tonic-gate * is printed to the stdout. If the hashed-list was the stop-list, the 2170Sstevel@tonic-gate * words found are presumed to be misspellings. in this case, 2180Sstevel@tonic-gate * a control character is printed ( a "-" is appended to the word. 2190Sstevel@tonic-gate * a hyphen will never occur naturally in the input list because deroff 2200Sstevel@tonic-gate * is used in the shell file before calling spellprog.) 2210Sstevel@tonic-gate * If the regualar spelling list was used (hlista or hlistb), the words 2220Sstevel@tonic-gate * are correct, and may be ditched. (unless the -v option was used - 2230Sstevel@tonic-gate * see the manual page). 2240Sstevel@tonic-gate * 2250Sstevel@tonic-gate * spellprog should be called twice : first with the stop-list, to flag all 2260Sstevel@tonic-gate * a priori incorrectly spelled words; second with the dictionary. 2270Sstevel@tonic-gate * 2280Sstevel@tonic-gate * spellprog hstop 1 |\ 2290Sstevel@tonic-gate * spellprog hlista /dev/null 2300Sstevel@tonic-gate * 2310Sstevel@tonic-gate * for a complete scenario, see the shell file: spell. 2320Sstevel@tonic-gate * 2330Sstevel@tonic-gate */ 2340Sstevel@tonic-gate 235*239Sceastha int 2360Sstevel@tonic-gate main(int argc, char **argv) 2370Sstevel@tonic-gate { 238*239Sceastha char *ep, *cp; 239*239Sceastha char *dp; 2400Sstevel@tonic-gate int fold; 2410Sstevel@tonic-gate int c, j; 2420Sstevel@tonic-gate int pass; 2430Sstevel@tonic-gate 2440Sstevel@tonic-gate /* Set locale environment variables local definitions */ 2450Sstevel@tonic-gate (void) setlocale(LC_ALL, ""); 2460Sstevel@tonic-gate #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 2470Sstevel@tonic-gate #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ 2480Sstevel@tonic-gate #endif 2490Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 2500Sstevel@tonic-gate 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate prog = argv[0]; 2530Sstevel@tonic-gate while ((c = getopt(argc, argv, "bvx")) != EOF) { 2540Sstevel@tonic-gate switch (c) { 2550Sstevel@tonic-gate case 'b': 2560Sstevel@tonic-gate ise(); 2570Sstevel@tonic-gate break; 2580Sstevel@tonic-gate case 'v': 2590Sstevel@tonic-gate vflag++; 2600Sstevel@tonic-gate break; 2610Sstevel@tonic-gate case 'x': 2620Sstevel@tonic-gate xflag++; 2630Sstevel@tonic-gate break; 2640Sstevel@tonic-gate } 2650Sstevel@tonic-gate } 2660Sstevel@tonic-gate 2670Sstevel@tonic-gate argc -= optind; 2680Sstevel@tonic-gate argv = &argv[optind]; 2690Sstevel@tonic-gate 2700Sstevel@tonic-gate if ((argc < 2) || !prime(*argv)) { 2710Sstevel@tonic-gate (void) fprintf(stderr, 2720Sstevel@tonic-gate gettext("%s: cannot initialize hash table\n"), prog); 2730Sstevel@tonic-gate exit(1); 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate argc--; 2760Sstevel@tonic-gate argv++; 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate /* 2790Sstevel@tonic-gate * if pass is not 1, it is assumed to be a filename. 2800Sstevel@tonic-gate * found words are written to this file. 2810Sstevel@tonic-gate */ 2820Sstevel@tonic-gate pass = **argv; 2830Sstevel@tonic-gate if (pass != '1') 2840Sstevel@tonic-gate found = fopen(*argv, "w"); 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate for (;;) { 2870Sstevel@tonic-gate affix[0] = 0; 2880Sstevel@tonic-gate file = stdout; 2890Sstevel@tonic-gate for (ep = word; (*ep = j = getchar()) != '\n'; ep++) 2900Sstevel@tonic-gate if (j == EOF) 2910Sstevel@tonic-gate exit(0); 2920Sstevel@tonic-gate /* 2930Sstevel@tonic-gate * here is the hyphen processing. these words were found in the stop 2940Sstevel@tonic-gate * list. however, if they exist as is, (no derivations tried) in the 2950Sstevel@tonic-gate * dictionary, let them through as correct. 2960Sstevel@tonic-gate * 2970Sstevel@tonic-gate */ 2980Sstevel@tonic-gate if (ep[-1] == '-') { 2990Sstevel@tonic-gate *--ep = 0; 3000Sstevel@tonic-gate if (!tryword(word, ep, 0)) 3010Sstevel@tonic-gate (void) fprintf(file, "%s\n", word); 3020Sstevel@tonic-gate continue; 3030Sstevel@tonic-gate } 3040Sstevel@tonic-gate for (cp = word, dp = original; cp < ep; ) 3050Sstevel@tonic-gate *dp++ = *cp++; 3060Sstevel@tonic-gate *dp = 0; 3070Sstevel@tonic-gate fold = 0; 3080Sstevel@tonic-gate for (cp = word; cp < ep; cp++) 3090Sstevel@tonic-gate if (islower(*cp)) 3100Sstevel@tonic-gate goto lcase; 3110Sstevel@tonic-gate if (((ep - word) == 1) && 3120Sstevel@tonic-gate ((word[0] == 'A') || (word[0] == 'I'))) 3130Sstevel@tonic-gate continue; 3140Sstevel@tonic-gate if (trypref(ep, ".", 0)) 3150Sstevel@tonic-gate goto foundit; 3160Sstevel@tonic-gate ++fold; 3170Sstevel@tonic-gate for (cp = original+1, dp = word+1; dp < ep; dp++, cp++) 3180Sstevel@tonic-gate *dp = Tolower(*cp); 3190Sstevel@tonic-gate lcase: 3200Sstevel@tonic-gate if (((ep - word) == 1) && (word[0] == 'a')) 3210Sstevel@tonic-gate continue; 3220Sstevel@tonic-gate if (trypref(ep, ".", 0)||trysuff(ep, 0)) 3230Sstevel@tonic-gate goto foundit; 3240Sstevel@tonic-gate if (isupper(word[0])) { 3250Sstevel@tonic-gate for (cp = original, dp = word; *dp = *cp++; dp++) 3260Sstevel@tonic-gate if (fold) *dp = Tolower(*dp); 3270Sstevel@tonic-gate word[0] = Tolower(word[0]); 3280Sstevel@tonic-gate goto lcase; 3290Sstevel@tonic-gate } 3300Sstevel@tonic-gate (void) fprintf(file, "%s\n", original); 3310Sstevel@tonic-gate continue; 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate foundit: 3340Sstevel@tonic-gate if (pass == '1') 3350Sstevel@tonic-gate (void) fprintf(file, "%s-\n", original); 3360Sstevel@tonic-gate else if (affix[0] != 0 && affix[0] != '.') { 3370Sstevel@tonic-gate file = found; 3380Sstevel@tonic-gate (void) fprintf(file, "%s\t%s\n", affix, 3390Sstevel@tonic-gate original); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate } 3420Sstevel@tonic-gate } 3430Sstevel@tonic-gate 3440Sstevel@tonic-gate /* 3450Sstevel@tonic-gate * strip exactly one suffix and do 3460Sstevel@tonic-gate * indicated routine(s), which may recursively 3470Sstevel@tonic-gate * strip suffixes 3480Sstevel@tonic-gate */ 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate static int 3510Sstevel@tonic-gate trysuff(char *ep, int lev) 3520Sstevel@tonic-gate { 353*239Sceastha struct suftab *t; 354*239Sceastha char *cp, *sp; 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate lev += DLEV; 3570Sstevel@tonic-gate deriv[lev] = deriv[lev-1] = 0; 3580Sstevel@tonic-gate for (t = &suftab[0]; (sp = t->suf) != 0; t++) { 3590Sstevel@tonic-gate cp = ep; 3600Sstevel@tonic-gate while (*sp) 3610Sstevel@tonic-gate if (*--cp != *sp++) 3620Sstevel@tonic-gate goto next; 3630Sstevel@tonic-gate for (sp = cp; --sp >= word && !vowel(*sp); ); 3640Sstevel@tonic-gate if (sp < word) 3650Sstevel@tonic-gate return (0); 3660Sstevel@tonic-gate if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1)) 3670Sstevel@tonic-gate return (1); 3680Sstevel@tonic-gate if (t->p2 != 0) { 3690Sstevel@tonic-gate deriv[lev] = deriv[lev+1] = 0; 3700Sstevel@tonic-gate return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev)); 3710Sstevel@tonic-gate } 3720Sstevel@tonic-gate return (0); 3730Sstevel@tonic-gate next:; 3740Sstevel@tonic-gate } 3750Sstevel@tonic-gate return (0); 3760Sstevel@tonic-gate } 3770Sstevel@tonic-gate 3780Sstevel@tonic-gate static int 3790Sstevel@tonic-gate nop(void) 3800Sstevel@tonic-gate { 3810Sstevel@tonic-gate return (0); 3820Sstevel@tonic-gate } 3830Sstevel@tonic-gate 3840Sstevel@tonic-gate /* ARGSUSED */ 3850Sstevel@tonic-gate static int 3860Sstevel@tonic-gate strip(char *ep, char *d, char *a, int lev) 3870Sstevel@tonic-gate { 3880Sstevel@tonic-gate return (trypref(ep, a, lev)||trysuff(ep, lev)); 3890Sstevel@tonic-gate } 3900Sstevel@tonic-gate 3910Sstevel@tonic-gate static int 3920Sstevel@tonic-gate s(char *ep, char *d, char *a, int lev) 3930Sstevel@tonic-gate { 3940Sstevel@tonic-gate if (lev > DLEV+1) 3950Sstevel@tonic-gate return (0); 3960Sstevel@tonic-gate if (*ep == 's' && ep[-1] == 's') 3970Sstevel@tonic-gate return (0); 3980Sstevel@tonic-gate return (strip(ep, d, a, lev)); 3990Sstevel@tonic-gate } 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate /* ARGSUSED */ 4020Sstevel@tonic-gate static int 4030Sstevel@tonic-gate an(char *ep, char *d, char *a, int lev) 4040Sstevel@tonic-gate { 4050Sstevel@tonic-gate if (!isupper(*word)) /* must be proper name */ 4060Sstevel@tonic-gate return (0); 4070Sstevel@tonic-gate return (trypref(ep, a, lev)); 4080Sstevel@tonic-gate } 4090Sstevel@tonic-gate 4100Sstevel@tonic-gate /* ARGSUSED */ 4110Sstevel@tonic-gate static int 4120Sstevel@tonic-gate ize(char *ep, char *d, char *a, int lev) 4130Sstevel@tonic-gate { 4140Sstevel@tonic-gate ep[-1] = 'e'; 4150Sstevel@tonic-gate return (strip(ep, "", d, lev)); 4160Sstevel@tonic-gate } 4170Sstevel@tonic-gate 4180Sstevel@tonic-gate /* ARGSUSED */ 4190Sstevel@tonic-gate static int 4200Sstevel@tonic-gate y_to_e(char *ep, char *d, char *a, int lev) 4210Sstevel@tonic-gate { 4220Sstevel@tonic-gate *ep++ = 'e'; 4230Sstevel@tonic-gate return (strip(ep, "", d, lev)); 4240Sstevel@tonic-gate } 4250Sstevel@tonic-gate 4260Sstevel@tonic-gate static int 4270Sstevel@tonic-gate ily(char *ep, char *d, char *a, int lev) 4280Sstevel@tonic-gate { 4290Sstevel@tonic-gate if (ep[-1] == 'i') 4300Sstevel@tonic-gate return (i_to_y(ep, d, a, lev)); 4310Sstevel@tonic-gate else 4320Sstevel@tonic-gate return (strip(ep, d, a, lev)); 4330Sstevel@tonic-gate } 4340Sstevel@tonic-gate 4350Sstevel@tonic-gate static int 4360Sstevel@tonic-gate bility(char *ep, char *d, char *a, int lev) 4370Sstevel@tonic-gate { 4380Sstevel@tonic-gate *ep++ = 'l'; 4390Sstevel@tonic-gate return (y_to_e(ep, d, a, lev)); 4400Sstevel@tonic-gate } 4410Sstevel@tonic-gate 4420Sstevel@tonic-gate static int 4430Sstevel@tonic-gate i_to_y(char *ep, char *d, char *a, int lev) 4440Sstevel@tonic-gate { 4450Sstevel@tonic-gate if (ep[-1] == 'i') { 4460Sstevel@tonic-gate ep[-1] = 'y'; 4470Sstevel@tonic-gate a = d; 4480Sstevel@tonic-gate } 4490Sstevel@tonic-gate return (strip(ep, "", a, lev)); 4500Sstevel@tonic-gate } 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate static int 4530Sstevel@tonic-gate es(char *ep, char *d, char *a, int lev) 4540Sstevel@tonic-gate { 4550Sstevel@tonic-gate if (lev > DLEV) 4560Sstevel@tonic-gate return (0); 4570Sstevel@tonic-gate switch (ep[-1]) { 4580Sstevel@tonic-gate default: 4590Sstevel@tonic-gate return (0); 4600Sstevel@tonic-gate case 'i': 4610Sstevel@tonic-gate return (i_to_y(ep, d, a, lev)); 4620Sstevel@tonic-gate case 's': 4630Sstevel@tonic-gate case 'h': 4640Sstevel@tonic-gate case 'z': 4650Sstevel@tonic-gate case 'x': 4660Sstevel@tonic-gate return (strip(ep, d, a, lev)); 4670Sstevel@tonic-gate } 4680Sstevel@tonic-gate } 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate /* ARGSUSED */ 4710Sstevel@tonic-gate static int 4720Sstevel@tonic-gate subst(char *ep, char *d, char *a, int lev) 4730Sstevel@tonic-gate { 4740Sstevel@tonic-gate char *u, *t; 4750Sstevel@tonic-gate 4760Sstevel@tonic-gate if (skipv(skipv(ep-1)) < word) 4770Sstevel@tonic-gate return (0); 4780Sstevel@tonic-gate for (t = d; *t != '+'; t++) 4790Sstevel@tonic-gate continue; 4800Sstevel@tonic-gate for (u = ep; *--t != '-'; ) 4810Sstevel@tonic-gate *--u = *t; 4820Sstevel@tonic-gate return (strip(ep, "", d, lev)); 4830Sstevel@tonic-gate } 4840Sstevel@tonic-gate 4850Sstevel@tonic-gate 4860Sstevel@tonic-gate static int 4870Sstevel@tonic-gate tion(char *ep, char *d, char *a, int lev) 4880Sstevel@tonic-gate { 4890Sstevel@tonic-gate switch (ep[-2]) { 4900Sstevel@tonic-gate case 'c': 4910Sstevel@tonic-gate case 'r': 4920Sstevel@tonic-gate return (trypref(ep, a, lev)); 4930Sstevel@tonic-gate case 'a': 4940Sstevel@tonic-gate return (y_to_e(ep, d, a, lev)); 4950Sstevel@tonic-gate } 4960Sstevel@tonic-gate return (0); 4970Sstevel@tonic-gate } 4980Sstevel@tonic-gate 4990Sstevel@tonic-gate /* possible consonant-consonant-e ending */ 5000Sstevel@tonic-gate static int 5010Sstevel@tonic-gate CCe(char *ep, char *d, char *a, int lev) 5020Sstevel@tonic-gate { 5030Sstevel@tonic-gate switch (ep[-1]) { 5040Sstevel@tonic-gate case 'r': 5050Sstevel@tonic-gate if (ep[-2] == 't') 5060Sstevel@tonic-gate return (y_to_e(ep, d, a, lev)); 5070Sstevel@tonic-gate break; 5080Sstevel@tonic-gate case 'l': 5090Sstevel@tonic-gate if (vowel(ep[-2])) 5100Sstevel@tonic-gate break; 5110Sstevel@tonic-gate switch (ep[-2]) { 5120Sstevel@tonic-gate case 'l': 5130Sstevel@tonic-gate case 'r': 5140Sstevel@tonic-gate case 'w': 5150Sstevel@tonic-gate break; 5160Sstevel@tonic-gate default: 5170Sstevel@tonic-gate return (y_to_e(ep, d, a, lev)); 5180Sstevel@tonic-gate } 5190Sstevel@tonic-gate break; 5200Sstevel@tonic-gate case 's': 5210Sstevel@tonic-gate if (ep[-2] == 's') 5220Sstevel@tonic-gate break; 5230Sstevel@tonic-gate if (*ep == 'a') 5240Sstevel@tonic-gate return (0); 5250Sstevel@tonic-gate if (vowel(ep[-2])) 5260Sstevel@tonic-gate break; 5270Sstevel@tonic-gate if (y_to_e(ep, d, a, lev)) 5280Sstevel@tonic-gate return (1); 5290Sstevel@tonic-gate if (!(ep[-2] == 'n' && ep[-1] == 'g')) 5300Sstevel@tonic-gate return (0); 5310Sstevel@tonic-gate break; 5320Sstevel@tonic-gate case 'c': 5330Sstevel@tonic-gate case 'g': 5340Sstevel@tonic-gate if (*ep == 'a') 5350Sstevel@tonic-gate return (0); 5360Sstevel@tonic-gate if (vowel(ep[-2])) 5370Sstevel@tonic-gate break; 5380Sstevel@tonic-gate if (y_to_e(ep, d, a, lev)) 5390Sstevel@tonic-gate return (1); 5400Sstevel@tonic-gate if (!(ep[-2] == 'n' && ep[-1] == 'g')) 5410Sstevel@tonic-gate return (0); 5420Sstevel@tonic-gate break; 5430Sstevel@tonic-gate case 'v': 5440Sstevel@tonic-gate case 'z': 5450Sstevel@tonic-gate if (vowel(ep[-2])) 5460Sstevel@tonic-gate break; 5470Sstevel@tonic-gate if (y_to_e(ep, d, a, lev)) 5480Sstevel@tonic-gate return (1); 5490Sstevel@tonic-gate if (!(ep[-2] == 'n' && ep[-1] == 'g')) 5500Sstevel@tonic-gate return (0); 5510Sstevel@tonic-gate break; 5520Sstevel@tonic-gate case 'u': 5530Sstevel@tonic-gate if (y_to_e(ep, d, a, lev)) 5540Sstevel@tonic-gate return (1); 5550Sstevel@tonic-gate if (!(ep[-2] == 'n' && ep[-1] == 'g')) 5560Sstevel@tonic-gate return (0); 5570Sstevel@tonic-gate break; 5580Sstevel@tonic-gate } 5590Sstevel@tonic-gate return (VCe(ep, d, a, lev)); 5600Sstevel@tonic-gate } 5610Sstevel@tonic-gate 5620Sstevel@tonic-gate /* possible consonant-vowel-consonant-e ending */ 5630Sstevel@tonic-gate static int 5640Sstevel@tonic-gate VCe(char *ep, char *d, char *a, int lev) 5650Sstevel@tonic-gate { 5660Sstevel@tonic-gate char c; 5670Sstevel@tonic-gate c = ep[-1]; 5680Sstevel@tonic-gate if (c == 'e') 5690Sstevel@tonic-gate return (0); 5700Sstevel@tonic-gate if (!vowel(c) && vowel(ep[-2])) { 5710Sstevel@tonic-gate c = *ep; 5720Sstevel@tonic-gate *ep++ = 'e'; 5730Sstevel@tonic-gate if (trypref(ep, d, lev)||trysuff(ep, lev)) 5740Sstevel@tonic-gate return (1); 5750Sstevel@tonic-gate ep--; 5760Sstevel@tonic-gate *ep = c; 5770Sstevel@tonic-gate } 5780Sstevel@tonic-gate return (strip(ep, d, a, lev)); 5790Sstevel@tonic-gate } 5800Sstevel@tonic-gate 5810Sstevel@tonic-gate static char * 5820Sstevel@tonic-gate lookuppref(char **wp, char *ep) 5830Sstevel@tonic-gate { 584*239Sceastha char **sp; 585*239Sceastha char *bp, *cp; 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate for (sp = preftab; *sp; sp++) { 5880Sstevel@tonic-gate bp = *wp; 5890Sstevel@tonic-gate for (cp = *sp; *cp; cp++, bp++) 5900Sstevel@tonic-gate if (Tolower(*bp) != *cp) 5910Sstevel@tonic-gate goto next; 5920Sstevel@tonic-gate for (cp = bp; cp < ep; cp++) 5930Sstevel@tonic-gate if (vowel(*cp)) { 5940Sstevel@tonic-gate *wp = bp; 5950Sstevel@tonic-gate return (*sp); 5960Sstevel@tonic-gate } 5970Sstevel@tonic-gate next:; 5980Sstevel@tonic-gate } 5990Sstevel@tonic-gate return (0); 6000Sstevel@tonic-gate } 6010Sstevel@tonic-gate 6020Sstevel@tonic-gate /* 6030Sstevel@tonic-gate * while word is not in dictionary try stripping 6040Sstevel@tonic-gate * prefixes. Fail if no more prefixes. 6050Sstevel@tonic-gate */ 6060Sstevel@tonic-gate static int 6070Sstevel@tonic-gate trypref(char *ep, char *a, int lev) 6080Sstevel@tonic-gate { 609*239Sceastha char *cp; 6100Sstevel@tonic-gate char *bp; 611*239Sceastha char *pp; 6120Sstevel@tonic-gate int val = 0; 6130Sstevel@tonic-gate char space[LINE_MAX * 2]; 6140Sstevel@tonic-gate deriv[lev] = a; 6150Sstevel@tonic-gate if (tryword(word, ep, lev)) 6160Sstevel@tonic-gate return (1); 6170Sstevel@tonic-gate bp = word; 6180Sstevel@tonic-gate pp = space; 6190Sstevel@tonic-gate deriv[lev+1] = pp; 6200Sstevel@tonic-gate while (cp = lookuppref(&bp, ep)) { 6210Sstevel@tonic-gate *pp++ = '+'; 6220Sstevel@tonic-gate while (*pp = *cp++) 6230Sstevel@tonic-gate pp++; 6240Sstevel@tonic-gate if (tryword(bp, ep, lev+1)) { 6250Sstevel@tonic-gate val = 1; 6260Sstevel@tonic-gate break; 6270Sstevel@tonic-gate } 6280Sstevel@tonic-gate } 6290Sstevel@tonic-gate deriv[lev+1] = deriv[lev+2] = 0; 6300Sstevel@tonic-gate return (val); 6310Sstevel@tonic-gate } 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate static int 6340Sstevel@tonic-gate tryword(char *bp, char *ep, int lev) 6350Sstevel@tonic-gate { 636*239Sceastha int i, j; 6370Sstevel@tonic-gate char duple[3]; 6380Sstevel@tonic-gate if (ep-bp <= 1) 6390Sstevel@tonic-gate return (0); 6400Sstevel@tonic-gate if (vowel(*ep)) { 6410Sstevel@tonic-gate if (monosyl(bp, ep)) 6420Sstevel@tonic-gate return (0); 6430Sstevel@tonic-gate } 6440Sstevel@tonic-gate i = dict(bp, ep); 6450Sstevel@tonic-gate if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) { 6460Sstevel@tonic-gate ep--; 6470Sstevel@tonic-gate deriv[++lev] = duple; 6480Sstevel@tonic-gate duple[0] = '+'; 6490Sstevel@tonic-gate duple[1] = *ep; 6500Sstevel@tonic-gate duple[2] = 0; 6510Sstevel@tonic-gate i = dict(bp, ep); 6520Sstevel@tonic-gate } 6530Sstevel@tonic-gate if (vflag == 0 || i == 0) 6540Sstevel@tonic-gate return (i); 6550Sstevel@tonic-gate /* 6560Sstevel@tonic-gate * when derivations are wanted, collect them 6570Sstevel@tonic-gate * for printing 6580Sstevel@tonic-gate */ 6590Sstevel@tonic-gate j = lev; 6600Sstevel@tonic-gate do { 6610Sstevel@tonic-gate if (deriv[j]) 6620Sstevel@tonic-gate (void) strcat(affix, deriv[j]); 6630Sstevel@tonic-gate } while (--j > 0); 6640Sstevel@tonic-gate return (i); 6650Sstevel@tonic-gate } 6660Sstevel@tonic-gate 6670Sstevel@tonic-gate 6680Sstevel@tonic-gate static int 6690Sstevel@tonic-gate monosyl(char *bp, char *ep) 6700Sstevel@tonic-gate { 6710Sstevel@tonic-gate if (ep < bp+2) 6720Sstevel@tonic-gate return (0); 6730Sstevel@tonic-gate if (vowel(*--ep) || !vowel(*--ep) || ep[1] == 'x' || ep[1] == 'w') 6740Sstevel@tonic-gate return (0); 6750Sstevel@tonic-gate while (--ep >= bp) 6760Sstevel@tonic-gate if (vowel(*ep)) 6770Sstevel@tonic-gate return (0); 6780Sstevel@tonic-gate return (1); 6790Sstevel@tonic-gate } 6800Sstevel@tonic-gate 6810Sstevel@tonic-gate static char * 6820Sstevel@tonic-gate skipv(char *s) 6830Sstevel@tonic-gate { 6840Sstevel@tonic-gate if (s >= word&&vowel(*s)) 6850Sstevel@tonic-gate s--; 6860Sstevel@tonic-gate while (s >= word && !vowel(*s)) 6870Sstevel@tonic-gate s--; 6880Sstevel@tonic-gate return (s); 6890Sstevel@tonic-gate } 6900Sstevel@tonic-gate 6910Sstevel@tonic-gate static int 6920Sstevel@tonic-gate vowel(int c) 6930Sstevel@tonic-gate { 6940Sstevel@tonic-gate switch (Tolower(c)) { 6950Sstevel@tonic-gate case 'a': 6960Sstevel@tonic-gate case 'e': 6970Sstevel@tonic-gate case 'i': 6980Sstevel@tonic-gate case 'o': 6990Sstevel@tonic-gate case 'u': 7000Sstevel@tonic-gate case 'y': 7010Sstevel@tonic-gate return (1); 7020Sstevel@tonic-gate } 7030Sstevel@tonic-gate return (0); 7040Sstevel@tonic-gate } 7050Sstevel@tonic-gate 7060Sstevel@tonic-gate /* crummy way to Britishise */ 7070Sstevel@tonic-gate static void 7080Sstevel@tonic-gate ise(void) 7090Sstevel@tonic-gate { 710*239Sceastha struct suftab *p; 7110Sstevel@tonic-gate 7120Sstevel@tonic-gate for (p = suftab; p->suf; p++) { 7130Sstevel@tonic-gate ztos(p->suf); 7140Sstevel@tonic-gate ztos(p->d1); 7150Sstevel@tonic-gate ztos(p->a1); 7160Sstevel@tonic-gate } 7170Sstevel@tonic-gate } 7180Sstevel@tonic-gate 7190Sstevel@tonic-gate static void 7200Sstevel@tonic-gate ztos(char *s) 7210Sstevel@tonic-gate { 7220Sstevel@tonic-gate for (; *s; s++) 7230Sstevel@tonic-gate if (*s == 'z') 7240Sstevel@tonic-gate *s = 's'; 7250Sstevel@tonic-gate } 7260Sstevel@tonic-gate 7270Sstevel@tonic-gate static int 7280Sstevel@tonic-gate dict(char *bp, char *ep) 7290Sstevel@tonic-gate { 730*239Sceastha int temp, result; 7310Sstevel@tonic-gate if (xflag) 7320Sstevel@tonic-gate (void) fprintf(stdout, "=%.*s\n", ep-bp, bp); 7330Sstevel@tonic-gate temp = *ep; 7340Sstevel@tonic-gate *ep = 0; 7350Sstevel@tonic-gate result = hashlook(bp); 7360Sstevel@tonic-gate *ep = temp; 7370Sstevel@tonic-gate return (result); 7380Sstevel@tonic-gate } 739