13e12c5d1SDavid du Colombier #include <u.h>
23e12c5d1SDavid du Colombier #include <libc.h>
33e12c5d1SDavid du Colombier #include <bio.h>
/*
 * Rune-aware stand-ins for the ctype.h classification routines.
 * They are macros and may evaluate their argument more than once,
 * so the argument must be free of side effects.
 */

#define	isupper(r)	((r) >= L'A' && (r) <= L'Z')
#define	islower(r)	((r) >= L'a' && (r) <= L'z')
#define	isalpha(r)	(islower(r) || isupper(r))
/* true for the accented-letter range of Latin-1, 0xC0 through 0xFF */
#define	islatin1(r)	((r) >= 0xC0 && (r) <= 0xFF)

#define	isdigit(r)	((r) >= L'0' && (r) <= L'9')

#define	isalnum(r)	(isdigit(r) || isalpha(r))

/* blank, tab, and the control range 0x0A-0x0D (newline through carriage return) */
#define	isspace(r)	(((r) >= 0x0A && (r) <= 0x0D) \
			|| (r) == L'\t' || (r) == L' ')

/* only meaningful when isupper(r) holds; callers check first */
#define	tolower(r)	((r)+('a'-'A'))

/* -1, 0 or 1 according to the sign of v */
#define	sgn(v)	(((v) > 0) - ((v) < 0))
213e12c5d1SDavid du Colombier
#define	WORDSIZ	4000	/* capacity, in Runes, of the entry[] and word[] line buffers */
char	*filename = "/lib/words";	/* file searched when main() is given no file argument */
Biobuf	*dfile;		/* the file being searched; opened in main() */
Biobuf	bout;		/* buffered output on fd 1 */
Biobuf	bin;		/* buffered input on fd 0; source of search strings when iflag is set */

int	fold;		/* -f: rcanon() maps A-Z to a-z */
int	direc;		/* -d: rcanon() keeps only letters, digits, blanks and tabs */
int	exact;		/* -x: a match must be exact, not merely a prefix */
int	iflag;		/* -i, or no search string given: read search strings from bin */
int	rev	= 1;	/*-1 for reverse-ordered file, not implemented*/
int	(*compare)(Rune*, Rune*);	/* ordering used by the search: acomp, or ncomp with -n */
Rune	tab	= '\t';	/* rcanon() stops at this rune; replaced by the -t argument */
Rune	entry[WORDSIZ];	/* most recent line read from dfile, as read */
Rune	word[WORDSIZ];	/* rcanon()'d form of entry */
Rune	key[50], orig[50];	/* search string: orig as given, key after rcanon() */
/*
 * rcanon() indexes this table with r-0xc0 for runes in 0xC0..0xFF.
 * A zero entry means the rune is left as it is.
 */
Rune	latin_fold_tab[] =
{
/*	Table to fold latin 1 characters to ASCII equivalents
			based at Rune value 0xc0

	 À    Á    Â    Ã    Ä    Å    Æ    Ç
	 È    É    Ê    Ë    Ì    Í    Î    Ï
	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
	 à    á    â    ã    ä    å    æ    ç
	 è    é    ê    ë    ì    í    î    ï
	 ð    ñ    ò    ó    ô    õ    ö    ÷
	 ø    ù    ú    û    ü    ý    þ    ÿ
*/
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
};
613e12c5d1SDavid du Colombier
int	locate(void);			/* position dfile at the first candidate line for key */
int	acomp(Rune*, Rune*);		/* rune-by-rune comparison; distinguishes prefix from strict order */
int	getword(Biobuf*, Rune *rp, int n);	/* read one line of at most n runes */
void	torune(char*, Rune*);		/* UTF string to Rune string */
void	rcanon(Rune*, Rune*);		/* canonical form of a line or key, per fold/direc/tab */
int	ncomp(Rune*, Rune*);		/* comparison of leading decimal numbers */
683e12c5d1SDavid du Colombier
693e12c5d1SDavid du Colombier void
usage(void)70*7fd2696aSDavid du Colombier usage(void)
71*7fd2696aSDavid du Colombier {
72*7fd2696aSDavid du Colombier fprint(2, "usage: %s [-dfinx] [-t c] [string] [file]\n", argv0);
73*7fd2696aSDavid du Colombier exits("usage");
74*7fd2696aSDavid du Colombier }
75*7fd2696aSDavid du Colombier
76*7fd2696aSDavid du Colombier void
main(int argc,char * argv[])773e12c5d1SDavid du Colombier main(int argc, char *argv[])
783e12c5d1SDavid du Colombier {
797dd7cddfSDavid du Colombier int n;
807dd7cddfSDavid du Colombier
813e12c5d1SDavid du Colombier Binit(&bin, 0, OREAD);
823e12c5d1SDavid du Colombier Binit(&bout, 1, OWRITE);
833e12c5d1SDavid du Colombier compare = acomp;
843e12c5d1SDavid du Colombier ARGBEGIN{
853e12c5d1SDavid du Colombier case 'd':
863e12c5d1SDavid du Colombier direc++;
873e12c5d1SDavid du Colombier break;
883e12c5d1SDavid du Colombier case 'f':
893e12c5d1SDavid du Colombier fold++;
903e12c5d1SDavid du Colombier break;
913e12c5d1SDavid du Colombier case 'i':
923e12c5d1SDavid du Colombier iflag++;
933e12c5d1SDavid du Colombier break;
943e12c5d1SDavid du Colombier case 'n':
953e12c5d1SDavid du Colombier compare = ncomp;
963e12c5d1SDavid du Colombier break;
973e12c5d1SDavid du Colombier case 't':
98*7fd2696aSDavid du Colombier chartorune(&tab, EARGF(usage()));
993e12c5d1SDavid du Colombier break;
1003e12c5d1SDavid du Colombier case 'x':
1013e12c5d1SDavid du Colombier exact++;
1023e12c5d1SDavid du Colombier break;
1033e12c5d1SDavid du Colombier default:
1043e12c5d1SDavid du Colombier fprint(2, "%s: bad option %c\n", argv0, ARGC());
105*7fd2696aSDavid du Colombier usage();
1063e12c5d1SDavid du Colombier } ARGEND
1073e12c5d1SDavid du Colombier if(!iflag){
1083e12c5d1SDavid du Colombier if(argc >= 1) {
1093e12c5d1SDavid du Colombier torune(argv[0], orig);
1103e12c5d1SDavid du Colombier argv++;
1113e12c5d1SDavid du Colombier argc--;
1123e12c5d1SDavid du Colombier } else
1133e12c5d1SDavid du Colombier iflag++;
1143e12c5d1SDavid du Colombier }
1153e12c5d1SDavid du Colombier if(argc < 1) {
1163e12c5d1SDavid du Colombier direc++;
1173e12c5d1SDavid du Colombier fold++;
1183e12c5d1SDavid du Colombier } else
1193e12c5d1SDavid du Colombier filename = argv[0];
1203e12c5d1SDavid du Colombier if (!iflag)
1213e12c5d1SDavid du Colombier rcanon(orig, key);
1223e12c5d1SDavid du Colombier dfile = Bopen(filename, OREAD);
1233e12c5d1SDavid du Colombier if(dfile == 0) {
1243e12c5d1SDavid du Colombier fprint(2, "look: can't open %s\n", filename);
1253e12c5d1SDavid du Colombier exits("no dictionary");
1263e12c5d1SDavid du Colombier }
1273e12c5d1SDavid du Colombier if(!iflag)
1283e12c5d1SDavid du Colombier if(!locate())
1293e12c5d1SDavid du Colombier exits("not found");
1303e12c5d1SDavid du Colombier do {
1313e12c5d1SDavid du Colombier if(iflag) {
1323e12c5d1SDavid du Colombier Bflush(&bout);
1333e12c5d1SDavid du Colombier if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
1343e12c5d1SDavid du Colombier exits(0);
1353e12c5d1SDavid du Colombier rcanon(orig, key);
1363e12c5d1SDavid du Colombier if(!locate())
1373e12c5d1SDavid du Colombier continue;
1383e12c5d1SDavid du Colombier }
1397dd7cddfSDavid du Colombier if (!exact || !acomp(word, key))
1403e12c5d1SDavid du Colombier Bprint(&bout, "%S\n", entry);
1413e12c5d1SDavid du Colombier while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
1423e12c5d1SDavid du Colombier rcanon(entry, word);
1437dd7cddfSDavid du Colombier n = compare(key, word);
1447dd7cddfSDavid du Colombier switch(n) {
1453e12c5d1SDavid du Colombier case -1:
1463e12c5d1SDavid du Colombier if(exact)
1473e12c5d1SDavid du Colombier break;
1483e12c5d1SDavid du Colombier case 0:
1493e12c5d1SDavid du Colombier if (!exact || !acomp(word, orig))
1503e12c5d1SDavid du Colombier Bprint(&bout, "%S\n", entry);
1513e12c5d1SDavid du Colombier continue;
1523e12c5d1SDavid du Colombier }
1533e12c5d1SDavid du Colombier break;
1543e12c5d1SDavid du Colombier }
1553e12c5d1SDavid du Colombier } while(iflag);
1563e12c5d1SDavid du Colombier exits(0);
1573e12c5d1SDavid du Colombier }
1583e12c5d1SDavid du Colombier
1593e12c5d1SDavid du Colombier int
locate(void)1603e12c5d1SDavid du Colombier locate(void)
1613e12c5d1SDavid du Colombier {
162*7fd2696aSDavid du Colombier vlong top, bot, mid;
1633e12c5d1SDavid du Colombier long c;
1647dd7cddfSDavid du Colombier int n;
1653e12c5d1SDavid du Colombier
1663e12c5d1SDavid du Colombier bot = 0;
167*7fd2696aSDavid du Colombier top = Bseek(dfile, 0, 2);
1683e12c5d1SDavid du Colombier for(;;) {
1693e12c5d1SDavid du Colombier mid = (top+bot) / 2;
1703e12c5d1SDavid du Colombier Bseek(dfile, mid, 0);
1717dd7cddfSDavid du Colombier do
1723e12c5d1SDavid du Colombier c = Bgetrune(dfile);
1737dd7cddfSDavid du Colombier while(c>=0 && c!='\n');
1747dd7cddfSDavid du Colombier mid = Boffset(dfile);
1753e12c5d1SDavid du Colombier if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
1763e12c5d1SDavid du Colombier break;
1773e12c5d1SDavid du Colombier rcanon(entry, word);
1787dd7cddfSDavid du Colombier n = compare(key, word);
1797dd7cddfSDavid du Colombier switch(n) {
1803e12c5d1SDavid du Colombier case -2:
1813e12c5d1SDavid du Colombier case -1:
1823e12c5d1SDavid du Colombier case 0:
1833e12c5d1SDavid du Colombier if(top <= mid)
1843e12c5d1SDavid du Colombier break;
1853e12c5d1SDavid du Colombier top = mid;
1863e12c5d1SDavid du Colombier continue;
1873e12c5d1SDavid du Colombier case 1:
1883e12c5d1SDavid du Colombier case 2:
1893e12c5d1SDavid du Colombier bot = mid;
1903e12c5d1SDavid du Colombier continue;
1913e12c5d1SDavid du Colombier }
1923e12c5d1SDavid du Colombier break;
1933e12c5d1SDavid du Colombier }
1943e12c5d1SDavid du Colombier Bseek(dfile, bot, 0);
1953e12c5d1SDavid du Colombier while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
1963e12c5d1SDavid du Colombier rcanon(entry, word);
1977dd7cddfSDavid du Colombier n = compare(key, word);
1987dd7cddfSDavid du Colombier switch(n) {
1993e12c5d1SDavid du Colombier case -2:
2003e12c5d1SDavid du Colombier return 0;
2013e12c5d1SDavid du Colombier case -1:
2023e12c5d1SDavid du Colombier if(exact)
2033e12c5d1SDavid du Colombier return 0;
2043e12c5d1SDavid du Colombier case 0:
2053e12c5d1SDavid du Colombier return 1;
2063e12c5d1SDavid du Colombier case 1:
2073e12c5d1SDavid du Colombier case 2:
2083e12c5d1SDavid du Colombier continue;
2093e12c5d1SDavid du Colombier }
2103e12c5d1SDavid du Colombier }
2113e12c5d1SDavid du Colombier return 0;
2123e12c5d1SDavid du Colombier }
2133e12c5d1SDavid du Colombier
2143e12c5d1SDavid du Colombier /*
2153e12c5d1SDavid du Colombier * acomp(s, t) returns:
2163e12c5d1SDavid du Colombier * -2 if s strictly precedes t
2173e12c5d1SDavid du Colombier * -1 if s is a prefix of t
2183e12c5d1SDavid du Colombier * 0 if s is the same as t
2193e12c5d1SDavid du Colombier * 1 if t is a prefix of s
2203e12c5d1SDavid du Colombier * 2 if t strictly precedes s
2213e12c5d1SDavid du Colombier */
2223e12c5d1SDavid du Colombier
2233e12c5d1SDavid du Colombier int
acomp(Rune * s,Rune * t)2243e12c5d1SDavid du Colombier acomp(Rune *s, Rune *t)
2253e12c5d1SDavid du Colombier {
2263e12c5d1SDavid du Colombier int cs, ct;
2273e12c5d1SDavid du Colombier
2283e12c5d1SDavid du Colombier for(;;) {
2293e12c5d1SDavid du Colombier cs = *s;
2303e12c5d1SDavid du Colombier ct = *t;
2313e12c5d1SDavid du Colombier if(cs != ct)
2323e12c5d1SDavid du Colombier break;
2333e12c5d1SDavid du Colombier if(cs == 0)
2343e12c5d1SDavid du Colombier return 0;
2353e12c5d1SDavid du Colombier s++;
2363e12c5d1SDavid du Colombier t++;
2373e12c5d1SDavid du Colombier }
2383e12c5d1SDavid du Colombier if(cs == 0)
2393e12c5d1SDavid du Colombier return -1;
2403e12c5d1SDavid du Colombier if(ct == 0)
2413e12c5d1SDavid du Colombier return 1;
2423e12c5d1SDavid du Colombier if(cs < ct)
2433e12c5d1SDavid du Colombier return -2;
2443e12c5d1SDavid du Colombier return 2;
2453e12c5d1SDavid du Colombier }
2463e12c5d1SDavid du Colombier
2473e12c5d1SDavid du Colombier void
torune(char * old,Rune * new)2483e12c5d1SDavid du Colombier torune(char *old, Rune *new)
2493e12c5d1SDavid du Colombier {
250bd389b36SDavid du Colombier do old += chartorune(new, old);
251bd389b36SDavid du Colombier while(*new++);
2523e12c5d1SDavid du Colombier }
2533e12c5d1SDavid du Colombier
2543e12c5d1SDavid du Colombier void
rcanon(Rune * old,Rune * new)2553e12c5d1SDavid du Colombier rcanon(Rune *old, Rune *new)
2563e12c5d1SDavid du Colombier {
2573e12c5d1SDavid du Colombier Rune r;
2583e12c5d1SDavid du Colombier
259bd389b36SDavid du Colombier while((r = *old++) && r != tab) {
2603e12c5d1SDavid du Colombier if (islatin1(r) && latin_fold_tab[r-0xc0])
2613e12c5d1SDavid du Colombier r = latin_fold_tab[r-0xc0];
2623e12c5d1SDavid du Colombier if(direc)
2633e12c5d1SDavid du Colombier if(!(isalnum(r) || r == L' ' || r == L'\t'))
2643e12c5d1SDavid du Colombier continue;
2653e12c5d1SDavid du Colombier if(fold)
2663e12c5d1SDavid du Colombier if(isupper(r))
2673e12c5d1SDavid du Colombier r = tolower(r);
2683e12c5d1SDavid du Colombier *new++ = r;
2693e12c5d1SDavid du Colombier }
2703e12c5d1SDavid du Colombier *new = 0;
2713e12c5d1SDavid du Colombier }
2723e12c5d1SDavid du Colombier
2733e12c5d1SDavid du Colombier int
ncomp(Rune * s,Rune * t)2743e12c5d1SDavid du Colombier ncomp(Rune *s, Rune *t)
2753e12c5d1SDavid du Colombier {
2763e12c5d1SDavid du Colombier Rune *is, *it, *js, *jt;
2773e12c5d1SDavid du Colombier int a, b;
2783e12c5d1SDavid du Colombier int ssgn, tsgn;
2793e12c5d1SDavid du Colombier
2803e12c5d1SDavid du Colombier while(isspace(*s))
2813e12c5d1SDavid du Colombier s++;
2823e12c5d1SDavid du Colombier while(isspace(*t))
2833e12c5d1SDavid du Colombier t++;
2843e12c5d1SDavid du Colombier ssgn = tsgn = -2*rev;
2853e12c5d1SDavid du Colombier if(*s == '-') {
2863e12c5d1SDavid du Colombier s++;
2873e12c5d1SDavid du Colombier ssgn = -ssgn;
2883e12c5d1SDavid du Colombier }
2893e12c5d1SDavid du Colombier if(*t == '-') {
2903e12c5d1SDavid du Colombier t++;
2913e12c5d1SDavid du Colombier tsgn = -tsgn;
2923e12c5d1SDavid du Colombier }
2933e12c5d1SDavid du Colombier for(is = s; isdigit(*is); is++)
2943e12c5d1SDavid du Colombier ;
2953e12c5d1SDavid du Colombier for(it = t; isdigit(*it); it++)
2963e12c5d1SDavid du Colombier ;
2973e12c5d1SDavid du Colombier js = is;
2983e12c5d1SDavid du Colombier jt = it;
2993e12c5d1SDavid du Colombier a = 0;
3003e12c5d1SDavid du Colombier if(ssgn == tsgn)
3013e12c5d1SDavid du Colombier while(it>t && is>s)
3023e12c5d1SDavid du Colombier if(b = *--it - *--is)
3033e12c5d1SDavid du Colombier a = b;
3043e12c5d1SDavid du Colombier while(is > s)
3053e12c5d1SDavid du Colombier if(*--is != '0')
3063e12c5d1SDavid du Colombier return -ssgn;
3073e12c5d1SDavid du Colombier while(it > t)
3083e12c5d1SDavid du Colombier if(*--it != '0')
3093e12c5d1SDavid du Colombier return tsgn;
3103e12c5d1SDavid du Colombier if(a)
3113e12c5d1SDavid du Colombier return sgn(a)*ssgn;
3123e12c5d1SDavid du Colombier if(*(s=js) == '.')
3133e12c5d1SDavid du Colombier s++;
3143e12c5d1SDavid du Colombier if(*(t=jt) == '.')
3153e12c5d1SDavid du Colombier t++;
3163e12c5d1SDavid du Colombier if(ssgn == tsgn)
3173e12c5d1SDavid du Colombier while(isdigit(*s) && isdigit(*t))
3183e12c5d1SDavid du Colombier if(a = *t++ - *s++)
3193e12c5d1SDavid du Colombier return sgn(a)*ssgn;
3203e12c5d1SDavid du Colombier while(isdigit(*s))
3213e12c5d1SDavid du Colombier if(*s++ != '0')
3223e12c5d1SDavid du Colombier return -ssgn;
3233e12c5d1SDavid du Colombier while(isdigit(*t))
3243e12c5d1SDavid du Colombier if(*t++ != '0')
3253e12c5d1SDavid du Colombier return tsgn;
3263e12c5d1SDavid du Colombier return 0;
3273e12c5d1SDavid du Colombier }
3283e12c5d1SDavid du Colombier
3293e12c5d1SDavid du Colombier int
getword(Biobuf * f,Rune * rp,int n)3303e12c5d1SDavid du Colombier getword(Biobuf *f, Rune *rp, int n)
3313e12c5d1SDavid du Colombier {
3323e12c5d1SDavid du Colombier long c;
3333e12c5d1SDavid du Colombier
3343e12c5d1SDavid du Colombier while(n-- > 0) {
3357dd7cddfSDavid du Colombier c = Bgetrune(f);
3367dd7cddfSDavid du Colombier if(c < 0)
3373e12c5d1SDavid du Colombier return 0;
3383e12c5d1SDavid du Colombier if(c == '\n') {
3393e12c5d1SDavid du Colombier *rp = L'\0';
3403e12c5d1SDavid du Colombier return 1;
3413e12c5d1SDavid du Colombier }
3423e12c5d1SDavid du Colombier *rp++ = c;
3433e12c5d1SDavid du Colombier }
3443e12c5d1SDavid du Colombier fprint(2, "Look: word too long. Bailing out.\n");
3453e12c5d1SDavid du Colombier return 0;
3463e12c5d1SDavid du Colombier }
347