1*48292Sbostic /*-
2*48292Sbostic * %sccs.include.proprietary.c%
3*48292Sbostic */
4*48292Sbostic
512310Stut #ifndef lint
6*48292Sbostic static char sccsid[] = "@(#)what4.c 4.3 (Berkeley) 04/18/91";
7*48292Sbostic #endif /* not lint */
812310Stut
912310Stut #include "what..c"
1012310Stut #define NW 5
1112310Stut #define ZIPF 10
1212310Stut #define HASHF 3
1312310Stut #define WLEN 10
1412310Stut #define SAME 0
1512310Stut #define TSIZE HASHF*ZIPF*NW
1612310Stut #define NF 10
1712310Stut
1812310Stut struct wst {
1912310Stut char *tx;
2012310Stut int ct;
2112310Stut }
2212310Stut ;
2312310Stut int HSIZE;
2412310Stut static struct wst word[TSIZE];
2532129Sbostic static char tbuf[NW*ZIPF*WLEN], *tp = tbuf;
2612310Stut
freqwd(fn,wd,nin)2712310Stut freqwd ( fn, wd, nin )
2812310Stut char *fn[], *wd[];
2912310Stut {
3012310Stut FILE *fi[NF];
3132129Sbostic int nw = 0, i, any, nf, j, wexch(), wcomp();
3212310Stut char tw[20];
3312310Stut for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
3412310Stut for(nf=0; fn[nf] && nf<NF; nf++)
3512310Stut fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
3612310Stut do {
3712310Stut any=0;
3812310Stut for(i=0; i<nf; i++)
3912310Stut {
4012310Stut if (fi[i]==NULL) continue;
4112310Stut if (gw(fi[i], tw)==0)
4212310Stut {
4312310Stut fclose(fi[i]);
4412310Stut fi[i]==NULL;
4512310Stut continue;
4612310Stut }
4712310Stut any=1;
4812310Stut if (common(tw)) continue;
4912310Stut if (strlen(tw)<3) continue;
5012310Stut j = lookup (tw);
5112310Stut if (j<0 && nw < ZIPF*NW)
5212310Stut {
5312310Stut j = -j;
5412310Stut strcpy (tp, tw);
5512310Stut word[j].tx = tp;
5612310Stut while (*tp++);
5712310Stut _assert (tp < tbuf+NW*ZIPF*WLEN);
5812310Stut word[j].ct = 1;
5912310Stut nw++;
6012310Stut }
6112310Stut else if (j>0)
6212310Stut word[j].ct++;
6312310Stut }
6412310Stut }
6512310Stut while (any>0);
6612310Stut shell ( TSIZE, wcomp, wexch );
6712310Stut for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
6812310Stut if (nw>=nin*2 && word[nw].ct != word[0].ct)
6912310Stut break;
7012310Stut for(i=0; i<nw; i++)
7112310Stut wd[i] = word[i].tx;
7212310Stut return(nw);
7312310Stut }
7412310Stut
lookup(wt)7512310Stut lookup (wt)
7612310Stut char *wt;
7712310Stut {
7812310Stut int h;
7912310Stut h = hash(wt);
8012310Stut for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE)
8112310Stut {
8212310Stut if (h==0) continue;
8312310Stut if (strcmp(wt, word[h].tx) == SAME)
8412310Stut return (h);
8512310Stut }
8612310Stut return ( -h );
8712310Stut }
8812310Stut
hash(s)8912310Stut hash (s)
9012310Stut char *s;
9112310Stut {
9232129Sbostic int k = 0, c = 0, i = 0;
9312310Stut while ( c = *s++ )
9412310Stut k ^= (c << (i++%5) );
9512310Stut return (k>0 ? k : -k);
9612310Stut }
9712310Stut
gw(f,t)9812310Stut gw (f, t)
9912310Stut char *t;
10012310Stut FILE *f;
10112310Stut {
10232129Sbostic int start = 1, oldc = ' ', c;
10312310Stut if (f==NULL) return (0);
10412310Stut while ( (c=getc(f)) != EOF)
10512310Stut {
10612310Stut if (isupper(c)) c= tolower(c);
10712310Stut if (start==1)
10812310Stut if (!alphanum(c, oldc))
10912310Stut continue;
11012310Stut else
11112310Stut start=0;
11212310Stut if (start==0)
11312310Stut if (alphanum(c, oldc))
11412310Stut *t++ = c;
11512310Stut else
11612310Stut {
11712310Stut *t=0;
11812310Stut return(1);
11912310Stut }
12012310Stut oldc=c;
12112310Stut }
12212310Stut return(0);
12312310Stut }
12412310Stut
alphanum(c,oldc)12512310Stut alphanum( c, oldc )
12612310Stut {
12712310Stut if (isalpha(c) || isdigit(c)) return(1);
12812310Stut if (isalpha(oldc))
12912310Stut if (c== '\'' || c == '-') return(1);
13012310Stut return(0);
13112310Stut }
13212310Stut
wcomp(n1,n2)13312310Stut wcomp (n1, n2)
13412310Stut {
13512310Stut return (word[n1].ct >= word[n2].ct);
13612310Stut }
13712310Stut
wexch(n1,n2)13812310Stut wexch (n1, n2)
13912310Stut {
14012310Stut struct wst tt;
14112310Stut tt.tx = word[n1].tx;
14212310Stut tt.ct = word[n1].ct;
14312310Stut word[n1].tx = word[n2].tx;
14412310Stut word[n1].ct = word[n2].ct;
14512310Stut word[n2].tx = tt.tx;
14612310Stut word[n2].ct = tt.ct;
14712310Stut }
14812310Stut
prime(n)14912310Stut prime(n)
15012310Stut {
15112310Stut /* only executed once- slow is ok */
15212310Stut int i;
15312310Stut if (n%2==0) return(0);
15412310Stut for(i=3; i*i<=n; i+= 2)
15512310Stut if (n%i ==0 ) return(0);
15612310Stut return(1);
15712310Stut }
15812310Stut
trimnl(s)15912310Stut trimnl(s)
16012310Stut char *s;
16112310Stut {
16212310Stut while (*s)s++;
16312310Stut if (*--s=='\n') *s=0;
16412310Stut }
16512310Stut
16612310Stut /* this is the test for what4.c as a standalone prog ... */
16712310Stut # ifdef 0
16812310Stut main (argc, argv)
16912310Stut char *argv[];
17012310Stut {
17112310Stut char *ff[10], *wd[20], **ffp ff;
17212310Stut int n, i;
17312310Stut
17412310Stut while (--argc)
17512310Stut *ffp++ = *++argv;
17612310Stut *ffp=0;
17712310Stut n=freqwd(ff,wd);
17812310Stut for(i=0; i<n; i++)
17912310Stut printf("%s\n",wd[i]);
18012310Stut printf("total of %d items\n",n);
18112310Stut }
18212310Stut # endif 0
183