112310Stut #ifndef lint 2*32129Sbostic static char *sccsid = "@(#)what4.c 4.2 (Berkeley) 09/11/87"; 312310Stut #endif 412310Stut 512310Stut #include "what..c" 612310Stut #define NW 5 712310Stut #define ZIPF 10 812310Stut #define HASHF 3 912310Stut #define WLEN 10 1012310Stut #define SAME 0 1112310Stut #define TSIZE HASHF*ZIPF*NW 1212310Stut #define NF 10 1312310Stut 1412310Stut struct wst { 1512310Stut char *tx; 1612310Stut int ct; 1712310Stut } 1812310Stut ; 1912310Stut int HSIZE; 2012310Stut static struct wst word[TSIZE]; 21*32129Sbostic static char tbuf[NW*ZIPF*WLEN], *tp = tbuf; 2212310Stut 2312310Stut freqwd ( fn, wd, nin ) 2412310Stut char *fn[], *wd[]; 2512310Stut { 2612310Stut FILE *fi[NF]; 27*32129Sbostic int nw = 0, i, any, nf, j, wexch(), wcomp(); 2812310Stut char tw[20]; 2912310Stut for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); 3012310Stut for(nf=0; fn[nf] && nf<NF; nf++) 3112310Stut fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; 3212310Stut do { 3312310Stut any=0; 3412310Stut for(i=0; i<nf; i++) 3512310Stut { 3612310Stut if (fi[i]==NULL) continue; 3712310Stut if (gw(fi[i], tw)==0) 3812310Stut { 3912310Stut fclose(fi[i]); 4012310Stut fi[i]==NULL; 4112310Stut continue; 4212310Stut } 4312310Stut any=1; 4412310Stut if (common(tw)) continue; 4512310Stut if (strlen(tw)<3) continue; 4612310Stut j = lookup (tw); 4712310Stut if (j<0 && nw < ZIPF*NW) 4812310Stut { 4912310Stut j = -j; 5012310Stut strcpy (tp, tw); 5112310Stut word[j].tx = tp; 5212310Stut while (*tp++); 5312310Stut _assert (tp < tbuf+NW*ZIPF*WLEN); 5412310Stut word[j].ct = 1; 5512310Stut nw++; 5612310Stut } 5712310Stut else if (j>0) 5812310Stut word[j].ct++; 5912310Stut } 6012310Stut } 6112310Stut while (any>0); 6212310Stut shell ( TSIZE, wcomp, wexch ); 6312310Stut for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) 6412310Stut if (nw>=nin*2 && word[nw].ct != word[0].ct) 6512310Stut break; 6612310Stut for(i=0; i<nw; i++) 6712310Stut wd[i] = word[i].tx; 6812310Stut return(nw); 6912310Stut } 7012310Stut 7112310Stut lookup (wt) 7212310Stut char *wt; 7312310Stut { 7412310Stut int h; 7512310Stut h = hash(wt); 7612310Stut for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) 7712310Stut { 7812310Stut if (h==0) continue; 7912310Stut if (strcmp(wt, word[h].tx) == SAME) 8012310Stut return (h); 8112310Stut } 8212310Stut return ( -h ); 8312310Stut } 8412310Stut 8512310Stut hash (s) 8612310Stut char *s; 8712310Stut { 88*32129Sbostic int k = 0, c = 0, i = 0; 8912310Stut while ( c = *s++ ) 9012310Stut k ^= (c << (i++%5) ); 9112310Stut return (k>0 ? k : -k); 9212310Stut } 9312310Stut 9412310Stut gw (f, t) 9512310Stut char *t; 9612310Stut FILE *f; 9712310Stut { 98*32129Sbostic int start = 1, oldc = ' ', c; 9912310Stut if (f==NULL) return (0); 10012310Stut while ( (c=getc(f)) != EOF) 10112310Stut { 10212310Stut if (isupper(c)) c= tolower(c); 10312310Stut if (start==1) 10412310Stut if (!alphanum(c, oldc)) 10512310Stut continue; 10612310Stut else 10712310Stut start=0; 10812310Stut if (start==0) 10912310Stut if (alphanum(c, oldc)) 11012310Stut *t++ = c; 11112310Stut else 11212310Stut { 11312310Stut *t=0; 11412310Stut return(1); 11512310Stut } 11612310Stut oldc=c; 11712310Stut } 11812310Stut return(0); 11912310Stut } 12012310Stut 12112310Stut alphanum( c, oldc ) 12212310Stut { 12312310Stut if (isalpha(c) || isdigit(c)) return(1); 12412310Stut if (isalpha(oldc)) 12512310Stut if (c== '\'' || c == '-') return(1); 12612310Stut return(0); 12712310Stut } 12812310Stut 12912310Stut wcomp (n1, n2) 13012310Stut { 13112310Stut return (word[n1].ct >= word[n2].ct); 13212310Stut } 13312310Stut 13412310Stut wexch (n1, n2) 13512310Stut { 13612310Stut struct wst tt; 13712310Stut tt.tx = word[n1].tx; 13812310Stut tt.ct = word[n1].ct; 13912310Stut word[n1].tx = word[n2].tx; 14012310Stut word[n1].ct = word[n2].ct; 14112310Stut word[n2].tx = tt.tx; 14212310Stut word[n2].ct = tt.ct; 14312310Stut } 14412310Stut 14512310Stut prime(n) 14612310Stut { 14712310Stut /* only executed once- slow is ok */ 14812310Stut int i; 14912310Stut if (n%2==0) return(0); 15012310Stut for(i=3; i*i<=n; i+= 2) 15112310Stut if (n%i ==0 ) return(0); 15212310Stut return(1); 15312310Stut } 15412310Stut 15512310Stut trimnl(s) 15612310Stut char *s; 15712310Stut { 15812310Stut while (*s)s++; 15912310Stut if (*--s=='\n') *s=0; 16012310Stut } 16112310Stut 16212310Stut /* this is the test for what4.c as a standalone prog ... */ 16312310Stut # ifdef 0 16412310Stut main (argc, argv) 16512310Stut char *argv[]; 16612310Stut { 16712310Stut char *ff[10], *wd[20], **ffp ff; 16812310Stut int n, i; 16912310Stut 17012310Stut while (--argc) 17112310Stut *ffp++ = *++argv; 17212310Stut *ffp=0; 17312310Stut n=freqwd(ff,wd); 17412310Stut for(i=0; i<n; i++) 17512310Stut printf("%s\n",wd[i]); 17612310Stut printf("total of %d items\n",n); 17712310Stut } 17812310Stut # endif 0 179