1*12310Stut #ifndef lint 2*12310Stut static char *sccsid = "@(#)what4.c 4.1 (Berkeley) 05/06/83"; 3*12310Stut #endif 4*12310Stut 5*12310Stut #include "what..c" 6*12310Stut #define NW 5 7*12310Stut #define ZIPF 10 8*12310Stut #define HASHF 3 9*12310Stut #define WLEN 10 10*12310Stut #define SAME 0 11*12310Stut #define TSIZE HASHF*ZIPF*NW 12*12310Stut #define NF 10 13*12310Stut 14*12310Stut struct wst { 15*12310Stut char *tx; 16*12310Stut int ct; 17*12310Stut } 18*12310Stut ; 19*12310Stut int HSIZE; 20*12310Stut static struct wst word[TSIZE]; 21*12310Stut static char tbuf[NW*ZIPF*WLEN], *tp tbuf; 22*12310Stut 23*12310Stut freqwd ( fn, wd, nin ) 24*12310Stut char *fn[], *wd[]; 25*12310Stut { 26*12310Stut FILE *fi[NF]; 27*12310Stut int nw 0, i, any, nf, j, wexch(), wcomp(); 28*12310Stut char tw[20]; 29*12310Stut for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--); 30*12310Stut for(nf=0; fn[nf] && nf<NF; nf++) 31*12310Stut fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL; 32*12310Stut do { 33*12310Stut any=0; 34*12310Stut for(i=0; i<nf; i++) 35*12310Stut { 36*12310Stut if (fi[i]==NULL) continue; 37*12310Stut if (gw(fi[i], tw)==0) 38*12310Stut { 39*12310Stut fclose(fi[i]); 40*12310Stut fi[i]==NULL; 41*12310Stut continue; 42*12310Stut } 43*12310Stut any=1; 44*12310Stut if (common(tw)) continue; 45*12310Stut if (strlen(tw)<3) continue; 46*12310Stut j = lookup (tw); 47*12310Stut if (j<0 && nw < ZIPF*NW) 48*12310Stut { 49*12310Stut j = -j; 50*12310Stut strcpy (tp, tw); 51*12310Stut word[j].tx = tp; 52*12310Stut while (*tp++); 53*12310Stut _assert (tp < tbuf+NW*ZIPF*WLEN); 54*12310Stut word[j].ct = 1; 55*12310Stut nw++; 56*12310Stut } 57*12310Stut else if (j>0) 58*12310Stut word[j].ct++; 59*12310Stut } 60*12310Stut } 61*12310Stut while (any>0); 62*12310Stut shell ( TSIZE, wcomp, wexch ); 63*12310Stut for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++) 64*12310Stut if (nw>=nin*2 && word[nw].ct != word[0].ct) 65*12310Stut break; 66*12310Stut for(i=0; i<nw; i++) 67*12310Stut wd[i] = word[i].tx; 68*12310Stut return(nw); 69*12310Stut } 70*12310Stut 71*12310Stut lookup (wt) 72*12310Stut char *wt; 73*12310Stut { 74*12310Stut int h; 75*12310Stut h = hash(wt); 76*12310Stut for( h = h%HSIZE; word[h].tx; h = (h+1)%HSIZE) 77*12310Stut { 78*12310Stut if (h==0) continue; 79*12310Stut if (strcmp(wt, word[h].tx) == SAME) 80*12310Stut return (h); 81*12310Stut } 82*12310Stut return ( -h ); 83*12310Stut } 84*12310Stut 85*12310Stut hash (s) 86*12310Stut char *s; 87*12310Stut { 88*12310Stut int k 0, c 0, i 0; 89*12310Stut while ( c = *s++ ) 90*12310Stut k ^= (c << (i++%5) ); 91*12310Stut return (k>0 ? k : -k); 92*12310Stut } 93*12310Stut 94*12310Stut gw (f, t) 95*12310Stut char *t; 96*12310Stut FILE *f; 97*12310Stut { 98*12310Stut int start 1, oldc ' ', c; 99*12310Stut if (f==NULL) return (0); 100*12310Stut while ( (c=getc(f)) != EOF) 101*12310Stut { 102*12310Stut if (isupper(c)) c= tolower(c); 103*12310Stut if (start==1) 104*12310Stut if (!alphanum(c, oldc)) 105*12310Stut continue; 106*12310Stut else 107*12310Stut start=0; 108*12310Stut if (start==0) 109*12310Stut if (alphanum(c, oldc)) 110*12310Stut *t++ = c; 111*12310Stut else 112*12310Stut { 113*12310Stut *t=0; 114*12310Stut return(1); 115*12310Stut } 116*12310Stut oldc=c; 117*12310Stut } 118*12310Stut return(0); 119*12310Stut } 120*12310Stut 121*12310Stut alphanum( c, oldc ) 122*12310Stut { 123*12310Stut if (isalpha(c) || isdigit(c)) return(1); 124*12310Stut if (isalpha(oldc)) 125*12310Stut if (c== '\'' || c == '-') return(1); 126*12310Stut return(0); 127*12310Stut } 128*12310Stut 129*12310Stut wcomp (n1, n2) 130*12310Stut { 131*12310Stut return (word[n1].ct >= word[n2].ct); 132*12310Stut } 133*12310Stut 134*12310Stut wexch (n1, n2) 135*12310Stut { 136*12310Stut struct wst tt; 137*12310Stut tt.tx = word[n1].tx; 138*12310Stut tt.ct = word[n1].ct; 139*12310Stut word[n1].tx = word[n2].tx; 140*12310Stut word[n1].ct = word[n2].ct; 141*12310Stut word[n2].tx = tt.tx; 142*12310Stut word[n2].ct = tt.ct; 143*12310Stut } 144*12310Stut 145*12310Stut prime(n) 146*12310Stut { 147*12310Stut /* only executed once- slow is ok */ 148*12310Stut int i; 149*12310Stut if (n%2==0) return(0); 150*12310Stut for(i=3; i*i<=n; i+= 2) 151*12310Stut if (n%i ==0 ) return(0); 152*12310Stut return(1); 153*12310Stut } 154*12310Stut 155*12310Stut trimnl(s) 156*12310Stut char *s; 157*12310Stut { 158*12310Stut while (*s)s++; 159*12310Stut if (*--s=='\n') *s=0; 160*12310Stut } 161*12310Stut 162*12310Stut /* this is the test for what4.c as a standalone prog ... */ 163*12310Stut # ifdef 0 164*12310Stut main (argc, argv) 165*12310Stut char *argv[]; 166*12310Stut { 167*12310Stut char *ff[10], *wd[20], **ffp ff; 168*12310Stut int n, i; 169*12310Stut 170*12310Stut while (--argc) 171*12310Stut *ffp++ = *++argv; 172*12310Stut *ffp=0; 173*12310Stut n=freqwd(ff,wd); 174*12310Stut for(i=0; i<n; i++) 175*12310Stut printf("%s\n",wd[i]); 176*12310Stut printf("total of %d items\n",n); 177*12310Stut } 178*12310Stut # endif 0 179