19075Srrh #ifndef lint 2*9080Srrh static char sccsid[] = "@(#)dprog.c 4.2 (Berkeley) 82/11/06"; 39075Srrh #endif not lint 49075Srrh 59075Srrh /* 6*9080Srrh * diction -- print all sentences containing one of default phrases 79075Srrh * 89075Srrh * status returns: 99075Srrh * 0 - ok, and some matches 109075Srrh * 1 - ok, but no matches 119075Srrh * 2 - some error 129075Srrh */ 139075Srrh 149075Srrh #include <stdio.h> 159075Srrh #include <ctype.h> 169075Srrh 179075Srrh #define MAXSIZ 6500 189075Srrh #define QSIZE 650 19*9080Srrh int linemsg; 20*9080Srrh long olcount; 21*9080Srrh long lcount; 229075Srrh struct words { 239075Srrh char inp; 249075Srrh char out; 259075Srrh struct words *nst; 269075Srrh struct words *link; 279075Srrh struct words *fail; 289075Srrh } w[MAXSIZ], *smax, *q; 299075Srrh 30*9080Srrh char table[128] = { 31*9080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 32*9080Srrh 0, 0, ' ', 0, 0, 0, 0, 0, 33*9080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 34*9080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 35*9080Srrh ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', 36*9080Srrh ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', 37*9080Srrh '0', '1', '2', '3', '4', '5', '6', '7', 38*9080Srrh '8', '9', ' ', ' ', ' ', ' ', ' ', '.', 39*9080Srrh ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 40*9080Srrh 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 41*9080Srrh 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 42*9080Srrh 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', 43*9080Srrh ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 44*9080Srrh 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 45*9080Srrh 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 46*9080Srrh 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' 47*9080Srrh }; 48*9080Srrh int caps = 0; 49*9080Srrh int lineno = 0; 509075Srrh int fflag; 519075Srrh int nflag = 1; /*use default file*/ 529075Srrh char *filename; 53*9080Srrh int mflg = 0; /*don't catch output*/ 549075Srrh int nfile; 559075Srrh int nsucc; 569075Srrh long nsent = 0; 579075Srrh long nhits = 0; 589075Srrh char *nlp; 599075Srrh char *begp, *endp; 60*9080Srrh int beg, last; 61*9080Srrh char *myst; 62*9080Srrh int myct = 0; 639075Srrh int oct = 0; 649075Srrh FILE *wordf; 65*9080Srrh FILE *mine; 669075Srrh char *argptr; 67*9080Srrh long tl = 0; 68*9080Srrh long th = 0; 699075Srrh 709075Srrh main(argc, argv) 71*9080Srrh char *argv[]; 729075Srrh { 73*9080Srrh int sv; 749075Srrh while (--argc > 0 && (++argv)[0][0]=='-') 759075Srrh switch (argv[0][1]) { 769075Srrh 779075Srrh case 'f': 789075Srrh fflag++; 79*9080Srrh filename = (++argv)[0]; 809075Srrh argc--; 819075Srrh continue; 829075Srrh 839075Srrh case 'n': 849075Srrh nflag = 0; 859075Srrh continue; 869075Srrh case 'd': 87*9080Srrh mflg=0; 889075Srrh continue; 89*9080Srrh case 'c': 90*9080Srrh caps++; 91*9080Srrh continue; 92*9080Srrh case 'l': 93*9080Srrh lineno++; 94*9080Srrh continue; 959075Srrh default: 969075Srrh fprintf(stderr, "diction: unknown flag\n"); 979075Srrh continue; 989075Srrh } 999075Srrh out: 1009075Srrh if(nflag){ 1019075Srrh wordf = fopen(DICT,"r"); 1029075Srrh if(wordf == NULL){ 1039075Srrh fprintf(stderr,"diction: can't open default dictionary\n"); 1049075Srrh exit(2); 1059075Srrh } 1069075Srrh } 1079075Srrh else { 108*9080Srrh wordf = fopen(filename,"r"); 1099075Srrh if(wordf == NULL){ 1109075Srrh fprintf(stderr,"diction: can't open %s\n",filename); 1119075Srrh exit(2); 1129075Srrh } 1139075Srrh } 1149075Srrh 115*9080Srrh #ifdef CATCH 116*9080Srrh if(fopen(CATCH,"r") != NULL) 117*9080Srrh if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; 118*9080Srrh #endif 119*9080Srrh #ifdef MACS 120*9080Srrh if(caps){ 121*9080Srrh printf(".so "); 122*9080Srrh printf(MACS); 123*9080Srrh printf("\n"); 124*9080Srrh } 125*9080Srrh #endif 1269075Srrh cgotofn(); 1279075Srrh cfail(); 1289075Srrh nfile = argc; 1299075Srrh if (argc<=0) { 1309075Srrh execute((char *)NULL); 1319075Srrh } 1329075Srrh else while (--argc >= 0) { 1339075Srrh execute(*argv); 134*9080Srrh if(lineno){ 135*9080Srrh printf("file %s: number of lines %ld number of phrases found %ld\n", 136*9080Srrh *argv, lcount-1, nhits); 137*9080Srrh tl += lcount-1; 138*9080Srrh th += nhits; 139*9080Srrh sv = lcount-1; 140*9080Srrh lcount = nhits = 0; 141*9080Srrh } 1429075Srrh argv++; 1439075Srrh } 144*9080Srrh if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); 145*9080Srrh if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); 146*9080Srrh else if(tl != sv) 147*9080Srrh if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); 1489075Srrh exit(nsucc == 0); 1499075Srrh } 1509075Srrh 1519075Srrh execute(file) 1529075Srrh char *file; 1539075Srrh { 1549075Srrh register char *p; 1559075Srrh register struct words *c; 1569075Srrh register ccount; 157*9080Srrh int count1; 158*9080Srrh char *beg1; 1599075Srrh struct words *savc; 1609075Srrh char *savp; 1619075Srrh int savct; 1629075Srrh int scr; 1639075Srrh char buf[1024]; 1649075Srrh int f; 1659075Srrh int hit; 166*9080Srrh last = 0; 1679075Srrh if (file) { 1689075Srrh if ((f = open(file, 0)) < 0) { 1699075Srrh fprintf(stderr, "diction: can't open %s\n", file); 1709075Srrh exit(2); 1719075Srrh } 1729075Srrh } 1739075Srrh else f = 0; 174*9080Srrh lcount = olcount = 1; 175*9080Srrh linemsg = 1; 1769075Srrh ccount = 0; 177*9080Srrh count1 = -1; 1789075Srrh p = buf; 1799075Srrh nlp = p; 1809075Srrh c = w; 1819075Srrh oct = hit = 0; 182*9080Srrh savc = (struct words *) 0; 183*9080Srrh savp = (char *) 0; 1849075Srrh for (;;) { 185*9080Srrh if(--ccount <= 0) { 1869075Srrh if (p == &buf[1024]) p = buf; 1879075Srrh if (p > &buf[512]) { 1889075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 1899075Srrh } 1909075Srrh else if ((ccount = read(f, p, 512)) <= 0) break; 191*9080Srrh if(caps && (count1 > 0)) 192*9080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 193*9080Srrh count1 = ccount; 194*9080Srrh beg1 = p; 1959075Srrh } 1969075Srrh if(p == &buf[1024])p=buf; 1979075Srrh nstate: 198*9080Srrh if (c->inp == table[*p]) { 1999075Srrh c = c->nst; 2009075Srrh } 2019075Srrh else if (c->link != 0) { 2029075Srrh c = c->link; 2039075Srrh goto nstate; 2049075Srrh } 2059075Srrh else { 2069075Srrh if(savp != 0){ 2079075Srrh c=savc; 2089075Srrh p=savp; 2099075Srrh if(ccount > savct)ccount += savct; 2109075Srrh else ccount = savct; 211*9080Srrh savc = (struct words *) 0; 212*9080Srrh savp = (char *) 0; 2139075Srrh goto hadone; 2149075Srrh } 2159075Srrh c = c->fail; 2169075Srrh if (c==0) { 2179075Srrh c = w; 2189075Srrh istate: 219*9080Srrh if (c->inp == table[*p]) { 2209075Srrh c = c->nst; 2219075Srrh } 2229075Srrh else if (c->link != 0) { 2239075Srrh c = c->link; 2249075Srrh goto istate; 2259075Srrh } 2269075Srrh } 2279075Srrh else goto nstate; 2289075Srrh } 2299075Srrh if(c->out){ 230*9080Srrh if((c->inp == table[*(p+1)]) && (c->nst != 0)){ 2319075Srrh savp=p; 2329075Srrh savc=c; 2339075Srrh savct=ccount; 2349075Srrh goto cont; 2359075Srrh } 2369075Srrh else if(c->link != 0){ 2379075Srrh savc=c; 2389075Srrh while((savc=savc->link)!= 0){ 239*9080Srrh if(savc->inp == table[*(p+1)]){ 2409075Srrh savp=p; 2419075Srrh savc=c; 2429075Srrh savct=ccount; 2439075Srrh goto cont; 2449075Srrh } 2459075Srrh } 2469075Srrh } 2479075Srrh hadone: 248*9080Srrh savc = (struct words *) 0; 249*9080Srrh savp = (char *) 0; 2509075Srrh if(c->out == (char)(0377)){ 2519075Srrh c=w; 2529075Srrh goto nstate; 2539075Srrh } 2549075Srrh begp = p - (c->out); 2559075Srrh if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); 2569075Srrh endp=p; 257*9080Srrh if(mflg){ 258*9080Srrh if(begp-20 < &buf[0]){ 259*9080Srrh myst = &buf[1024]-20; 260*9080Srrh if(nlp < &buf[512])myst=nlp; 261*9080Srrh } 262*9080Srrh else myst = begp-20; 263*9080Srrh if(myst < nlp)myst = nlp; 264*9080Srrh beg = 0; 265*9080Srrh } 2669075Srrh hit = 1; 2679075Srrh nhits++; 268*9080Srrh if(*p == '\n')lcount++; 269*9080Srrh if (table[*p++] == '.') { 270*9080Srrh linemsg = 1; 2719075Srrh if (--ccount <= 0) { 2729075Srrh if (p == &buf[1024]) p = buf; 2739075Srrh if (p > &buf[512]) { 2749075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 2759075Srrh } 2769075Srrh else if ((ccount = read(f, p, 512)) <= 0) break; 277*9080Srrh if(caps && (count1 > 0)) 278*9080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 279*9080Srrh count1=ccount; 280*9080Srrh beg1=p; 2819075Srrh } 2829075Srrh } 2839075Srrh succeed: nsucc = 1; 2849075Srrh { 2859075Srrh if (p <= nlp) { 286*9080Srrh outc(&buf[1024],file); 2879075Srrh nlp = buf; 2889075Srrh } 289*9080Srrh outc(p,file); 2909075Srrh } 291*9080Srrh if(mflg)last=1; 2929075Srrh nomatch: 2939075Srrh nlp = p; 2949075Srrh c = w; 2959075Srrh begp = endp = 0; 2969075Srrh continue; 2979075Srrh } 2989075Srrh cont: 299*9080Srrh if(*p == '\n')lcount++; 300*9080Srrh if (table[*p++] == '.'){ 3019075Srrh if(hit){ 3029075Srrh if(p <= nlp){ 303*9080Srrh outc(&buf[1024],file); 3049075Srrh nlp = buf; 3059075Srrh } 306*9080Srrh outc(p,file); 307*9080Srrh if(!caps)printf("\n\n"); 308*9080Srrh if(mflg && last){putc('\n',mine);myct = 0;} 3099075Srrh } 310*9080Srrh linemsg = 1; 311*9080Srrh if(*p == '\n')olcount = lcount+1; 312*9080Srrh else 313*9080Srrh olcount=lcount; 314*9080Srrh last = 0; 3159075Srrh hit = 0; 3169075Srrh oct = 0; 3179075Srrh nlp = p; 3189075Srrh c = w; 3199075Srrh begp = endp = 0; 320*9080Srrh nsent++; 3219075Srrh } 3229075Srrh } 323*9080Srrh if(caps && (count1 > 0)) 324*9080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 3259075Srrh close(f); 3269075Srrh } 3279075Srrh 3289075Srrh getargc() 3299075Srrh { 3309075Srrh register c; 3319075Srrh if (wordf){ 3329075Srrh if((c=getc(wordf))==EOF){ 3339075Srrh fclose(wordf); 3349075Srrh if(nflag && fflag){ 3359075Srrh nflag=0; 336*9080Srrh wordf=fopen(filename,"r"); 3379075Srrh if(wordf == NULL){ 338*9080Srrh fprintf("diction can't open %s\n",filename); 3399075Srrh exit(2); 3409075Srrh } 3419075Srrh return(getc(wordf)); 3429075Srrh } 3439075Srrh else return(EOF); 3449075Srrh } 3459075Srrh else return(c); 3469075Srrh } 3479075Srrh if ((c = *argptr++) == '\0') 3489075Srrh return(EOF); 3499075Srrh return(c); 3509075Srrh } 3519075Srrh 3529075Srrh cgotofn() { 3539075Srrh register c; 3549075Srrh register struct words *s; 3559075Srrh register ct; 3569075Srrh int neg; 3579075Srrh 3589075Srrh s = smax = w; 3599075Srrh neg = ct = 0; 3609075Srrh nword: for(;;) { 3619075Srrh c = getargc(); 3629075Srrh if(c == '~'){ 3639075Srrh neg++; 3649075Srrh c = getargc(); 3659075Srrh } 3669075Srrh if (c==EOF) 3679075Srrh return; 3689075Srrh if (c == '\n') { 3699075Srrh if(neg)s->out = 0377; 3709075Srrh else s->out = ct-1; 3719075Srrh neg = ct = 0; 3729075Srrh s = w; 3739075Srrh } else { 3749075Srrh loop: if (s->inp == c) { 3759075Srrh s = s->nst; 3769075Srrh ct++; 3779075Srrh continue; 3789075Srrh } 3799075Srrh if (s->inp == 0) goto enter; 3809075Srrh if (s->link == 0) { 3819075Srrh if (smax >= &w[MAXSIZ - 1]) overflo(); 3829075Srrh s->link = ++smax; 3839075Srrh s = smax; 3849075Srrh goto enter; 3859075Srrh } 3869075Srrh s = s->link; 3879075Srrh goto loop; 3889075Srrh } 3899075Srrh } 3909075Srrh 3919075Srrh enter: 3929075Srrh do { 3939075Srrh s->inp = c; 3949075Srrh ct++; 3959075Srrh if (smax >= &w[MAXSIZ - 1]) overflo(); 3969075Srrh s->nst = ++smax; 3979075Srrh s = smax; 3989075Srrh } while ((c = getargc()) != '\n' && c!=EOF); 3999075Srrh if(neg)smax->out = 0377; 4009075Srrh else smax->out = ct-1; 4019075Srrh neg = ct = 0; 4029075Srrh s = w; 4039075Srrh if (c != EOF) 4049075Srrh goto nword; 4059075Srrh } 4069075Srrh 4079075Srrh overflo() { 4089075Srrh fprintf(stderr, "wordlist too large\n"); 4099075Srrh exit(2); 4109075Srrh } 4119075Srrh cfail() { 4129075Srrh struct words *queue[QSIZE]; 4139075Srrh struct words **front, **rear; 4149075Srrh struct words *state; 4159075Srrh int bstart; 4169075Srrh register char c; 4179075Srrh register struct words *s; 4189075Srrh s = w; 4199075Srrh front = rear = queue; 4209075Srrh init: if ((s->inp) != 0) { 4219075Srrh *rear++ = s->nst; 4229075Srrh if (rear >= &queue[QSIZE - 1]) overflo(); 4239075Srrh } 4249075Srrh if ((s = s->link) != 0) { 4259075Srrh goto init; 4269075Srrh } 4279075Srrh 4289075Srrh while (rear!=front) { 4299075Srrh s = *front; 4309075Srrh if (front == &queue[QSIZE-1]) 4319075Srrh front = queue; 4329075Srrh else front++; 4339075Srrh cloop: if ((c = s->inp) != 0) { 4349075Srrh bstart=0; 4359075Srrh *rear = (q = s->nst); 4369075Srrh if (front < rear) 4379075Srrh if (rear >= &queue[QSIZE-1]) 4389075Srrh if (front == queue) overflo(); 4399075Srrh else rear = queue; 4409075Srrh else rear++; 4419075Srrh else 4429075Srrh if (++rear == front) overflo(); 4439075Srrh state = s->fail; 4449075Srrh floop: if (state == 0){ state = w;bstart=1;} 4459075Srrh if (state->inp == c) { 4469075Srrh qloop: q->fail = state->nst; 4479075Srrh if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; 4489075Srrh if((q=q->link) != 0)goto qloop; 4499075Srrh } 4509075Srrh else if ((state = state->link) != 0) 4519075Srrh goto floop; 4529075Srrh else if(bstart==0){state=0; goto floop;} 4539075Srrh } 4549075Srrh if ((s = s->link) != 0) 4559075Srrh goto cloop; 4569075Srrh } 4579075Srrh /* for(s=w;s<=smax;s++) 4589075Srrh printf("s %d ch %c out %d nst %d link %d fail %d\n",s, 4599075Srrh s->inp,s->out,s->nst,s->link,s->fail); 4609075Srrh */ 4619075Srrh } 462*9080Srrh outc(addr,file) 4639075Srrh char *addr; 464*9080Srrh char *file; 4659075Srrh { 466*9080Srrh int inside; 4679075Srrh 468*9080Srrh inside = 0; 469*9080Srrh if(!caps && lineno && linemsg){ 470*9080Srrh printf("beginning line %ld",olcount); 471*9080Srrh if(file != (char *)NULL)printf(" %s\n",file); 472*9080Srrh else printf("\n"); 473*9080Srrh linemsg = 0; 474*9080Srrh } 4759075Srrh while(nlp < addr){ 476*9080Srrh if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ 4779075Srrh oct=0; 4789075Srrh putchar('\n'); 4799075Srrh } 4809075Srrh if(nlp == begp){ 481*9080Srrh if(caps)inside++; 482*9080Srrh else { 483*9080Srrh if( oct >45){putchar('\n'); 484*9080Srrh oct=0; 485*9080Srrh } 486*9080Srrh if( oct==0 || table[*nlp] != ' '){ 487*9080Srrh printf("*["); 488*9080Srrh oct+=2; 489*9080Srrh } 490*9080Srrh else {printf(" *[");; 491*9080Srrh oct+=3; 492*9080Srrh } 493*9080Srrh } 494*9080Srrh if(mflg)putc('[',mine); 4959075Srrh } 496*9080Srrh if(inside){ 497*9080Srrh if(islower(*nlp))*nlp = toupper(*nlp); 498*9080Srrh } 499*9080Srrh else { 500*9080Srrh if(!caps && *nlp == '\n')*nlp = ' '; 501*9080Srrh if(*nlp == ' ' && oct==0); 502*9080Srrh else if(!caps) {putchar(*nlp); oct++;} 503*9080Srrh } 5049075Srrh if(nlp == endp){ 505*9080Srrh if(caps) 506*9080Srrh inside= 0; 507*9080Srrh else { 508*9080Srrh if(*(nlp) != ' '){printf("]*"); 509*9080Srrh oct+=2; 510*9080Srrh } 511*9080Srrh else {printf("]* "); 512*9080Srrh oct+=3; 513*9080Srrh } 514*9080Srrh if(oct >60){putchar('\n'); 515*9080Srrh oct=0; 516*9080Srrh } 517*9080Srrh } 518*9080Srrh if(mflg)putc(']',mine); 519*9080Srrh beg = 0; 5209075Srrh } 521*9080Srrh if(mflg){ 522*9080Srrh if(nlp == myst)beg = 1; 523*9080Srrh if(beg || last){ 524*9080Srrh putc(*nlp,mine); 525*9080Srrh if(myct++ >= 72 || last == 20){ 526*9080Srrh putc('\n',mine); 527*9080Srrh if(last == 20)last=myct=0; 528*9080Srrh else myct=0; 529*9080Srrh } 530*9080Srrh if(last)last++; 531*9080Srrh } 532*9080Srrh } 5339075Srrh nlp++; 5349075Srrh } 5359075Srrh } 536