19075Srrh #ifndef lint 2*46821Sbostic static char sccsid[] = "@(#)dprog.c 4.4 (Berkeley) 91/03/01"; 39075Srrh #endif not lint 49075Srrh 59075Srrh /* 69080Srrh * diction -- print all sentences containing one of default phrases 79075Srrh * 89075Srrh * status returns: 99075Srrh * 0 - ok, and some matches 109075Srrh * 1 - ok, but no matches 119075Srrh * 2 - some error 129075Srrh */ 139075Srrh 149075Srrh #include <stdio.h> 159075Srrh #include <ctype.h> 1637872Sbostic #include "pathnames.h" 179075Srrh 189075Srrh #define MAXSIZ 6500 199075Srrh #define QSIZE 650 209080Srrh int linemsg; 219080Srrh long olcount; 229080Srrh long lcount; 239075Srrh struct words { 249075Srrh char inp; 259075Srrh char out; 269075Srrh struct words *nst; 279075Srrh struct words *link; 289075Srrh struct words *fail; 299075Srrh } w[MAXSIZ], *smax, *q; 309075Srrh 319080Srrh char table[128] = { 329080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 339080Srrh 0, 0, ' ', 0, 0, 0, 0, 0, 349080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 359080Srrh 0, 0, 0, 0, 0, 0, 0, 0, 369080Srrh ' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', 379080Srrh ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ', 389080Srrh '0', '1', '2', '3', '4', '5', '6', '7', 399080Srrh '8', '9', ' ', ' ', ' ', ' ', ' ', '.', 409080Srrh ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 419080Srrh 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 429080Srrh 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 439080Srrh 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ', 449080Srrh ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 459080Srrh 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 469080Srrh 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 479080Srrh 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ' 489080Srrh }; 499080Srrh int caps = 0; 509080Srrh int lineno = 0; 519075Srrh int fflag; 529075Srrh int nflag = 1; /*use default file*/ 539075Srrh char *filename; 549080Srrh int mflg = 0; /*don't catch output*/ 559075Srrh int nfile; 569075Srrh int nsucc; 579075Srrh long nsent = 0; 589075Srrh long nhits = 0; 599075Srrh char *nlp; 609075Srrh char *begp, *endp; 619080Srrh int beg, last; 629080Srrh char *myst; 639080Srrh int myct = 0; 649075Srrh int oct = 0; 659075Srrh FILE *wordf; 669080Srrh FILE *mine; 679075Srrh char *argptr; 689080Srrh long tl = 0; 699080Srrh long th = 0; 709075Srrh 719075Srrh main(argc, argv) 729080Srrh char *argv[]; 739075Srrh { 749080Srrh int sv; 759075Srrh while (--argc > 0 && (++argv)[0][0]=='-') 769075Srrh switch (argv[0][1]) { 779075Srrh 789075Srrh case 'f': 799075Srrh fflag++; 809080Srrh filename = (++argv)[0]; 819075Srrh argc--; 829075Srrh continue; 839075Srrh 849075Srrh case 'n': 859075Srrh nflag = 0; 869075Srrh continue; 879075Srrh case 'd': 889080Srrh mflg=0; 899075Srrh continue; 909080Srrh case 'c': 919080Srrh caps++; 929080Srrh continue; 939080Srrh case 'l': 949080Srrh lineno++; 959080Srrh continue; 969075Srrh default: 979075Srrh fprintf(stderr, "diction: unknown flag\n"); 989075Srrh continue; 999075Srrh } 1009075Srrh out: 1019075Srrh if(nflag){ 10237872Sbostic wordf = fopen(_PATH_DICT,"r"); 1039075Srrh if(wordf == NULL){ 1049075Srrh fprintf(stderr,"diction: can't open default dictionary\n"); 1059075Srrh exit(2); 1069075Srrh } 1079075Srrh } 1089075Srrh else { 1099080Srrh wordf = fopen(filename,"r"); 1109075Srrh if(wordf == NULL){ 1119075Srrh fprintf(stderr,"diction: can't open %s\n",filename); 1129075Srrh exit(2); 1139075Srrh } 1149075Srrh } 1159075Srrh 1169080Srrh #ifdef CATCH 1179080Srrh if(fopen(CATCH,"r") != NULL) 1189080Srrh if((mine=fopen(CATCH,"a"))!=NULL)mflg=1; 1199080Srrh #endif 1209080Srrh #ifdef MACS 1219080Srrh if(caps){ 1229080Srrh printf(".so "); 1239080Srrh printf(MACS); 1249080Srrh printf("\n"); 1259080Srrh } 1269080Srrh #endif 1279075Srrh cgotofn(); 1289075Srrh cfail(); 1299075Srrh nfile = argc; 1309075Srrh if (argc<=0) { 1319075Srrh execute((char *)NULL); 1329075Srrh } 1339075Srrh else while (--argc >= 0) { 1349075Srrh execute(*argv); 1359080Srrh if(lineno){ 1369080Srrh printf("file %s: number of lines %ld number of phrases found %ld\n", 1379080Srrh *argv, lcount-1, nhits); 1389080Srrh tl += lcount-1; 1399080Srrh th += nhits; 1409080Srrh sv = lcount-1; 1419080Srrh lcount = nhits = 0; 1429080Srrh } 1439075Srrh argv++; 1449075Srrh } 1459080Srrh if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th); 1469080Srrh if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits); 1479080Srrh else if(tl != sv) 1489080Srrh if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th); 1499075Srrh exit(nsucc == 0); 1509075Srrh } 1519075Srrh 1529075Srrh execute(file) 1539075Srrh char *file; 1549075Srrh { 1559075Srrh register char *p; 1569075Srrh register struct words *c; 1579075Srrh register ccount; 1589080Srrh int count1; 1599080Srrh char *beg1; 1609075Srrh struct words *savc; 1619075Srrh char *savp; 1629075Srrh int savct; 1639075Srrh int scr; 1649075Srrh char buf[1024]; 1659075Srrh int f; 1669075Srrh int hit; 1679080Srrh last = 0; 1689075Srrh if (file) { 1699075Srrh if ((f = open(file, 0)) < 0) { 1709075Srrh fprintf(stderr, "diction: can't open %s\n", file); 1719075Srrh exit(2); 1729075Srrh } 1739075Srrh } 1749075Srrh else f = 0; 1759080Srrh lcount = olcount = 1; 1769080Srrh linemsg = 1; 1779075Srrh ccount = 0; 1789080Srrh count1 = -1; 1799075Srrh p = buf; 1809075Srrh nlp = p; 1819075Srrh c = w; 1829075Srrh oct = hit = 0; 1839080Srrh savc = (struct words *) 0; 1849080Srrh savp = (char *) 0; 1859075Srrh for (;;) { 1869080Srrh if(--ccount <= 0) { 1879075Srrh if (p == &buf[1024]) p = buf; 1889075Srrh if (p > &buf[512]) { 1899075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 1909075Srrh } 1919075Srrh else if ((ccount = read(f, p, 512)) <= 0) break; 1929080Srrh if(caps && (count1 > 0)) 1939080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 1949080Srrh count1 = ccount; 1959080Srrh beg1 = p; 1969075Srrh } 1979075Srrh if(p == &buf[1024])p=buf; 1989075Srrh nstate: 1999080Srrh if (c->inp == table[*p]) { 2009075Srrh c = c->nst; 2019075Srrh } 2029075Srrh else if (c->link != 0) { 2039075Srrh c = c->link; 2049075Srrh goto nstate; 2059075Srrh } 2069075Srrh else { 2079075Srrh if(savp != 0){ 2089075Srrh c=savc; 2099075Srrh p=savp; 2109075Srrh if(ccount > savct)ccount += savct; 2119075Srrh else ccount = savct; 2129080Srrh savc = (struct words *) 0; 2139080Srrh savp = (char *) 0; 2149075Srrh goto hadone; 2159075Srrh } 2169075Srrh c = c->fail; 2179075Srrh if (c==0) { 2189075Srrh c = w; 2199075Srrh istate: 2209080Srrh if (c->inp == table[*p]) { 2219075Srrh c = c->nst; 2229075Srrh } 2239075Srrh else if (c->link != 0) { 2249075Srrh c = c->link; 2259075Srrh goto istate; 2269075Srrh } 2279075Srrh } 2289075Srrh else goto nstate; 2299075Srrh } 2309075Srrh if(c->out){ 2319080Srrh if((c->inp == table[*(p+1)]) && (c->nst != 0)){ 2329075Srrh savp=p; 2339075Srrh savc=c; 2349075Srrh savct=ccount; 2359075Srrh goto cont; 2369075Srrh } 2379075Srrh else if(c->link != 0){ 2389075Srrh savc=c; 2399075Srrh while((savc=savc->link)!= 0){ 2409080Srrh if(savc->inp == table[*(p+1)]){ 2419075Srrh savp=p; 2429075Srrh savc=c; 2439075Srrh savct=ccount; 2449075Srrh goto cont; 2459075Srrh } 2469075Srrh } 2479075Srrh } 2489075Srrh hadone: 2499080Srrh savc = (struct words *) 0; 2509080Srrh savp = (char *) 0; 2519075Srrh if(c->out == (char)(0377)){ 2529075Srrh c=w; 2539075Srrh goto nstate; 2549075Srrh } 2559075Srrh begp = p - (c->out); 2569075Srrh if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp); 2579075Srrh endp=p; 2589080Srrh if(mflg){ 2599080Srrh if(begp-20 < &buf[0]){ 2609080Srrh myst = &buf[1024]-20; 2619080Srrh if(nlp < &buf[512])myst=nlp; 2629080Srrh } 2639080Srrh else myst = begp-20; 2649080Srrh if(myst < nlp)myst = nlp; 2659080Srrh beg = 0; 2669080Srrh } 2679075Srrh hit = 1; 2689075Srrh nhits++; 2699080Srrh if(*p == '\n')lcount++; 2709080Srrh if (table[*p++] == '.') { 2719080Srrh linemsg = 1; 2729075Srrh if (--ccount <= 0) { 2739075Srrh if (p == &buf[1024]) p = buf; 2749075Srrh if (p > &buf[512]) { 2759075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 2769075Srrh } 2779075Srrh else if ((ccount = read(f, p, 512)) <= 0) break; 2789080Srrh if(caps && (count1 > 0)) 2799080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 2809080Srrh count1=ccount; 2819080Srrh beg1=p; 2829075Srrh } 2839075Srrh } 2849075Srrh succeed: nsucc = 1; 2859075Srrh { 2869075Srrh if (p <= nlp) { 2879080Srrh outc(&buf[1024],file); 2889075Srrh nlp = buf; 2899075Srrh } 2909080Srrh outc(p,file); 2919075Srrh } 2929080Srrh if(mflg)last=1; 2939075Srrh nomatch: 2949075Srrh nlp = p; 2959075Srrh c = w; 2969075Srrh begp = endp = 0; 2979075Srrh continue; 2989075Srrh } 2999075Srrh cont: 3009080Srrh if(*p == '\n')lcount++; 3019080Srrh if (table[*p++] == '.'){ 3029075Srrh if(hit){ 3039075Srrh if(p <= nlp){ 3049080Srrh outc(&buf[1024],file); 3059075Srrh nlp = buf; 3069075Srrh } 3079080Srrh outc(p,file); 3089080Srrh if(!caps)printf("\n\n"); 3099080Srrh if(mflg && last){putc('\n',mine);myct = 0;} 3109075Srrh } 3119080Srrh linemsg = 1; 3129080Srrh if(*p == '\n')olcount = lcount+1; 3139080Srrh else 3149080Srrh olcount=lcount; 3159080Srrh last = 0; 3169075Srrh hit = 0; 3179075Srrh oct = 0; 3189075Srrh nlp = p; 3199075Srrh c = w; 3209075Srrh begp = endp = 0; 3219080Srrh nsent++; 3229075Srrh } 3239075Srrh } 3249080Srrh if(caps && (count1 > 0)) 3259080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout); 3269075Srrh close(f); 3279075Srrh } 3289075Srrh 3299075Srrh getargc() 3309075Srrh { 3319075Srrh register c; 3329075Srrh if (wordf){ 3339075Srrh if((c=getc(wordf))==EOF){ 3349075Srrh fclose(wordf); 3359075Srrh if(nflag && fflag){ 3369075Srrh nflag=0; 3379080Srrh wordf=fopen(filename,"r"); 3389075Srrh if(wordf == NULL){ 339*46821Sbostic fprintf(stderr, 340*46821Sbostic "diction can't open %s\n",filename); 3419075Srrh exit(2); 3429075Srrh } 3439075Srrh return(getc(wordf)); 3449075Srrh } 3459075Srrh else return(EOF); 3469075Srrh } 3479075Srrh else return(c); 3489075Srrh } 3499075Srrh if ((c = *argptr++) == '\0') 3509075Srrh return(EOF); 3519075Srrh return(c); 3529075Srrh } 3539075Srrh 3549075Srrh cgotofn() { 3559075Srrh register c; 3569075Srrh register struct words *s; 3579075Srrh register ct; 3589075Srrh int neg; 3599075Srrh 3609075Srrh s = smax = w; 3619075Srrh neg = ct = 0; 3629075Srrh nword: for(;;) { 3639075Srrh c = getargc(); 3649075Srrh if(c == '~'){ 3659075Srrh neg++; 3669075Srrh c = getargc(); 3679075Srrh } 3689075Srrh if (c==EOF) 3699075Srrh return; 3709075Srrh if (c == '\n') { 3719075Srrh if(neg)s->out = 0377; 3729075Srrh else s->out = ct-1; 3739075Srrh neg = ct = 0; 3749075Srrh s = w; 3759075Srrh } else { 3769075Srrh loop: if (s->inp == c) { 3779075Srrh s = s->nst; 3789075Srrh ct++; 3799075Srrh continue; 3809075Srrh } 3819075Srrh if (s->inp == 0) goto enter; 3829075Srrh if (s->link == 0) { 3839075Srrh if (smax >= &w[MAXSIZ - 1]) overflo(); 3849075Srrh s->link = ++smax; 3859075Srrh s = smax; 3869075Srrh goto enter; 3879075Srrh } 3889075Srrh s = s->link; 3899075Srrh goto loop; 3909075Srrh } 3919075Srrh } 3929075Srrh 3939075Srrh enter: 3949075Srrh do { 3959075Srrh s->inp = c; 3969075Srrh ct++; 3979075Srrh if (smax >= &w[MAXSIZ - 1]) overflo(); 3989075Srrh s->nst = ++smax; 3999075Srrh s = smax; 4009075Srrh } while ((c = getargc()) != '\n' && c!=EOF); 4019075Srrh if(neg)smax->out = 0377; 4029075Srrh else smax->out = ct-1; 4039075Srrh neg = ct = 0; 4049075Srrh s = w; 4059075Srrh if (c != EOF) 4069075Srrh goto nword; 4079075Srrh } 4089075Srrh 4099075Srrh overflo() { 4109075Srrh fprintf(stderr, "wordlist too large\n"); 4119075Srrh exit(2); 4129075Srrh } 4139075Srrh cfail() { 4149075Srrh struct words *queue[QSIZE]; 4159075Srrh struct words **front, **rear; 4169075Srrh struct words *state; 4179075Srrh int bstart; 4189075Srrh register char c; 4199075Srrh register struct words *s; 4209075Srrh s = w; 4219075Srrh front = rear = queue; 4229075Srrh init: if ((s->inp) != 0) { 4239075Srrh *rear++ = s->nst; 4249075Srrh if (rear >= &queue[QSIZE - 1]) overflo(); 4259075Srrh } 4269075Srrh if ((s = s->link) != 0) { 4279075Srrh goto init; 4289075Srrh } 4299075Srrh 4309075Srrh while (rear!=front) { 4319075Srrh s = *front; 4329075Srrh if (front == &queue[QSIZE-1]) 4339075Srrh front = queue; 4349075Srrh else front++; 4359075Srrh cloop: if ((c = s->inp) != 0) { 4369075Srrh bstart=0; 4379075Srrh *rear = (q = s->nst); 4389075Srrh if (front < rear) 4399075Srrh if (rear >= &queue[QSIZE-1]) 4409075Srrh if (front == queue) overflo(); 4419075Srrh else rear = queue; 4429075Srrh else rear++; 4439075Srrh else 4449075Srrh if (++rear == front) overflo(); 4459075Srrh state = s->fail; 4469075Srrh floop: if (state == 0){ state = w;bstart=1;} 4479075Srrh if (state->inp == c) { 4489075Srrh qloop: q->fail = state->nst; 4499075Srrh if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out; 4509075Srrh if((q=q->link) != 0)goto qloop; 4519075Srrh } 4529075Srrh else if ((state = state->link) != 0) 4539075Srrh goto floop; 4549075Srrh else if(bstart==0){state=0; goto floop;} 4559075Srrh } 4569075Srrh if ((s = s->link) != 0) 4579075Srrh goto cloop; 4589075Srrh } 4599075Srrh /* for(s=w;s<=smax;s++) 4609075Srrh printf("s %d ch %c out %d nst %d link %d fail %d\n",s, 4619075Srrh s->inp,s->out,s->nst,s->link,s->fail); 4629075Srrh */ 4639075Srrh } 4649080Srrh outc(addr,file) 4659075Srrh char *addr; 4669080Srrh char *file; 4679075Srrh { 4689080Srrh int inside; 4699075Srrh 4709080Srrh inside = 0; 4719080Srrh if(!caps && lineno && linemsg){ 4729080Srrh printf("beginning line %ld",olcount); 4739080Srrh if(file != (char *)NULL)printf(" %s\n",file); 4749080Srrh else printf("\n"); 4759080Srrh linemsg = 0; 4769080Srrh } 4779075Srrh while(nlp < addr){ 4789080Srrh if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){ 4799075Srrh oct=0; 4809075Srrh putchar('\n'); 4819075Srrh } 4829075Srrh if(nlp == begp){ 4839080Srrh if(caps)inside++; 4849080Srrh else { 4859080Srrh if( oct >45){putchar('\n'); 4869080Srrh oct=0; 4879080Srrh } 4889080Srrh if( oct==0 || table[*nlp] != ' '){ 4899080Srrh printf("*["); 4909080Srrh oct+=2; 4919080Srrh } 4929080Srrh else {printf(" *[");; 4939080Srrh oct+=3; 4949080Srrh } 4959080Srrh } 4969080Srrh if(mflg)putc('[',mine); 4979075Srrh } 4989080Srrh if(inside){ 4999080Srrh if(islower(*nlp))*nlp = toupper(*nlp); 5009080Srrh } 5019080Srrh else { 5029080Srrh if(!caps && *nlp == '\n')*nlp = ' '; 5039080Srrh if(*nlp == ' ' && oct==0); 5049080Srrh else if(!caps) {putchar(*nlp); oct++;} 5059080Srrh } 5069075Srrh if(nlp == endp){ 5079080Srrh if(caps) 5089080Srrh inside= 0; 5099080Srrh else { 5109080Srrh if(*(nlp) != ' '){printf("]*"); 5119080Srrh oct+=2; 5129080Srrh } 5139080Srrh else {printf("]* "); 5149080Srrh oct+=3; 5159080Srrh } 5169080Srrh if(oct >60){putchar('\n'); 5179080Srrh oct=0; 5189080Srrh } 5199080Srrh } 5209080Srrh if(mflg)putc(']',mine); 5219080Srrh beg = 0; 5229075Srrh } 5239080Srrh if(mflg){ 5249080Srrh if(nlp == myst)beg = 1; 5259080Srrh if(beg || last){ 5269080Srrh putc(*nlp,mine); 5279080Srrh if(myct++ >= 72 || last == 20){ 5289080Srrh putc('\n',mine); 5299080Srrh if(last == 20)last=myct=0; 5309080Srrh else myct=0; 5319080Srrh } 5329080Srrh if(last)last++; 5339080Srrh } 5349080Srrh } 5359075Srrh nlp++; 5369075Srrh } 5379075Srrh } 538