/*-
 * %sccs.include.proprietary.c%
 */

#ifndef lint
/* SCCS identification string; compiled out when `lint' is defined. */
static char sccsid[] = "@(#)dprog.c 4.5 (Berkeley) 04/17/91";
#endif /* not lint */

/*
 * diction -- print all sentences containing one of the default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 */

#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "pathnames.h"

#define MAXSIZ 6500	/* number of nodes available in w[] */
#define QSIZE 650	/* number of slots in the work queue used by cfail() */

int linemsg;		/* nonzero: outc() prints a "beginning line" header */
long olcount;		/* line number printed in that header */
long lcount;		/* line counter for the input being scanned */

/*
 * One node of the phrase-matching automaton built by cgotofn() and
 * completed by cfail().  The nodes reachable through `link' from a given
 * node are the alternative characters accepted in the same state.
 */
struct words {
	char inp;		/* character this node matches; 0 = unused node */
	char out;		/* nonzero when a phrase ends in this state:
				 * phrase length - 1, or 0377 for a phrase
				 * that was prefixed with '~' */
	struct words *nst;	/* state entered after matching inp */
	struct words *link;	/* next alternative within the same state */
	struct words *fail;	/* state to resume in on a mismatch */
} w[MAXSIZ],			/* node pool; w[0] starts the root state */
  *smax,			/* last node handed out by cgotofn() */
  *q;				/* scratch pointer used by cfail() */

/*
 * Character classification/folding table, indexed by 7-bit ASCII code.
 * Letters map to their lower-case form and digits map to themselves;
 * '.', '!' and '?' map to '.'; newline, space and the remaining
 * punctuation map to ' '; the other control characters map to 0.
 * The table has exactly 128 entries, so callers must not index it with
 * a value outside 0..127.
 */
char table[128] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, ' ', 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
int caps = 0;		/* -c: outc() upper-cases matched phrases in place
			 * instead of printing bracketed sentences */
int lineno = 0;		/* -l: report line numbers and per-file counts */
int fflag;		/* -f given: `filename' names a phrase file */
int nflag = 1;		/*use default file*/
char *filename;		/* argument of -f */
int mflg = 0;		/*don't catch output*/
int nfile;		/* number of input file arguments (set by main) */
int nsucc;		/* set to 1 once any phrase has matched */
long nsent = 0;		/* sentences seen */
long nhits = 0;		/* phrases found */
char *nlp;		/* next buffer position outc() has yet to emit */
char *begp, *endp;	/* first and last character of the current match */
int beg, last;		/* state for the context copied to `mine' */
char *myst;		/* where context copying to `mine' starts */
int myct = 0;		/* characters on the current line of `mine' */
int oct = 0;		/* characters on the current stdout line */
FILE *wordf;		/* phrase list currently being read by getargc() */
FILE *mine;		/* catch file, written when mflg is set */
char *argptr;		/* in-memory phrase source used when wordf is NULL */
long tl = 0;		/* total lines over all files (-l) */
long th = 0;		/* total phrases found over all files (-l) */

main(argc,argv)759075Srrh main(argc, argv)
769080Srrh char *argv[];
779075Srrh {
789080Srrh int sv;
799075Srrh while (--argc > 0 && (++argv)[0][0]=='-')
809075Srrh switch (argv[0][1]) {
819075Srrh
829075Srrh case 'f':
839075Srrh fflag++;
849080Srrh filename = (++argv)[0];
859075Srrh argc--;
869075Srrh continue;
879075Srrh
889075Srrh case 'n':
899075Srrh nflag = 0;
909075Srrh continue;
919075Srrh case 'd':
929080Srrh mflg=0;
939075Srrh continue;
949080Srrh case 'c':
959080Srrh caps++;
969080Srrh continue;
979080Srrh case 'l':
989080Srrh lineno++;
999080Srrh continue;
1009075Srrh default:
1019075Srrh fprintf(stderr, "diction: unknown flag\n");
1029075Srrh continue;
1039075Srrh }
1049075Srrh out:
1059075Srrh if(nflag){
10637872Sbostic wordf = fopen(_PATH_DICT,"r");
1079075Srrh if(wordf == NULL){
1089075Srrh fprintf(stderr,"diction: can't open default dictionary\n");
1099075Srrh exit(2);
1109075Srrh }
1119075Srrh }
1129075Srrh else {
1139080Srrh wordf = fopen(filename,"r");
1149075Srrh if(wordf == NULL){
1159075Srrh fprintf(stderr,"diction: can't open %s\n",filename);
1169075Srrh exit(2);
1179075Srrh }
1189075Srrh }
1199075Srrh
1209080Srrh #ifdef CATCH
1219080Srrh if(fopen(CATCH,"r") != NULL)
1229080Srrh if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
1239080Srrh #endif
1249080Srrh #ifdef MACS
1259080Srrh if(caps){
1269080Srrh printf(".so ");
1279080Srrh printf(MACS);
1289080Srrh printf("\n");
1299080Srrh }
1309080Srrh #endif
1319075Srrh cgotofn();
1329075Srrh cfail();
1339075Srrh nfile = argc;
1349075Srrh if (argc<=0) {
1359075Srrh execute((char *)NULL);
1369075Srrh }
1379075Srrh else while (--argc >= 0) {
1389075Srrh execute(*argv);
1399080Srrh if(lineno){
1409080Srrh printf("file %s: number of lines %ld number of phrases found %ld\n",
1419080Srrh *argv, lcount-1, nhits);
1429080Srrh tl += lcount-1;
1439080Srrh th += nhits;
1449080Srrh sv = lcount-1;
1459080Srrh lcount = nhits = 0;
1469080Srrh }
1479075Srrh argv++;
1489075Srrh }
1499080Srrh if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
1509080Srrh if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
1519080Srrh else if(tl != sv)
1529080Srrh if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
1539075Srrh exit(nsucc == 0);
1549075Srrh }
1559075Srrh
execute(file)1569075Srrh execute(file)
1579075Srrh char *file;
1589075Srrh {
1599075Srrh register char *p;
1609075Srrh register struct words *c;
1619075Srrh register ccount;
1629080Srrh int count1;
1639080Srrh char *beg1;
1649075Srrh struct words *savc;
1659075Srrh char *savp;
1669075Srrh int savct;
1679075Srrh int scr;
1689075Srrh char buf[1024];
1699075Srrh int f;
1709075Srrh int hit;
1719080Srrh last = 0;
1729075Srrh if (file) {
1739075Srrh if ((f = open(file, 0)) < 0) {
1749075Srrh fprintf(stderr, "diction: can't open %s\n", file);
1759075Srrh exit(2);
1769075Srrh }
1779075Srrh }
1789075Srrh else f = 0;
1799080Srrh lcount = olcount = 1;
1809080Srrh linemsg = 1;
1819075Srrh ccount = 0;
1829080Srrh count1 = -1;
1839075Srrh p = buf;
1849075Srrh nlp = p;
1859075Srrh c = w;
1869075Srrh oct = hit = 0;
1879080Srrh savc = (struct words *) 0;
1889080Srrh savp = (char *) 0;
1899075Srrh for (;;) {
1909080Srrh if(--ccount <= 0) {
1919075Srrh if (p == &buf[1024]) p = buf;
1929075Srrh if (p > &buf[512]) {
1939075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
1949075Srrh }
1959075Srrh else if ((ccount = read(f, p, 512)) <= 0) break;
1969080Srrh if(caps && (count1 > 0))
1979080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout);
1989080Srrh count1 = ccount;
1999080Srrh beg1 = p;
2009075Srrh }
2019075Srrh if(p == &buf[1024])p=buf;
2029075Srrh nstate:
2039080Srrh if (c->inp == table[*p]) {
2049075Srrh c = c->nst;
2059075Srrh }
2069075Srrh else if (c->link != 0) {
2079075Srrh c = c->link;
2089075Srrh goto nstate;
2099075Srrh }
2109075Srrh else {
2119075Srrh if(savp != 0){
2129075Srrh c=savc;
2139075Srrh p=savp;
2149075Srrh if(ccount > savct)ccount += savct;
2159075Srrh else ccount = savct;
2169080Srrh savc = (struct words *) 0;
2179080Srrh savp = (char *) 0;
2189075Srrh goto hadone;
2199075Srrh }
2209075Srrh c = c->fail;
2219075Srrh if (c==0) {
2229075Srrh c = w;
2239075Srrh istate:
2249080Srrh if (c->inp == table[*p]) {
2259075Srrh c = c->nst;
2269075Srrh }
2279075Srrh else if (c->link != 0) {
2289075Srrh c = c->link;
2299075Srrh goto istate;
2309075Srrh }
2319075Srrh }
2329075Srrh else goto nstate;
2339075Srrh }
2349075Srrh if(c->out){
2359080Srrh if((c->inp == table[*(p+1)]) && (c->nst != 0)){
2369075Srrh savp=p;
2379075Srrh savc=c;
2389075Srrh savct=ccount;
2399075Srrh goto cont;
2409075Srrh }
2419075Srrh else if(c->link != 0){
2429075Srrh savc=c;
2439075Srrh while((savc=savc->link)!= 0){
2449080Srrh if(savc->inp == table[*(p+1)]){
2459075Srrh savp=p;
2469075Srrh savc=c;
2479075Srrh savct=ccount;
2489075Srrh goto cont;
2499075Srrh }
2509075Srrh }
2519075Srrh }
2529075Srrh hadone:
2539080Srrh savc = (struct words *) 0;
2549080Srrh savp = (char *) 0;
2559075Srrh if(c->out == (char)(0377)){
2569075Srrh c=w;
2579075Srrh goto nstate;
2589075Srrh }
2599075Srrh begp = p - (c->out);
2609075Srrh if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
2619075Srrh endp=p;
2629080Srrh if(mflg){
2639080Srrh if(begp-20 < &buf[0]){
2649080Srrh myst = &buf[1024]-20;
2659080Srrh if(nlp < &buf[512])myst=nlp;
2669080Srrh }
2679080Srrh else myst = begp-20;
2689080Srrh if(myst < nlp)myst = nlp;
2699080Srrh beg = 0;
2709080Srrh }
2719075Srrh hit = 1;
2729075Srrh nhits++;
2739080Srrh if(*p == '\n')lcount++;
2749080Srrh if (table[*p++] == '.') {
2759080Srrh linemsg = 1;
2769075Srrh if (--ccount <= 0) {
2779075Srrh if (p == &buf[1024]) p = buf;
2789075Srrh if (p > &buf[512]) {
2799075Srrh if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
2809075Srrh }
2819075Srrh else if ((ccount = read(f, p, 512)) <= 0) break;
2829080Srrh if(caps && (count1 > 0))
2839080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout);
2849080Srrh count1=ccount;
2859080Srrh beg1=p;
2869075Srrh }
2879075Srrh }
2889075Srrh succeed: nsucc = 1;
2899075Srrh {
2909075Srrh if (p <= nlp) {
2919080Srrh outc(&buf[1024],file);
2929075Srrh nlp = buf;
2939075Srrh }
2949080Srrh outc(p,file);
2959075Srrh }
2969080Srrh if(mflg)last=1;
2979075Srrh nomatch:
2989075Srrh nlp = p;
2999075Srrh c = w;
3009075Srrh begp = endp = 0;
3019075Srrh continue;
3029075Srrh }
3039075Srrh cont:
3049080Srrh if(*p == '\n')lcount++;
3059080Srrh if (table[*p++] == '.'){
3069075Srrh if(hit){
3079075Srrh if(p <= nlp){
3089080Srrh outc(&buf[1024],file);
3099075Srrh nlp = buf;
3109075Srrh }
3119080Srrh outc(p,file);
3129080Srrh if(!caps)printf("\n\n");
3139080Srrh if(mflg && last){putc('\n',mine);myct = 0;}
3149075Srrh }
3159080Srrh linemsg = 1;
3169080Srrh if(*p == '\n')olcount = lcount+1;
3179080Srrh else
3189080Srrh olcount=lcount;
3199080Srrh last = 0;
3209075Srrh hit = 0;
3219075Srrh oct = 0;
3229075Srrh nlp = p;
3239075Srrh c = w;
3249075Srrh begp = endp = 0;
3259080Srrh nsent++;
3269075Srrh }
3279075Srrh }
3289080Srrh if(caps && (count1 > 0))
3299080Srrh fwrite(beg1,sizeof(*beg1),count1,stdout);
3309075Srrh close(f);
3319075Srrh }
3329075Srrh
getargc()3339075Srrh getargc()
3349075Srrh {
3359075Srrh register c;
3369075Srrh if (wordf){
3379075Srrh if((c=getc(wordf))==EOF){
3389075Srrh fclose(wordf);
3399075Srrh if(nflag && fflag){
3409075Srrh nflag=0;
3419080Srrh wordf=fopen(filename,"r");
3429075Srrh if(wordf == NULL){
34346821Sbostic fprintf(stderr,
34446821Sbostic "diction can't open %s\n",filename);
3459075Srrh exit(2);
3469075Srrh }
3479075Srrh return(getc(wordf));
3489075Srrh }
3499075Srrh else return(EOF);
3509075Srrh }
3519075Srrh else return(c);
3529075Srrh }
3539075Srrh if ((c = *argptr++) == '\0')
3549075Srrh return(EOF);
3559075Srrh return(c);
3569075Srrh }
3579075Srrh
cgotofn()3589075Srrh cgotofn() {
3599075Srrh register c;
3609075Srrh register struct words *s;
3619075Srrh register ct;
3629075Srrh int neg;
3639075Srrh
3649075Srrh s = smax = w;
3659075Srrh neg = ct = 0;
3669075Srrh nword: for(;;) {
3679075Srrh c = getargc();
3689075Srrh if(c == '~'){
3699075Srrh neg++;
3709075Srrh c = getargc();
3719075Srrh }
3729075Srrh if (c==EOF)
3739075Srrh return;
3749075Srrh if (c == '\n') {
3759075Srrh if(neg)s->out = 0377;
3769075Srrh else s->out = ct-1;
3779075Srrh neg = ct = 0;
3789075Srrh s = w;
3799075Srrh } else {
3809075Srrh loop: if (s->inp == c) {
3819075Srrh s = s->nst;
3829075Srrh ct++;
3839075Srrh continue;
3849075Srrh }
3859075Srrh if (s->inp == 0) goto enter;
3869075Srrh if (s->link == 0) {
3879075Srrh if (smax >= &w[MAXSIZ - 1]) overflo();
3889075Srrh s->link = ++smax;
3899075Srrh s = smax;
3909075Srrh goto enter;
3919075Srrh }
3929075Srrh s = s->link;
3939075Srrh goto loop;
3949075Srrh }
3959075Srrh }
3969075Srrh
3979075Srrh enter:
3989075Srrh do {
3999075Srrh s->inp = c;
4009075Srrh ct++;
4019075Srrh if (smax >= &w[MAXSIZ - 1]) overflo();
4029075Srrh s->nst = ++smax;
4039075Srrh s = smax;
4049075Srrh } while ((c = getargc()) != '\n' && c!=EOF);
4059075Srrh if(neg)smax->out = 0377;
4069075Srrh else smax->out = ct-1;
4079075Srrh neg = ct = 0;
4089075Srrh s = w;
4099075Srrh if (c != EOF)
4109075Srrh goto nword;
4119075Srrh }
4129075Srrh
/*
 * overflo -- report that the phrase list does not fit in the fixed-size
 * tables and exit with status 2.  Does not return.
 */
int
overflo(void)
{
	fprintf(stderr, "wordlist too large\n");
	exit(2);
}
cfail()4179075Srrh cfail() {
4189075Srrh struct words *queue[QSIZE];
4199075Srrh struct words **front, **rear;
4209075Srrh struct words *state;
4219075Srrh int bstart;
4229075Srrh register char c;
4239075Srrh register struct words *s;
4249075Srrh s = w;
4259075Srrh front = rear = queue;
4269075Srrh init: if ((s->inp) != 0) {
4279075Srrh *rear++ = s->nst;
4289075Srrh if (rear >= &queue[QSIZE - 1]) overflo();
4299075Srrh }
4309075Srrh if ((s = s->link) != 0) {
4319075Srrh goto init;
4329075Srrh }
4339075Srrh
4349075Srrh while (rear!=front) {
4359075Srrh s = *front;
4369075Srrh if (front == &queue[QSIZE-1])
4379075Srrh front = queue;
4389075Srrh else front++;
4399075Srrh cloop: if ((c = s->inp) != 0) {
4409075Srrh bstart=0;
4419075Srrh *rear = (q = s->nst);
4429075Srrh if (front < rear)
4439075Srrh if (rear >= &queue[QSIZE-1])
4449075Srrh if (front == queue) overflo();
4459075Srrh else rear = queue;
4469075Srrh else rear++;
4479075Srrh else
4489075Srrh if (++rear == front) overflo();
4499075Srrh state = s->fail;
4509075Srrh floop: if (state == 0){ state = w;bstart=1;}
4519075Srrh if (state->inp == c) {
4529075Srrh qloop: q->fail = state->nst;
4539075Srrh if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
4549075Srrh if((q=q->link) != 0)goto qloop;
4559075Srrh }
4569075Srrh else if ((state = state->link) != 0)
4579075Srrh goto floop;
4589075Srrh else if(bstart==0){state=0; goto floop;}
4599075Srrh }
4609075Srrh if ((s = s->link) != 0)
4619075Srrh goto cloop;
4629075Srrh }
4639075Srrh /* for(s=w;s<=smax;s++)
4649075Srrh printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
4659075Srrh s->inp,s->out,s->nst,s->link,s->fail);
4669075Srrh */
4679075Srrh }
outc(addr,file)4689080Srrh outc(addr,file)
4699075Srrh char *addr;
4709080Srrh char *file;
4719075Srrh {
4729080Srrh int inside;
4739075Srrh
4749080Srrh inside = 0;
4759080Srrh if(!caps && lineno && linemsg){
4769080Srrh printf("beginning line %ld",olcount);
4779080Srrh if(file != (char *)NULL)printf(" %s\n",file);
4789080Srrh else printf("\n");
4799080Srrh linemsg = 0;
4809080Srrh }
4819075Srrh while(nlp < addr){
4829080Srrh if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
4839075Srrh oct=0;
4849075Srrh putchar('\n');
4859075Srrh }
4869075Srrh if(nlp == begp){
4879080Srrh if(caps)inside++;
4889080Srrh else {
4899080Srrh if( oct >45){putchar('\n');
4909080Srrh oct=0;
4919080Srrh }
4929080Srrh if( oct==0 || table[*nlp] != ' '){
4939080Srrh printf("*[");
4949080Srrh oct+=2;
4959080Srrh }
4969080Srrh else {printf(" *[");;
4979080Srrh oct+=3;
4989080Srrh }
4999080Srrh }
5009080Srrh if(mflg)putc('[',mine);
5019075Srrh }
5029080Srrh if(inside){
5039080Srrh if(islower(*nlp))*nlp = toupper(*nlp);
5049080Srrh }
5059080Srrh else {
5069080Srrh if(!caps && *nlp == '\n')*nlp = ' ';
5079080Srrh if(*nlp == ' ' && oct==0);
5089080Srrh else if(!caps) {putchar(*nlp); oct++;}
5099080Srrh }
5109075Srrh if(nlp == endp){
5119080Srrh if(caps)
5129080Srrh inside= 0;
5139080Srrh else {
5149080Srrh if(*(nlp) != ' '){printf("]*");
5159080Srrh oct+=2;
5169080Srrh }
5179080Srrh else {printf("]* ");
5189080Srrh oct+=3;
5199080Srrh }
5209080Srrh if(oct >60){putchar('\n');
5219080Srrh oct=0;
5229080Srrh }
5239080Srrh }
5249080Srrh if(mflg)putc(']',mine);
5259080Srrh beg = 0;
5269075Srrh }
5279080Srrh if(mflg){
5289080Srrh if(nlp == myst)beg = 1;
5299080Srrh if(beg || last){
5309080Srrh putc(*nlp,mine);
5319080Srrh if(myct++ >= 72 || last == 20){
5329080Srrh putc('\n',mine);
5339080Srrh if(last == 20)last=myct=0;
5349080Srrh else myct=0;
5359080Srrh }
5369080Srrh if(last)last++;
5379080Srrh }
5389080Srrh }
5399075Srrh nlp++;
5409075Srrh }
5419075Srrh }
542