19075Srrh #ifndef lint
2*37872Sbostic static char sccsid[] = "@(#)dprog.c	4.3	(Berkeley)	89/05/11";
39075Srrh #endif not lint
49075Srrh 
59075Srrh /*
69080Srrh  * diction -- print all sentences containing one of default phrases
79075Srrh  *
89075Srrh  *	status returns:
99075Srrh  *		0 - ok, and some matches
109075Srrh  *		1 - ok, but no matches
119075Srrh  *		2 - some error
129075Srrh  */
139075Srrh 
149075Srrh #include <stdio.h>
159075Srrh #include <ctype.h>
16*37872Sbostic #include "pathnames.h"
179075Srrh 
#define	MAXSIZ	6500	/* number of nodes available in the pool w[] */
#define	QSIZE	650	/* slots in the work queue used by cfail() */

int	linemsg;	/* nonzero while outc() still owes a "beginning line" header */
long	olcount;	/* line number printed by that header */
long	lcount;		/* line counter for the input being scanned */

/*
 * One node of the phrase-matching machine built by cgotofn() and
 * completed by cfail().
 *
 *	inp	character this node matches (0 while the node is unused)
 *	out	0 if no phrase ends here; otherwise the value stored by
 *		cgotofn(): phrase length minus one, or 0377 for a phrase
 *		that was prefixed with '~'
 *	nst	node to move to when inp matches
 *	link	next alternative to try when inp does not match
 *	fail	node to fall back to when no alternative matches
 */
struct words {
	char	inp;
	char	out;
	struct	words *nst;
	struct	words *link;
	struct	words *fail;
};

struct words w[MAXSIZ];		/* node pool; w[0] is the start node */
struct words *smax;		/* highest node handed out so far */
struct words *q;		/* scratch node pointer used by cfail() */
309075Srrh 
/*
 * Character classification, indexed by 7-bit ASCII code.
 *
 *	0	control characters other than newline
 *	' '	newline, blank and most punctuation
 *	'.'	the characters '!', '.' and '?'
 *	digits map to themselves, letters map to their lower-case form
 */
char table[128] = {
	/* 000-007 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 010-017 */	0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020-027 */	0, 0, 0, 0, 0, 0, 0, 0,
	/* 030-037 */	0, 0, 0, 0, 0, 0, 0, 0,
	/*  sp - ' */	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	/*  (  - / */	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/*  0  - 7 */	'0', '1', '2', '3', '4', '5', '6', '7',
	/*  8  - ? */	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/*  @  - G */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/*  H  - O */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/*  P  - W */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/*  X  - _ */	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/*  `  - g */	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	/*  h  - o */	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/*  p  - w */	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	/*  x - del */	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* command-line switches */
int	caps = 0;		/* -c given */
int	lineno = 0;		/* -l given */
int	fflag;			/* -f given */
int	nflag = 1;		/* use default file; cleared by -n */
char	*filename;		/* argument of -f */
int	mflg = 0;		/* don't catch output (set when the CATCH file is open) */

/* bookkeeping */
int	nfile;			/* argument count left after the switches */
int	nsucc;			/* set to 1 once any phrase has been found */
long	nsent = 0;		/* sentences seen */
long	nhits = 0;		/* phrases found */

/* cursors into execute()'s input buffer, shared with outc() */
char	*nlp;			/* next byte outc() will emit */
char	*begp;			/* first byte of the phrase being flagged */
char	*endp;			/* last byte of the phrase being flagged */

/* state for the copy written to the catch file */
int	beg;
int	last;
char	*myst;
int	myct = 0;

int	oct = 0;		/* characters written on the current output line */

FILE	*wordf;			/* phrase list being read by getargc() */
FILE	*mine;			/* catch file */
char	*argptr;		/* in-memory phrase list, read when wordf is null */

/* totals across all input files (reported with -l) */
long	tl = 0;			/* lines */
long	th = 0;			/* phrases found */
709075Srrh 
719075Srrh main(argc, argv)
729080Srrh char *argv[];
739075Srrh {
749080Srrh 	int sv;
759075Srrh 	while (--argc > 0 && (++argv)[0][0]=='-')
769075Srrh 		switch (argv[0][1]) {
779075Srrh 
789075Srrh 		case 'f':
799075Srrh 			fflag++;
809080Srrh 			filename = (++argv)[0];
819075Srrh 			argc--;
829075Srrh 			continue;
839075Srrh 
849075Srrh 		case 'n':
859075Srrh 			nflag = 0;
869075Srrh 			continue;
879075Srrh 		case 'd':
889080Srrh 			mflg=0;
899075Srrh 			continue;
909080Srrh 		case 'c':
919080Srrh 			caps++;
929080Srrh 			continue;
939080Srrh 		case 'l':
949080Srrh 			lineno++;
959080Srrh 			continue;
969075Srrh 		default:
979075Srrh 			fprintf(stderr, "diction: unknown flag\n");
989075Srrh 			continue;
999075Srrh 		}
1009075Srrh out:
1019075Srrh 	if(nflag){
102*37872Sbostic 		wordf = fopen(_PATH_DICT,"r");
1039075Srrh 		if(wordf == NULL){
1049075Srrh 			fprintf(stderr,"diction: can't open default dictionary\n");
1059075Srrh 			exit(2);
1069075Srrh 		}
1079075Srrh 	}
1089075Srrh 	else {
1099080Srrh 		wordf = fopen(filename,"r");
1109075Srrh 		if(wordf == NULL){
1119075Srrh 			fprintf(stderr,"diction: can't open %s\n",filename);
1129075Srrh 			exit(2);
1139075Srrh 		}
1149075Srrh 	}
1159075Srrh 
1169080Srrh #ifdef CATCH
1179080Srrh 	if(fopen(CATCH,"r") != NULL)
1189080Srrh 		if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
1199080Srrh #endif
1209080Srrh #ifdef MACS
1219080Srrh 	if(caps){
1229080Srrh 		printf(".so ");
1239080Srrh 		printf(MACS);
1249080Srrh 		printf("\n");
1259080Srrh 	}
1269080Srrh #endif
1279075Srrh 	cgotofn();
1289075Srrh 	cfail();
1299075Srrh 	nfile = argc;
1309075Srrh 	if (argc<=0) {
1319075Srrh 		execute((char *)NULL);
1329075Srrh 	}
1339075Srrh 	else while (--argc >= 0) {
1349075Srrh 		execute(*argv);
1359080Srrh 		if(lineno){
1369080Srrh 			printf("file %s: number of lines %ld number of phrases found %ld\n",
1379080Srrh 				*argv, lcount-1, nhits);
1389080Srrh 			tl += lcount-1;
1399080Srrh 			th += nhits;
1409080Srrh 			sv = lcount-1;
1419080Srrh 			lcount = nhits = 0;
1429080Srrh 		}
1439075Srrh 		argv++;
1449075Srrh 	}
1459080Srrh 	if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
1469080Srrh 	if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
1479080Srrh 	else if(tl != sv)
1489080Srrh 		 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
1499075Srrh 	exit(nsucc == 0);
1509075Srrh }
1519075Srrh 
1529075Srrh execute(file)
1539075Srrh char *file;
1549075Srrh {
1559075Srrh 	register char *p;
1569075Srrh 	register struct words *c;
1579075Srrh 	register ccount;
1589080Srrh 	int count1;
1599080Srrh 	char *beg1;
1609075Srrh 	struct words *savc;
1619075Srrh 	char *savp;
1629075Srrh 	int savct;
1639075Srrh 	int scr;
1649075Srrh 	char buf[1024];
1659075Srrh 	int f;
1669075Srrh 	int hit;
1679080Srrh 	last = 0;
1689075Srrh 	if (file) {
1699075Srrh 		if ((f = open(file, 0)) < 0) {
1709075Srrh 			fprintf(stderr, "diction: can't open %s\n", file);
1719075Srrh 			exit(2);
1729075Srrh 		}
1739075Srrh 	}
1749075Srrh 	else f = 0;
1759080Srrh 	lcount = olcount = 1;
1769080Srrh 	linemsg = 1;
1779075Srrh 	ccount = 0;
1789080Srrh 	count1 = -1;
1799075Srrh 	p = buf;
1809075Srrh 	nlp = p;
1819075Srrh 	c = w;
1829075Srrh 	oct = hit = 0;
1839080Srrh 	savc = (struct words *) 0;
1849080Srrh 	savp = (char *) 0;
1859075Srrh 	for (;;) {
1869080Srrh 		if(--ccount <= 0) {
1879075Srrh 			if (p == &buf[1024]) p = buf;
1889075Srrh 			if (p > &buf[512]) {
1899075Srrh 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
1909075Srrh 			}
1919075Srrh 			else if ((ccount = read(f, p, 512)) <= 0) break;
1929080Srrh 			if(caps && (count1 > 0))
1939080Srrh 				fwrite(beg1,sizeof(*beg1),count1,stdout);
1949080Srrh 			count1 = ccount;
1959080Srrh 			beg1 = p;
1969075Srrh 		}
1979075Srrh 		if(p == &buf[1024])p=buf;
1989075Srrh 		nstate:
1999080Srrh 			if (c->inp == table[*p]) {
2009075Srrh 				c = c->nst;
2019075Srrh 			}
2029075Srrh 			else if (c->link != 0) {
2039075Srrh 				c = c->link;
2049075Srrh 				goto nstate;
2059075Srrh 			}
2069075Srrh 			else {
2079075Srrh 				if(savp != 0){
2089075Srrh 					c=savc;
2099075Srrh 					p=savp;
2109075Srrh 					if(ccount > savct)ccount += savct;
2119075Srrh 					else ccount = savct;
2129080Srrh 					savc = (struct words *) 0;
2139080Srrh 					savp = (char *) 0;
2149075Srrh 					goto hadone;
2159075Srrh 				}
2169075Srrh 				c = c->fail;
2179075Srrh 				if (c==0) {
2189075Srrh 					c = w;
2199075Srrh 					istate:
2209080Srrh 					if (c->inp == table[*p]) {
2219075Srrh 						c = c->nst;
2229075Srrh 					}
2239075Srrh 					else if (c->link != 0) {
2249075Srrh 						c = c->link;
2259075Srrh 						goto istate;
2269075Srrh 					}
2279075Srrh 				}
2289075Srrh 				else goto nstate;
2299075Srrh 			}
2309075Srrh 		if(c->out){
2319080Srrh 			if((c->inp == table[*(p+1)]) && (c->nst != 0)){
2329075Srrh 				savp=p;
2339075Srrh 				savc=c;
2349075Srrh 				savct=ccount;
2359075Srrh 				goto cont;
2369075Srrh 			}
2379075Srrh 			else if(c->link != 0){
2389075Srrh 				savc=c;
2399075Srrh 				while((savc=savc->link)!= 0){
2409080Srrh 					if(savc->inp == table[*(p+1)]){
2419075Srrh 						savp=p;
2429075Srrh 						savc=c;
2439075Srrh 						savct=ccount;
2449075Srrh 						goto cont;
2459075Srrh 					}
2469075Srrh 				}
2479075Srrh 			}
2489075Srrh 		hadone:
2499080Srrh 			savc = (struct words *) 0;
2509080Srrh 			savp = (char *) 0;
2519075Srrh 			if(c->out == (char)(0377)){
2529075Srrh 				c=w;
2539075Srrh 				goto nstate;
2549075Srrh 			}
2559075Srrh 			begp = p - (c->out);
2569075Srrh 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
2579075Srrh 			endp=p;
2589080Srrh 			if(mflg){
2599080Srrh 				if(begp-20 < &buf[0]){
2609080Srrh 					myst = &buf[1024]-20;
2619080Srrh 					if(nlp < &buf[512])myst=nlp;
2629080Srrh 				}
2639080Srrh 				else myst = begp-20;
2649080Srrh 				if(myst < nlp)myst = nlp;
2659080Srrh 				beg = 0;
2669080Srrh 			}
2679075Srrh 			hit = 1;
2689075Srrh 			nhits++;
2699080Srrh 			if(*p == '\n')lcount++;
2709080Srrh 			if (table[*p++] == '.') {
2719080Srrh 				linemsg = 1;
2729075Srrh 				if (--ccount <= 0) {
2739075Srrh 					if (p == &buf[1024]) p = buf;
2749075Srrh 					if (p > &buf[512]) {
2759075Srrh 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
2769075Srrh 					}
2779075Srrh 					else if ((ccount = read(f, p, 512)) <= 0) break;
2789080Srrh 					if(caps && (count1 > 0))
2799080Srrh 						fwrite(beg1,sizeof(*beg1),count1,stdout);
2809080Srrh 					count1=ccount;
2819080Srrh 					beg1=p;
2829075Srrh 				}
2839075Srrh 			}
2849075Srrh 	succeed:	nsucc = 1;
2859075Srrh 			{
2869075Srrh 				if (p <= nlp) {
2879080Srrh 					outc(&buf[1024],file);
2889075Srrh 					nlp = buf;
2899075Srrh 				}
2909080Srrh 				outc(p,file);
2919075Srrh 			}
2929080Srrh 			if(mflg)last=1;
2939075Srrh 	nomatch:
2949075Srrh 			nlp = p;
2959075Srrh 			c = w;
2969075Srrh 			begp = endp = 0;
2979075Srrh 			continue;
2989075Srrh 		}
2999075Srrh 	cont:
3009080Srrh 		if(*p == '\n')lcount++;
3019080Srrh 		if (table[*p++] == '.'){
3029075Srrh 				if(hit){
3039075Srrh 					if(p <= nlp){
3049080Srrh 						outc(&buf[1024],file);
3059075Srrh 						nlp = buf;
3069075Srrh 					}
3079080Srrh 					outc(p,file);
3089080Srrh 					if(!caps)printf("\n\n");
3099080Srrh 					if(mflg && last){putc('\n',mine);myct = 0;}
3109075Srrh 					}
3119080Srrh 				linemsg = 1;
3129080Srrh 				if(*p == '\n')olcount = lcount+1;
3139080Srrh 				else
3149080Srrh 					olcount=lcount;
3159080Srrh 				last = 0;
3169075Srrh 				hit = 0;
3179075Srrh 				oct = 0;
3189075Srrh 				nlp = p;
3199075Srrh 				c = w;
3209075Srrh 				begp = endp = 0;
3219080Srrh 				nsent++;
3229075Srrh 			}
3239075Srrh 	}
3249080Srrh 	if(caps && (count1 > 0))
3259080Srrh 		fwrite(beg1,sizeof(*beg1),count1,stdout);
3269075Srrh 	close(f);
3279075Srrh }
3289075Srrh 
3299075Srrh getargc()
3309075Srrh {
3319075Srrh 	register c;
3329075Srrh 	if (wordf){
3339075Srrh 		if((c=getc(wordf))==EOF){
3349075Srrh 			fclose(wordf);
3359075Srrh 			if(nflag && fflag){
3369075Srrh 				nflag=0;
3379080Srrh 				wordf=fopen(filename,"r");
3389075Srrh 				if(wordf == NULL){
3399080Srrh 					fprintf("diction can't open %s\n",filename);
3409075Srrh 					exit(2);
3419075Srrh 				}
3429075Srrh 				return(getc(wordf));
3439075Srrh 			}
3449075Srrh 			else return(EOF);
3459075Srrh 		}
3469075Srrh 		else return(c);
3479075Srrh 	}
3489075Srrh 	if ((c = *argptr++) == '\0')
3499075Srrh 		return(EOF);
3509075Srrh 	return(c);
3519075Srrh }
3529075Srrh 
3539075Srrh cgotofn() {
3549075Srrh 	register c;
3559075Srrh 	register struct words *s;
3569075Srrh 	register ct;
3579075Srrh 	int neg;
3589075Srrh 
3599075Srrh 	s = smax = w;
3609075Srrh 	neg = ct = 0;
3619075Srrh nword:	for(;;) {
3629075Srrh 		c = getargc();
3639075Srrh 		if(c == '~'){
3649075Srrh 			neg++;
3659075Srrh 			c = getargc();
3669075Srrh 		}
3679075Srrh 		if (c==EOF)
3689075Srrh 			return;
3699075Srrh 		if (c == '\n') {
3709075Srrh 			if(neg)s->out = 0377;
3719075Srrh 			else s->out = ct-1;
3729075Srrh 			neg = ct = 0;
3739075Srrh 			s = w;
3749075Srrh 		} else {
3759075Srrh 		loop:	if (s->inp == c) {
3769075Srrh 				s = s->nst;
3779075Srrh 				ct++;
3789075Srrh 				continue;
3799075Srrh 			}
3809075Srrh 			if (s->inp == 0) goto enter;
3819075Srrh 			if (s->link == 0) {
3829075Srrh 				if (smax >= &w[MAXSIZ - 1]) overflo();
3839075Srrh 				s->link = ++smax;
3849075Srrh 				s = smax;
3859075Srrh 				goto enter;
3869075Srrh 			}
3879075Srrh 			s = s->link;
3889075Srrh 			goto loop;
3899075Srrh 		}
3909075Srrh 	}
3919075Srrh 
3929075Srrh 	enter:
3939075Srrh 	do {
3949075Srrh 		s->inp = c;
3959075Srrh 		ct++;
3969075Srrh 		if (smax >= &w[MAXSIZ - 1]) overflo();
3979075Srrh 		s->nst = ++smax;
3989075Srrh 		s = smax;
3999075Srrh 	} while ((c = getargc()) != '\n' && c!=EOF);
4009075Srrh 	if(neg)smax->out = 0377;
4019075Srrh 	else smax->out = ct-1;
4029075Srrh 	neg = ct = 0;
4039075Srrh 	s = w;
4049075Srrh 	if (c != EOF)
4059075Srrh 		goto nword;
4069075Srrh }
4079075Srrh 
/*
 * overflo -- report that the phrase list does not fit and exit with
 * status 2.  Does not return.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
4129075Srrh cfail() {
4139075Srrh 	struct words *queue[QSIZE];
4149075Srrh 	struct words **front, **rear;
4159075Srrh 	struct words *state;
4169075Srrh 	int bstart;
4179075Srrh 	register char c;
4189075Srrh 	register struct words *s;
4199075Srrh 	s = w;
4209075Srrh 	front = rear = queue;
4219075Srrh init:	if ((s->inp) != 0) {
4229075Srrh 		*rear++ = s->nst;
4239075Srrh 		if (rear >= &queue[QSIZE - 1]) overflo();
4249075Srrh 	}
4259075Srrh 	if ((s = s->link) != 0) {
4269075Srrh 		goto init;
4279075Srrh 	}
4289075Srrh 
4299075Srrh 	while (rear!=front) {
4309075Srrh 		s = *front;
4319075Srrh 		if (front == &queue[QSIZE-1])
4329075Srrh 			front = queue;
4339075Srrh 		else front++;
4349075Srrh 	cloop:	if ((c = s->inp) != 0) {
4359075Srrh 			bstart=0;
4369075Srrh 			*rear = (q = s->nst);
4379075Srrh 			if (front < rear)
4389075Srrh 				if (rear >= &queue[QSIZE-1])
4399075Srrh 					if (front == queue) overflo();
4409075Srrh 					else rear = queue;
4419075Srrh 				else rear++;
4429075Srrh 			else
4439075Srrh 				if (++rear == front) overflo();
4449075Srrh 			state = s->fail;
4459075Srrh 		floop:	if (state == 0){ state = w;bstart=1;}
4469075Srrh 			if (state->inp == c) {
4479075Srrh 			qloop:	q->fail = state->nst;
4489075Srrh 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
4499075Srrh 				if((q=q->link) != 0)goto qloop;
4509075Srrh 			}
4519075Srrh 			else if ((state = state->link) != 0)
4529075Srrh 				goto floop;
4539075Srrh 			else if(bstart==0){state=0; goto floop;}
4549075Srrh 		}
4559075Srrh 		if ((s = s->link) != 0)
4569075Srrh 			goto cloop;
4579075Srrh 	}
4589075Srrh /*	for(s=w;s<=smax;s++)
4599075Srrh 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
4609075Srrh 			s->inp,s->out,s->nst,s->link,s->fail);
4619075Srrh */
4629075Srrh }
4639080Srrh outc(addr,file)
4649075Srrh char *addr;
4659080Srrh char *file;
4669075Srrh {
4679080Srrh 	int inside;
4689075Srrh 
4699080Srrh 	inside = 0;
4709080Srrh 	if(!caps && lineno && linemsg){
4719080Srrh 		printf("beginning line %ld",olcount);
4729080Srrh 		if(file != (char *)NULL)printf(" %s\n",file);
4739080Srrh 		else printf("\n");
4749080Srrh 		linemsg = 0;
4759080Srrh 	}
4769075Srrh 	while(nlp < addr){
4779080Srrh 		if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
4789075Srrh 			oct=0;
4799075Srrh 			putchar('\n');
4809075Srrh 		}
4819075Srrh 		if(nlp == begp){
4829080Srrh 			if(caps)inside++;
4839080Srrh 			else {
4849080Srrh 				if( oct >45){putchar('\n');
4859080Srrh 					oct=0;
4869080Srrh 				}
4879080Srrh 				if( oct==0 || table[*nlp] != ' '){
4889080Srrh 					printf("*[");
4899080Srrh 					oct+=2;
4909080Srrh 				}
4919080Srrh 				else {printf(" *[");;
4929080Srrh 					oct+=3;
4939080Srrh 				}
4949080Srrh 			}
4959080Srrh 			if(mflg)putc('[',mine);
4969075Srrh 		}
4979080Srrh 		if(inside){
4989080Srrh 			if(islower(*nlp))*nlp = toupper(*nlp);
4999080Srrh 		}
5009080Srrh 		else {
5019080Srrh 			if(!caps && *nlp == '\n')*nlp = ' ';
5029080Srrh 			if(*nlp == ' ' && oct==0);
5039080Srrh 			else if(!caps) {putchar(*nlp); oct++;}
5049080Srrh 		}
5059075Srrh 		if(nlp == endp){
5069080Srrh 			if(caps)
5079080Srrh 				inside= 0;
5089080Srrh 			else {
5099080Srrh 				if(*(nlp) != ' '){printf("]*");
5109080Srrh 					oct+=2;
5119080Srrh 				}
5129080Srrh 				else {printf("]* ");
5139080Srrh 					oct+=3;
5149080Srrh 				}
5159080Srrh 				if(oct >60){putchar('\n');
5169080Srrh 					oct=0;
5179080Srrh 				}
5189080Srrh 			}
5199080Srrh 			if(mflg)putc(']',mine);
5209080Srrh 			beg = 0;
5219075Srrh 		}
5229080Srrh 		if(mflg){
5239080Srrh 			if(nlp == myst)beg = 1;
5249080Srrh 			if(beg || last){
5259080Srrh 				putc(*nlp,mine);
5269080Srrh 				if(myct++ >= 72 || last == 20){
5279080Srrh 					putc('\n',mine);
5289080Srrh 					if(last == 20)last=myct=0;
5299080Srrh 					else myct=0;
5309080Srrh 				}
5319080Srrh 				if(last)last++;
5329080Srrh 			}
5339080Srrh 		}
5349075Srrh 		nlp++;
5359075Srrh 	}
5369075Srrh }
537