#ifndef lint
/*
 * SCCS version identification string; the leading "@(#)" is the marker
 * convention used by SCCS tooling.  Omitted when building under lint.
 */
static char sccsid[] = "@(#)dprog.c	4.2	(Berkeley)	82/11/06";
#endif /* not lint */
49075Srrh 
59075Srrh /*
6*9080Srrh  * diction -- print all sentences containing one of default phrases
79075Srrh  *
89075Srrh  *	status returns:
99075Srrh  *		0 - ok, and some matches
109075Srrh  *		1 - ok, but no matches
119075Srrh  *		2 - some error
129075Srrh  */
139075Srrh 
149075Srrh #include <stdio.h>
159075Srrh #include <ctype.h>
169075Srrh 
/* Capacity limits. */
#define MAXSIZ	6500		/* entries in the node pool w[] */
#define QSIZE	650		/* slots in the work queue used by cfail() */

/* Line bookkeeping shared by execute() and outc(). */
int	linemsg;	/* nonzero: outc() must print a "beginning line" header */
long	olcount;	/* line number reported in that header */
long	lcount;		/* line counter for the input being scanned */

/*
 * One node of the phrase-matching automaton built by cgotofn() and
 * completed by cfail().
 */
struct words {
	char	inp;		/* character that selects this node; 0 = unused */
	char	out;		/* 0: no phrase ends here; 0377: "~" phrase;
				   otherwise phrase length minus one */
	struct	words *nst;	/* node to move to after matching inp */
	struct	words *link;	/* next alternative to try at this position */
	struct	words *fail;	/* fallback node, filled in by cfail() */
};

struct words	w[MAXSIZ];	/* node pool; w[0] is the start node */
struct words	*smax;		/* highest node allocated so far */
struct words	*q;		/* scratch cursor used by cfail() */
299075Srrh 
/*
 * Character class table, indexed by a 7-bit character code.
 *   letters          -> their lower-case form
 *   digits           -> themselves
 *   '.', '!', '?'    -> '.'  (sentence terminator)
 *   newline, space, other punctuation, DEL -> ' '
 *   remaining control characters (tab included) -> 0
 */
char table[128] = {
	/* 000-017 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ' ', 0, 0, 0, 0, 0,
	/* 020-037 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 040-057: space ! " # $ % & ' ( ) * + , - . / */
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	/* 060-077: 0-9 : ; < = > ? */
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	/* 100-117: @ A-O */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 120-137: P-Z [ \ ] ^ _ */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	/* 140-157: ` a-o */
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	/* 160-177: p-z { | } ~ DEL */
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};
/* Command-line options. */
int	caps = 0;	/* -c: matched phrases are upper-cased in place and the
			   input is echoed instead of being reformatted */
int	lineno = 0;	/* -l: print line-number headers and per-file counts */
int	fflag = 0;	/* -f: a phrase file name was supplied */
int	nflag = 1;	/* use default file; cleared by -n */
char	*filename = 0;	/* argument of -f */
int	mflg = 0;	/* don't catch output; set when the CATCH log is open */

/* Run statistics. */
int	nfile = 0;	/* number of input file arguments */
int	nsucc = 0;	/* becomes 1 once any phrase has been found */
long	nsent = 0;	/* sentences seen */
long	nhits = 0;	/* phrases found */
long	tl = 0;		/* -l: lines totalled over all files */
long	th = 0;		/* -l: phrases totalled over all files */

/* Positions inside execute()'s input buffer, shared with outc(). */
char	*nlp = 0;	/* next character outc() has yet to emit */
char	*begp = 0;	/* first character of the phrase being marked */
char	*endp = 0;	/* last character of the phrase being marked */

/* State for the CATCH log written through `mine'. */
int	beg = 0;	/* nonzero while leading context is being copied */
int	last = 0;	/* nonzero while trailing context is being copied;
			   counts characters up to 20 */
char	*myst = 0;	/* where leading context for the log starts */
int	myct = 0;	/* characters on the current log line */

int	oct = 0;	/* characters on the current stdout line */

/* Streams and phrase sources. */
FILE	*wordf = 0;	/* phrase list currently being read */
FILE	*mine = 0;	/* CATCH log, valid only when mflg is set */
char	*argptr = 0;	/* in-memory phrase text read by getargc() when
			   wordf is null */
699075Srrh 
709075Srrh main(argc, argv)
71*9080Srrh char *argv[];
729075Srrh {
73*9080Srrh 	int sv;
749075Srrh 	while (--argc > 0 && (++argv)[0][0]=='-')
759075Srrh 		switch (argv[0][1]) {
769075Srrh 
779075Srrh 		case 'f':
789075Srrh 			fflag++;
79*9080Srrh 			filename = (++argv)[0];
809075Srrh 			argc--;
819075Srrh 			continue;
829075Srrh 
839075Srrh 		case 'n':
849075Srrh 			nflag = 0;
859075Srrh 			continue;
869075Srrh 		case 'd':
87*9080Srrh 			mflg=0;
889075Srrh 			continue;
89*9080Srrh 		case 'c':
90*9080Srrh 			caps++;
91*9080Srrh 			continue;
92*9080Srrh 		case 'l':
93*9080Srrh 			lineno++;
94*9080Srrh 			continue;
959075Srrh 		default:
969075Srrh 			fprintf(stderr, "diction: unknown flag\n");
979075Srrh 			continue;
989075Srrh 		}
999075Srrh out:
1009075Srrh 	if(nflag){
1019075Srrh 		wordf = fopen(DICT,"r");
1029075Srrh 		if(wordf == NULL){
1039075Srrh 			fprintf(stderr,"diction: can't open default dictionary\n");
1049075Srrh 			exit(2);
1059075Srrh 		}
1069075Srrh 	}
1079075Srrh 	else {
108*9080Srrh 		wordf = fopen(filename,"r");
1099075Srrh 		if(wordf == NULL){
1109075Srrh 			fprintf(stderr,"diction: can't open %s\n",filename);
1119075Srrh 			exit(2);
1129075Srrh 		}
1139075Srrh 	}
1149075Srrh 
115*9080Srrh #ifdef CATCH
116*9080Srrh 	if(fopen(CATCH,"r") != NULL)
117*9080Srrh 		if((mine=fopen(CATCH,"a"))!=NULL)mflg=1;
118*9080Srrh #endif
119*9080Srrh #ifdef MACS
120*9080Srrh 	if(caps){
121*9080Srrh 		printf(".so ");
122*9080Srrh 		printf(MACS);
123*9080Srrh 		printf("\n");
124*9080Srrh 	}
125*9080Srrh #endif
1269075Srrh 	cgotofn();
1279075Srrh 	cfail();
1289075Srrh 	nfile = argc;
1299075Srrh 	if (argc<=0) {
1309075Srrh 		execute((char *)NULL);
1319075Srrh 	}
1329075Srrh 	else while (--argc >= 0) {
1339075Srrh 		execute(*argv);
134*9080Srrh 		if(lineno){
135*9080Srrh 			printf("file %s: number of lines %ld number of phrases found %ld\n",
136*9080Srrh 				*argv, lcount-1, nhits);
137*9080Srrh 			tl += lcount-1;
138*9080Srrh 			th += nhits;
139*9080Srrh 			sv = lcount-1;
140*9080Srrh 			lcount = nhits = 0;
141*9080Srrh 		}
1429075Srrh 		argv++;
1439075Srrh 	}
144*9080Srrh 	if(mflg)fprintf(mine,"number of sentences %ld %ld number of hits %ld %ld\n",nsent,tl,nhits,th);
145*9080Srrh 	if(!caps&& !lineno)printf("number of sentences %ld number of phrases found %ld\n",nsent,nhits);
146*9080Srrh 	else if(tl != sv)
147*9080Srrh 		 if(!caps)printf("totals: number of lines %ld number of phrases found %ld\n",tl,th);
1489075Srrh 	exit(nsucc == 0);
1499075Srrh }
1509075Srrh 
1519075Srrh execute(file)
1529075Srrh char *file;
1539075Srrh {
1549075Srrh 	register char *p;
1559075Srrh 	register struct words *c;
1569075Srrh 	register ccount;
157*9080Srrh 	int count1;
158*9080Srrh 	char *beg1;
1599075Srrh 	struct words *savc;
1609075Srrh 	char *savp;
1619075Srrh 	int savct;
1629075Srrh 	int scr;
1639075Srrh 	char buf[1024];
1649075Srrh 	int f;
1659075Srrh 	int hit;
166*9080Srrh 	last = 0;
1679075Srrh 	if (file) {
1689075Srrh 		if ((f = open(file, 0)) < 0) {
1699075Srrh 			fprintf(stderr, "diction: can't open %s\n", file);
1709075Srrh 			exit(2);
1719075Srrh 		}
1729075Srrh 	}
1739075Srrh 	else f = 0;
174*9080Srrh 	lcount = olcount = 1;
175*9080Srrh 	linemsg = 1;
1769075Srrh 	ccount = 0;
177*9080Srrh 	count1 = -1;
1789075Srrh 	p = buf;
1799075Srrh 	nlp = p;
1809075Srrh 	c = w;
1819075Srrh 	oct = hit = 0;
182*9080Srrh 	savc = (struct words *) 0;
183*9080Srrh 	savp = (char *) 0;
1849075Srrh 	for (;;) {
185*9080Srrh 		if(--ccount <= 0) {
1869075Srrh 			if (p == &buf[1024]) p = buf;
1879075Srrh 			if (p > &buf[512]) {
1889075Srrh 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
1899075Srrh 			}
1909075Srrh 			else if ((ccount = read(f, p, 512)) <= 0) break;
191*9080Srrh 			if(caps && (count1 > 0))
192*9080Srrh 				fwrite(beg1,sizeof(*beg1),count1,stdout);
193*9080Srrh 			count1 = ccount;
194*9080Srrh 			beg1 = p;
1959075Srrh 		}
1969075Srrh 		if(p == &buf[1024])p=buf;
1979075Srrh 		nstate:
198*9080Srrh 			if (c->inp == table[*p]) {
1999075Srrh 				c = c->nst;
2009075Srrh 			}
2019075Srrh 			else if (c->link != 0) {
2029075Srrh 				c = c->link;
2039075Srrh 				goto nstate;
2049075Srrh 			}
2059075Srrh 			else {
2069075Srrh 				if(savp != 0){
2079075Srrh 					c=savc;
2089075Srrh 					p=savp;
2099075Srrh 					if(ccount > savct)ccount += savct;
2109075Srrh 					else ccount = savct;
211*9080Srrh 					savc = (struct words *) 0;
212*9080Srrh 					savp = (char *) 0;
2139075Srrh 					goto hadone;
2149075Srrh 				}
2159075Srrh 				c = c->fail;
2169075Srrh 				if (c==0) {
2179075Srrh 					c = w;
2189075Srrh 					istate:
219*9080Srrh 					if (c->inp == table[*p]) {
2209075Srrh 						c = c->nst;
2219075Srrh 					}
2229075Srrh 					else if (c->link != 0) {
2239075Srrh 						c = c->link;
2249075Srrh 						goto istate;
2259075Srrh 					}
2269075Srrh 				}
2279075Srrh 				else goto nstate;
2289075Srrh 			}
2299075Srrh 		if(c->out){
230*9080Srrh 			if((c->inp == table[*(p+1)]) && (c->nst != 0)){
2319075Srrh 				savp=p;
2329075Srrh 				savc=c;
2339075Srrh 				savct=ccount;
2349075Srrh 				goto cont;
2359075Srrh 			}
2369075Srrh 			else if(c->link != 0){
2379075Srrh 				savc=c;
2389075Srrh 				while((savc=savc->link)!= 0){
239*9080Srrh 					if(savc->inp == table[*(p+1)]){
2409075Srrh 						savp=p;
2419075Srrh 						savc=c;
2429075Srrh 						savct=ccount;
2439075Srrh 						goto cont;
2449075Srrh 					}
2459075Srrh 				}
2469075Srrh 			}
2479075Srrh 		hadone:
248*9080Srrh 			savc = (struct words *) 0;
249*9080Srrh 			savp = (char *) 0;
2509075Srrh 			if(c->out == (char)(0377)){
2519075Srrh 				c=w;
2529075Srrh 				goto nstate;
2539075Srrh 			}
2549075Srrh 			begp = p - (c->out);
2559075Srrh 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
2569075Srrh 			endp=p;
257*9080Srrh 			if(mflg){
258*9080Srrh 				if(begp-20 < &buf[0]){
259*9080Srrh 					myst = &buf[1024]-20;
260*9080Srrh 					if(nlp < &buf[512])myst=nlp;
261*9080Srrh 				}
262*9080Srrh 				else myst = begp-20;
263*9080Srrh 				if(myst < nlp)myst = nlp;
264*9080Srrh 				beg = 0;
265*9080Srrh 			}
2669075Srrh 			hit = 1;
2679075Srrh 			nhits++;
268*9080Srrh 			if(*p == '\n')lcount++;
269*9080Srrh 			if (table[*p++] == '.') {
270*9080Srrh 				linemsg = 1;
2719075Srrh 				if (--ccount <= 0) {
2729075Srrh 					if (p == &buf[1024]) p = buf;
2739075Srrh 					if (p > &buf[512]) {
2749075Srrh 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
2759075Srrh 					}
2769075Srrh 					else if ((ccount = read(f, p, 512)) <= 0) break;
277*9080Srrh 					if(caps && (count1 > 0))
278*9080Srrh 						fwrite(beg1,sizeof(*beg1),count1,stdout);
279*9080Srrh 					count1=ccount;
280*9080Srrh 					beg1=p;
2819075Srrh 				}
2829075Srrh 			}
2839075Srrh 	succeed:	nsucc = 1;
2849075Srrh 			{
2859075Srrh 				if (p <= nlp) {
286*9080Srrh 					outc(&buf[1024],file);
2879075Srrh 					nlp = buf;
2889075Srrh 				}
289*9080Srrh 				outc(p,file);
2909075Srrh 			}
291*9080Srrh 			if(mflg)last=1;
2929075Srrh 	nomatch:
2939075Srrh 			nlp = p;
2949075Srrh 			c = w;
2959075Srrh 			begp = endp = 0;
2969075Srrh 			continue;
2979075Srrh 		}
2989075Srrh 	cont:
299*9080Srrh 		if(*p == '\n')lcount++;
300*9080Srrh 		if (table[*p++] == '.'){
3019075Srrh 				if(hit){
3029075Srrh 					if(p <= nlp){
303*9080Srrh 						outc(&buf[1024],file);
3049075Srrh 						nlp = buf;
3059075Srrh 					}
306*9080Srrh 					outc(p,file);
307*9080Srrh 					if(!caps)printf("\n\n");
308*9080Srrh 					if(mflg && last){putc('\n',mine);myct = 0;}
3099075Srrh 					}
310*9080Srrh 				linemsg = 1;
311*9080Srrh 				if(*p == '\n')olcount = lcount+1;
312*9080Srrh 				else
313*9080Srrh 					olcount=lcount;
314*9080Srrh 				last = 0;
3159075Srrh 				hit = 0;
3169075Srrh 				oct = 0;
3179075Srrh 				nlp = p;
3189075Srrh 				c = w;
3199075Srrh 				begp = endp = 0;
320*9080Srrh 				nsent++;
3219075Srrh 			}
3229075Srrh 	}
323*9080Srrh 	if(caps && (count1 > 0))
324*9080Srrh 		fwrite(beg1,sizeof(*beg1),count1,stdout);
3259075Srrh 	close(f);
3269075Srrh }
3279075Srrh 
3289075Srrh getargc()
3299075Srrh {
3309075Srrh 	register c;
3319075Srrh 	if (wordf){
3329075Srrh 		if((c=getc(wordf))==EOF){
3339075Srrh 			fclose(wordf);
3349075Srrh 			if(nflag && fflag){
3359075Srrh 				nflag=0;
336*9080Srrh 				wordf=fopen(filename,"r");
3379075Srrh 				if(wordf == NULL){
338*9080Srrh 					fprintf("diction can't open %s\n",filename);
3399075Srrh 					exit(2);
3409075Srrh 				}
3419075Srrh 				return(getc(wordf));
3429075Srrh 			}
3439075Srrh 			else return(EOF);
3449075Srrh 		}
3459075Srrh 		else return(c);
3469075Srrh 	}
3479075Srrh 	if ((c = *argptr++) == '\0')
3489075Srrh 		return(EOF);
3499075Srrh 	return(c);
3509075Srrh }
3519075Srrh 
3529075Srrh cgotofn() {
3539075Srrh 	register c;
3549075Srrh 	register struct words *s;
3559075Srrh 	register ct;
3569075Srrh 	int neg;
3579075Srrh 
3589075Srrh 	s = smax = w;
3599075Srrh 	neg = ct = 0;
3609075Srrh nword:	for(;;) {
3619075Srrh 		c = getargc();
3629075Srrh 		if(c == '~'){
3639075Srrh 			neg++;
3649075Srrh 			c = getargc();
3659075Srrh 		}
3669075Srrh 		if (c==EOF)
3679075Srrh 			return;
3689075Srrh 		if (c == '\n') {
3699075Srrh 			if(neg)s->out = 0377;
3709075Srrh 			else s->out = ct-1;
3719075Srrh 			neg = ct = 0;
3729075Srrh 			s = w;
3739075Srrh 		} else {
3749075Srrh 		loop:	if (s->inp == c) {
3759075Srrh 				s = s->nst;
3769075Srrh 				ct++;
3779075Srrh 				continue;
3789075Srrh 			}
3799075Srrh 			if (s->inp == 0) goto enter;
3809075Srrh 			if (s->link == 0) {
3819075Srrh 				if (smax >= &w[MAXSIZ - 1]) overflo();
3829075Srrh 				s->link = ++smax;
3839075Srrh 				s = smax;
3849075Srrh 				goto enter;
3859075Srrh 			}
3869075Srrh 			s = s->link;
3879075Srrh 			goto loop;
3889075Srrh 		}
3899075Srrh 	}
3909075Srrh 
3919075Srrh 	enter:
3929075Srrh 	do {
3939075Srrh 		s->inp = c;
3949075Srrh 		ct++;
3959075Srrh 		if (smax >= &w[MAXSIZ - 1]) overflo();
3969075Srrh 		s->nst = ++smax;
3979075Srrh 		s = smax;
3989075Srrh 	} while ((c = getargc()) != '\n' && c!=EOF);
3999075Srrh 	if(neg)smax->out = 0377;
4009075Srrh 	else smax->out = ct-1;
4019075Srrh 	neg = ct = 0;
4029075Srrh 	s = w;
4039075Srrh 	if (c != EOF)
4049075Srrh 		goto nword;
4059075Srrh }
4069075Srrh 
/*
 * overflo -- report that a fixed-size table has been exhausted and
 * terminate with status 2.  Does not return.
 */
overflo()
{
	fputs("wordlist too large\n", stderr);
	exit(2);
}
4119075Srrh cfail() {
4129075Srrh 	struct words *queue[QSIZE];
4139075Srrh 	struct words **front, **rear;
4149075Srrh 	struct words *state;
4159075Srrh 	int bstart;
4169075Srrh 	register char c;
4179075Srrh 	register struct words *s;
4189075Srrh 	s = w;
4199075Srrh 	front = rear = queue;
4209075Srrh init:	if ((s->inp) != 0) {
4219075Srrh 		*rear++ = s->nst;
4229075Srrh 		if (rear >= &queue[QSIZE - 1]) overflo();
4239075Srrh 	}
4249075Srrh 	if ((s = s->link) != 0) {
4259075Srrh 		goto init;
4269075Srrh 	}
4279075Srrh 
4289075Srrh 	while (rear!=front) {
4299075Srrh 		s = *front;
4309075Srrh 		if (front == &queue[QSIZE-1])
4319075Srrh 			front = queue;
4329075Srrh 		else front++;
4339075Srrh 	cloop:	if ((c = s->inp) != 0) {
4349075Srrh 			bstart=0;
4359075Srrh 			*rear = (q = s->nst);
4369075Srrh 			if (front < rear)
4379075Srrh 				if (rear >= &queue[QSIZE-1])
4389075Srrh 					if (front == queue) overflo();
4399075Srrh 					else rear = queue;
4409075Srrh 				else rear++;
4419075Srrh 			else
4429075Srrh 				if (++rear == front) overflo();
4439075Srrh 			state = s->fail;
4449075Srrh 		floop:	if (state == 0){ state = w;bstart=1;}
4459075Srrh 			if (state->inp == c) {
4469075Srrh 			qloop:	q->fail = state->nst;
4479075Srrh 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
4489075Srrh 				if((q=q->link) != 0)goto qloop;
4499075Srrh 			}
4509075Srrh 			else if ((state = state->link) != 0)
4519075Srrh 				goto floop;
4529075Srrh 			else if(bstart==0){state=0; goto floop;}
4539075Srrh 		}
4549075Srrh 		if ((s = s->link) != 0)
4559075Srrh 			goto cloop;
4569075Srrh 	}
4579075Srrh /*	for(s=w;s<=smax;s++)
4589075Srrh 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
4599075Srrh 			s->inp,s->out,s->nst,s->link,s->fail);
4609075Srrh */
4619075Srrh }
462*9080Srrh outc(addr,file)
4639075Srrh char *addr;
464*9080Srrh char *file;
4659075Srrh {
466*9080Srrh 	int inside;
4679075Srrh 
468*9080Srrh 	inside = 0;
469*9080Srrh 	if(!caps && lineno && linemsg){
470*9080Srrh 		printf("beginning line %ld",olcount);
471*9080Srrh 		if(file != (char *)NULL)printf(" %s\n",file);
472*9080Srrh 		else printf("\n");
473*9080Srrh 		linemsg = 0;
474*9080Srrh 	}
4759075Srrh 	while(nlp < addr){
476*9080Srrh 		if(!caps && oct > 60 && table[*nlp] == ' ' && nlp != begp && nlp != endp){
4779075Srrh 			oct=0;
4789075Srrh 			putchar('\n');
4799075Srrh 		}
4809075Srrh 		if(nlp == begp){
481*9080Srrh 			if(caps)inside++;
482*9080Srrh 			else {
483*9080Srrh 				if( oct >45){putchar('\n');
484*9080Srrh 					oct=0;
485*9080Srrh 				}
486*9080Srrh 				if( oct==0 || table[*nlp] != ' '){
487*9080Srrh 					printf("*[");
488*9080Srrh 					oct+=2;
489*9080Srrh 				}
490*9080Srrh 				else {printf(" *[");;
491*9080Srrh 					oct+=3;
492*9080Srrh 				}
493*9080Srrh 			}
494*9080Srrh 			if(mflg)putc('[',mine);
4959075Srrh 		}
496*9080Srrh 		if(inside){
497*9080Srrh 			if(islower(*nlp))*nlp = toupper(*nlp);
498*9080Srrh 		}
499*9080Srrh 		else {
500*9080Srrh 			if(!caps && *nlp == '\n')*nlp = ' ';
501*9080Srrh 			if(*nlp == ' ' && oct==0);
502*9080Srrh 			else if(!caps) {putchar(*nlp); oct++;}
503*9080Srrh 		}
5049075Srrh 		if(nlp == endp){
505*9080Srrh 			if(caps)
506*9080Srrh 				inside= 0;
507*9080Srrh 			else {
508*9080Srrh 				if(*(nlp) != ' '){printf("]*");
509*9080Srrh 					oct+=2;
510*9080Srrh 				}
511*9080Srrh 				else {printf("]* ");
512*9080Srrh 					oct+=3;
513*9080Srrh 				}
514*9080Srrh 				if(oct >60){putchar('\n');
515*9080Srrh 					oct=0;
516*9080Srrh 				}
517*9080Srrh 			}
518*9080Srrh 			if(mflg)putc(']',mine);
519*9080Srrh 			beg = 0;
5209075Srrh 		}
521*9080Srrh 		if(mflg){
522*9080Srrh 			if(nlp == myst)beg = 1;
523*9080Srrh 			if(beg || last){
524*9080Srrh 				putc(*nlp,mine);
525*9080Srrh 				if(myct++ >= 72 || last == 20){
526*9080Srrh 					putc('\n',mine);
527*9080Srrh 					if(last == 20)last=myct=0;
528*9080Srrh 					else myct=0;
529*9080Srrh 				}
530*9080Srrh 				if(last)last++;
531*9080Srrh 			}
532*9080Srrh 		}
5339075Srrh 		nlp++;
5349075Srrh 	}
5359075Srrh }
536