#ifndef lint
/* SCCS version string for this file; compiled out when `lint' is defined. */
static char sccsid[] = "@(#)dprog.c	4.1	(Berkeley)	82/11/06";
#endif /* not lint */
4*9075Srrh 
/*
 * diction -- print all sentences containing one of the default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 */
13*9075Srrh 
14*9075Srrh #include <stdio.h>
15*9075Srrh #include <ctype.h>
16*9075Srrh 
/* Capacity of the node pool w[] and of the work queue used by cfail(). */
#define	MAXSIZ 6500
#define QSIZE 650

/*
 * One node of the phrase-matching automaton built by cgotofn() and
 * cfail() and walked by execute().
 */
struct words {
	char 	inp;		/* character this node matches; 0 if none yet */
	char	out;		/* non-zero at the end of a phrase: phrase length
				 * minus one, or 0377 for a `~' phrase */
	struct	words *nst;	/* node entered after inp has matched */
	struct	words *link;	/* next alternative to try when inp differs */
	struct	words *fail;	/* node to resume from when no alternative fits */
};

struct words w[MAXSIZ];		/* node pool; w[0] is the start node */
struct words *smax;		/* highest node of w[] handed out so far */
struct words *q;		/* scratch node pointer used by cfail() */
26*9075Srrh 
/* Command-line state. */
int fflag;		/* incremented for each -f option seen */
int nflag = 1;		/* non-zero: read the default dictionary; -n clears it */
char *filename;		/* set by -f: main records the argv slot naming the phrase file */
int nfile;		/* number of input file arguments, as counted by main */

/* Results. */
int nsucc;		/* set to 1 once a match is reported; main exits with nsucc == 0 */
long nsent;		/* sentence terminators counted by convert() */
long nhits;		/* matches counted by execute() */

/* Output state shared by execute() and outc(). */
char *nlp;		/* next buffered character that outc() will print */
char *begp;		/* outc() prints '[' before this position */
char *endp;		/* outc() prints ']' after this position */
int oct;		/* output column counter used by outc() for line breaks */

/* Sources of phrase characters for getargc(). */
FILE *wordf;		/* open phrase file, if any */
char *argptr;		/* string read instead when wordf is null */
39*9075Srrh 
40*9075Srrh main(argc, argv)
41*9075Srrh char **argv;
42*9075Srrh {
43*9075Srrh 	while (--argc > 0 && (++argv)[0][0]=='-')
44*9075Srrh 		switch (argv[0][1]) {
45*9075Srrh 
46*9075Srrh 		case 'f':
47*9075Srrh 			fflag++;
48*9075Srrh 			filename = ++argv;
49*9075Srrh 			argc--;
50*9075Srrh 			continue;
51*9075Srrh 
52*9075Srrh 		case 'n':
53*9075Srrh 			nflag = 0;
54*9075Srrh 			continue;
55*9075Srrh 		case 'd':
56*9075Srrh 			continue;
57*9075Srrh 		default:
58*9075Srrh 			fprintf(stderr, "diction: unknown flag\n");
59*9075Srrh 			continue;
60*9075Srrh 		}
61*9075Srrh out:
62*9075Srrh 	if(nflag){
63*9075Srrh 		wordf = fopen(DICT,"r");
64*9075Srrh 		if(wordf == NULL){
65*9075Srrh 			fprintf(stderr,"diction: can't open default dictionary\n");
66*9075Srrh 			exit(2);
67*9075Srrh 		}
68*9075Srrh 	}
69*9075Srrh 	else {
70*9075Srrh 		wordf = fopen(*filename,"r");
71*9075Srrh 		if(wordf == NULL){
72*9075Srrh 			fprintf(stderr,"diction: can't open %s\n",filename);
73*9075Srrh 			exit(2);
74*9075Srrh 		}
75*9075Srrh 	}
76*9075Srrh 
77*9075Srrh 	cgotofn();
78*9075Srrh 	cfail();
79*9075Srrh 	nfile = argc;
80*9075Srrh 	if (argc<=0) {
81*9075Srrh 		execute((char *)NULL);
82*9075Srrh 	}
83*9075Srrh 	else while (--argc >= 0) {
84*9075Srrh 		execute(*argv);
85*9075Srrh 		argv++;
86*9075Srrh 	}
87*9075Srrh 	printf("number of sentences %ld number of hits %ld\n",nsent,nhits);
88*9075Srrh 	exit(nsucc == 0);
89*9075Srrh }
90*9075Srrh 
91*9075Srrh execute(file)
92*9075Srrh char *file;
93*9075Srrh {
94*9075Srrh 	register char *p;
95*9075Srrh 	register struct words *c;
96*9075Srrh 	register ccount;
97*9075Srrh 	struct words *savc;
98*9075Srrh 	char *savp;
99*9075Srrh 	int savct;
100*9075Srrh 	int scr;
101*9075Srrh 	char buf[1024];
102*9075Srrh 	int f;
103*9075Srrh 	int hit;
104*9075Srrh 	if (file) {
105*9075Srrh 		if ((f = open(file, 0)) < 0) {
106*9075Srrh 			fprintf(stderr, "diction: can't open %s\n", file);
107*9075Srrh 			exit(2);
108*9075Srrh 		}
109*9075Srrh 	}
110*9075Srrh 	else f = 0;
111*9075Srrh 	ccount = 0;
112*9075Srrh 	p = buf;
113*9075Srrh 	nlp = p;
114*9075Srrh 	c = w;
115*9075Srrh 	oct = hit = 0;
116*9075Srrh 	savc = savp = 0;
117*9075Srrh 	for (;;) {
118*9075Srrh 		if (--ccount <= 0) {
119*9075Srrh 			if (p == &buf[1024]) p = buf;
120*9075Srrh 			if (p > &buf[512]) {
121*9075Srrh 				if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
122*9075Srrh 			}
123*9075Srrh 			else if ((ccount = read(f, p, 512)) <= 0) break;
124*9075Srrh 			convert(p,ccount);
125*9075Srrh 		}
126*9075Srrh 		if(p == &buf[1024])p=buf;
127*9075Srrh 		nstate:
128*9075Srrh 			if (c->inp == *p) {
129*9075Srrh 				c = c->nst;
130*9075Srrh 			}
131*9075Srrh 			else if (c->link != 0) {
132*9075Srrh 				c = c->link;
133*9075Srrh 				goto nstate;
134*9075Srrh 			}
135*9075Srrh 			else {
136*9075Srrh 				if(savp != 0){
137*9075Srrh 					c=savc;
138*9075Srrh 					p=savp;
139*9075Srrh 					if(ccount > savct)ccount += savct;
140*9075Srrh 					else ccount = savct;
141*9075Srrh 					savc=savp=0;
142*9075Srrh 					goto hadone;
143*9075Srrh 				}
144*9075Srrh 				c = c->fail;
145*9075Srrh 				if (c==0) {
146*9075Srrh 					c = w;
147*9075Srrh 					istate:
148*9075Srrh 					if (c->inp == *p) {
149*9075Srrh 						c = c->nst;
150*9075Srrh 					}
151*9075Srrh 					else if (c->link != 0) {
152*9075Srrh 						c = c->link;
153*9075Srrh 						goto istate;
154*9075Srrh 					}
155*9075Srrh 				}
156*9075Srrh 				else goto nstate;
157*9075Srrh 			}
158*9075Srrh 		if(c->out){
159*9075Srrh 			if((c->inp == *(p+1)) && (c->nst != 0)){
160*9075Srrh 				savp=p;
161*9075Srrh 				savc=c;
162*9075Srrh 				savct=ccount;
163*9075Srrh 				goto cont;
164*9075Srrh 			}
165*9075Srrh 			else if(c->link != 0){
166*9075Srrh 				savc=c;
167*9075Srrh 				while((savc=savc->link)!= 0){
168*9075Srrh 					if(savc->inp == *(p+1)){
169*9075Srrh 						savp=p;
170*9075Srrh 						savc=c;
171*9075Srrh 						savct=ccount;
172*9075Srrh 						goto cont;
173*9075Srrh 					}
174*9075Srrh 				}
175*9075Srrh 			}
176*9075Srrh 		hadone:
177*9075Srrh 			savc=savp=0;
178*9075Srrh 			if(c->out == (char)(0377)){
179*9075Srrh 				c=w;
180*9075Srrh 				goto nstate;
181*9075Srrh 			}
182*9075Srrh 			begp = p - (c->out);
183*9075Srrh 			if(begp < &buf[0])begp = &buf[1024] - (&buf[0]-begp);
184*9075Srrh 			endp=p;
185*9075Srrh 			hit = 1;
186*9075Srrh 			nhits++;
187*9075Srrh 			if (*p++ == '.') {
188*9075Srrh 				if (--ccount <= 0) {
189*9075Srrh 					if (p == &buf[1024]) p = buf;
190*9075Srrh 					if (p > &buf[512]) {
191*9075Srrh 						if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break;
192*9075Srrh 					}
193*9075Srrh 					else if ((ccount = read(f, p, 512)) <= 0) break;
194*9075Srrh 					convert(p,ccount);
195*9075Srrh 				}
196*9075Srrh 			}
197*9075Srrh 	succeed:	nsucc = 1;
198*9075Srrh 			{
199*9075Srrh 				if (p <= nlp) {
200*9075Srrh 					outc(&buf[1024]);
201*9075Srrh 					nlp = buf;
202*9075Srrh 				}
203*9075Srrh 				outc(p);
204*9075Srrh 			}
205*9075Srrh 	nomatch:
206*9075Srrh 			nlp = p;
207*9075Srrh 			c = w;
208*9075Srrh 			begp = endp = 0;
209*9075Srrh 			continue;
210*9075Srrh 		}
211*9075Srrh 	cont:
212*9075Srrh 		if (*p++ == '.'){
213*9075Srrh 				if(hit){
214*9075Srrh 					if(p <= nlp){
215*9075Srrh 						outc(&buf[1024]);
216*9075Srrh 						nlp = buf;
217*9075Srrh 					}
218*9075Srrh 					outc(p);
219*9075Srrh 					putchar('\n'); putchar('\n');
220*9075Srrh 					}
221*9075Srrh 				hit = 0;
222*9075Srrh 				oct = 0;
223*9075Srrh 				nlp = p;
224*9075Srrh 				c = w;
225*9075Srrh 				begp = endp = 0;
226*9075Srrh 			}
227*9075Srrh 	}
228*9075Srrh 	close(f);
229*9075Srrh }
230*9075Srrh 
231*9075Srrh getargc()
232*9075Srrh {
233*9075Srrh 	register c;
234*9075Srrh 	if (wordf){
235*9075Srrh 		if((c=getc(wordf))==EOF){
236*9075Srrh 			fclose(wordf);
237*9075Srrh 			if(nflag && fflag){
238*9075Srrh 				nflag=0;
239*9075Srrh 				wordf=fopen(*filename,"r");
240*9075Srrh 				if(wordf == NULL){
241*9075Srrh 					fprintf("can't open %s\n",filename);
242*9075Srrh 					exit(2);
243*9075Srrh 				}
244*9075Srrh 				return(getc(wordf));
245*9075Srrh 			}
246*9075Srrh 			else return(EOF);
247*9075Srrh 		}
248*9075Srrh 		else return(c);
249*9075Srrh 	}
250*9075Srrh 	if ((c = *argptr++) == '\0')
251*9075Srrh 		return(EOF);
252*9075Srrh 	return(c);
253*9075Srrh }
254*9075Srrh 
255*9075Srrh cgotofn() {
256*9075Srrh 	register c;
257*9075Srrh 	register struct words *s;
258*9075Srrh 	register ct;
259*9075Srrh 	int neg;
260*9075Srrh 
261*9075Srrh 	s = smax = w;
262*9075Srrh 	neg = ct = 0;
263*9075Srrh nword:	for(;;) {
264*9075Srrh 		c = getargc();
265*9075Srrh 		if(c == '~'){
266*9075Srrh 			neg++;
267*9075Srrh 			c = getargc();
268*9075Srrh 		}
269*9075Srrh 		if (c==EOF)
270*9075Srrh 			return;
271*9075Srrh 		if (c == '\n') {
272*9075Srrh 			if(neg)s->out = 0377;
273*9075Srrh 			else s->out = ct-1;
274*9075Srrh 			neg = ct = 0;
275*9075Srrh 			s = w;
276*9075Srrh 		} else {
277*9075Srrh 		loop:	if (s->inp == c) {
278*9075Srrh 				s = s->nst;
279*9075Srrh 				ct++;
280*9075Srrh 				continue;
281*9075Srrh 			}
282*9075Srrh 			if (s->inp == 0) goto enter;
283*9075Srrh 			if (s->link == 0) {
284*9075Srrh 				if (smax >= &w[MAXSIZ - 1]) overflo();
285*9075Srrh 				s->link = ++smax;
286*9075Srrh 				s = smax;
287*9075Srrh 				goto enter;
288*9075Srrh 			}
289*9075Srrh 			s = s->link;
290*9075Srrh 			goto loop;
291*9075Srrh 		}
292*9075Srrh 	}
293*9075Srrh 
294*9075Srrh 	enter:
295*9075Srrh 	do {
296*9075Srrh 		s->inp = c;
297*9075Srrh 		ct++;
298*9075Srrh 		if (smax >= &w[MAXSIZ - 1]) overflo();
299*9075Srrh 		s->nst = ++smax;
300*9075Srrh 		s = smax;
301*9075Srrh 	} while ((c = getargc()) != '\n' && c!=EOF);
302*9075Srrh 	if(neg)smax->out = 0377;
303*9075Srrh 	else smax->out = ct-1;
304*9075Srrh 	neg = ct = 0;
305*9075Srrh 	s = w;
306*9075Srrh 	if (c != EOF)
307*9075Srrh 		goto nword;
308*9075Srrh }
309*9075Srrh 
/*
 * overflo -- report that a fixed-size table is full and exit with
 * status 2.  Does not return.
 */
overflo() {
	static char msg[] = "wordlist too large\n";

	fputs(msg, stderr);
	exit(2);
}
314*9075Srrh cfail() {
315*9075Srrh 	struct words *queue[QSIZE];
316*9075Srrh 	struct words **front, **rear;
317*9075Srrh 	struct words *state;
318*9075Srrh 	int bstart;
319*9075Srrh 	register char c;
320*9075Srrh 	register struct words *s;
321*9075Srrh 	s = w;
322*9075Srrh 	front = rear = queue;
323*9075Srrh init:	if ((s->inp) != 0) {
324*9075Srrh 		*rear++ = s->nst;
325*9075Srrh 		if (rear >= &queue[QSIZE - 1]) overflo();
326*9075Srrh 	}
327*9075Srrh 	if ((s = s->link) != 0) {
328*9075Srrh 		goto init;
329*9075Srrh 	}
330*9075Srrh 
331*9075Srrh 	while (rear!=front) {
332*9075Srrh 		s = *front;
333*9075Srrh 		if (front == &queue[QSIZE-1])
334*9075Srrh 			front = queue;
335*9075Srrh 		else front++;
336*9075Srrh 	cloop:	if ((c = s->inp) != 0) {
337*9075Srrh 			bstart=0;
338*9075Srrh 			*rear = (q = s->nst);
339*9075Srrh 			if (front < rear)
340*9075Srrh 				if (rear >= &queue[QSIZE-1])
341*9075Srrh 					if (front == queue) overflo();
342*9075Srrh 					else rear = queue;
343*9075Srrh 				else rear++;
344*9075Srrh 			else
345*9075Srrh 				if (++rear == front) overflo();
346*9075Srrh 			state = s->fail;
347*9075Srrh 		floop:	if (state == 0){ state = w;bstart=1;}
348*9075Srrh 			if (state->inp == c) {
349*9075Srrh 			qloop:	q->fail = state->nst;
350*9075Srrh 				if ((state->nst)->out != 0 && q->out == 0) q->out = (state->nst)->out;
351*9075Srrh 				if((q=q->link) != 0)goto qloop;
352*9075Srrh 			}
353*9075Srrh 			else if ((state = state->link) != 0)
354*9075Srrh 				goto floop;
355*9075Srrh 			else if(bstart==0){state=0; goto floop;}
356*9075Srrh 		}
357*9075Srrh 		if ((s = s->link) != 0)
358*9075Srrh 			goto cloop;
359*9075Srrh 	}
360*9075Srrh /*	for(s=w;s<=smax;s++)
361*9075Srrh 		printf("s %d ch %c out %d nst %d link %d fail %d\n",s,
362*9075Srrh 			s->inp,s->out,s->nst,s->link,s->fail);
363*9075Srrh */
364*9075Srrh }
365*9075Srrh convert(p,ccount)
366*9075Srrh char *p;
367*9075Srrh {
368*9075Srrh 	int ct;
369*9075Srrh 	char *pt;
370*9075Srrh 	for(pt=p,ct=ccount;--ct>=0;pt++){
371*9075Srrh 		if(isupper(*pt))*pt=tolower(*pt);
372*9075Srrh 		else if(isspace(*pt))*pt=' ';
373*9075Srrh 		else if(*pt=='.' || *pt=='?'||*pt=='!'){
374*9075Srrh 			*pt='.';
375*9075Srrh 			nsent++;
376*9075Srrh 		}
377*9075Srrh 		else if(ispunct(*pt))*pt=' ';
378*9075Srrh 	}
379*9075Srrh }
380*9075Srrh outc(addr)
381*9075Srrh char *addr;
382*9075Srrh {
383*9075Srrh 
384*9075Srrh 	while(nlp < addr){
385*9075Srrh 		if(oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp){
386*9075Srrh 			oct=0;
387*9075Srrh 			putchar('\n');
388*9075Srrh 		}
389*9075Srrh 		if(nlp == begp){
390*9075Srrh 			putchar('[');
391*9075Srrh 		}
392*9075Srrh 		putchar(*nlp);
393*9075Srrh 		if(nlp == endp){
394*9075Srrh 			putchar(']');
395*9075Srrh 		}
396*9075Srrh 		nlp++;
397*9075Srrh 	}
398*9075Srrh }
399