xref: /plan9/sys/lib/man/permind/ptx1.c (revision ec59a3ddbfceee0efe34584c2c9981a5e5ff1ec4)
1 
2 /*	permuted title index
3 	ptx [-t] [-i ignore] [-o only] [-w num] [-r]
4 	    [-c commands] [-g gap] [-h hole] [-b breakfile] [-f] [input]
5 	Ptx reads the input file and permutes on words in it.
6 	It excludes all words in the ignore file.
7 	Alternately it includes words in the only file.
8 	if neither is given it excludes the words in
9 	/sys/lib/man/permind/ignore.
10 
11 	The width of the output line (except for -r field)
12 	can be changed to num,
13 	which is a troff width measure, ens by default.
14 	with no -w, num is 72n, or 100n under -t.
15 	the -f flag tells the program to fold the output
16 	the -t flag says the output is for troff
17 	the -c flag (troff commands, e.g. font setting) implies -t.
18 	-g sets the gutter
19 	-h sets the hole between wrapped segments
20 	-r takes the first word on each line and makes it
21 	into a fifth field.
22 	-c inserts troff commands for font-setting etc at beginning
23 
24 	*/
25 
26 #include <u.h>
27 #include <libc.h>
28 #include <stdio.h>
29 #include <ctype.h>
#define DEFLTX "/sys/lib/man/permind/ignore"	/* word file used when neither -i nor -o is given */
#define TILDE 0177	/* byte written by putline() between the two halves of a rotated line */
#define	N 30
#define	MAX	(N*BUFSIZ)	/* size in bytes of the ignore/only word buffer; parenthesized so it is safe inside larger expressions */
#define LMAX	2048	/* size of the line[] and mark[] buffers */
#define MAXT	2048	/* number of slots in hasht[] */
#define MASK	03777	/* hash() masks its result with this, giving 0..MAXT-1 */
#define ON	1

/*
 * Nonzero when c is a break (word separator) character.
 * The index is converted to unsigned char so that a plain char holding a
 * byte >= 0200 cannot index in front of btable[].
 */
#define isabreak(c) (btable[(unsigned char)(c)])
40 
/* diagnostics */
void	msg(char *s, char *arg);
void	extra(int c);
void	diag(char *s, char *arg);

/* pass 1: read input lines and emit one rotated line per selected keyword */
char	*getline(void);
void	cmpline(char *pend);
int	cmpword(char *cpp, char *pend, char *hpp);
void	putline(char *strt, char *end);

/* pass 2: build the roff width-measuring file, then format the sorted lines */
void	makek(void);
void	getsort(void);
char	*rtrim(char *, char *, int);
char	*ltrim(char *, char *, int);
void	putout(char *strt, char *end);
void	setlen(void);
void	getlen(void);

/* ignore/only word hash table */
int	hash(char *strtp, char *endp);
int	storeh(int num, char *strtp);
57 
58 int status;
59 
60 
61 char *hasht[MAXT];
62 char line[LMAX];
63 char mark[LMAX];
64 struct word {
65 	char *p;
66 	int w;
67 } word[LMAX/2];
68 char btable[256];
69 int ignore;
70 int only;
71 char *lenarg;
72 char *gutarg;
73 char *holarg;
74 int llen;
75 int spacesl;
76 int gutter;
77 int hole;
78 int mlen = LMAX;
79 int halflen;
80 int rflag;
81 char *strtbufp, *endbufp;
82 
83 
84 char *empty = "";
85 char *font = "R";
86 char *roff = "/bin/nroff";
87 char *troff = "/bin/troff";
88 
89 char *infile = "/fd/0";
90 FILE *inptr;
91 
92 FILE *outptr = stdout;
93 
94 char *sortfile = "ptxsort";	/* output of sort program */
95 char nofold[] = {'-', 'd', 't', TILDE, 0};
96 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
97 char *sortopt = nofold;
98 FILE *sortptr;
99 
100 char *kfile = "ptxmark";	/* ptxsort + troff goo for widths */
101 FILE *kptr;
102 
103 char *wfile = "ptxwidth";	/* widths of words in ptxsort */
104 FILE *wptr;
105 
106 char *bfile;	/*contains user supplied break chars */
107 FILE *bptr;
108 
109 char *cmds;
110 
111 main(int argc, char **argv)
112 {
113 	int c;
114 	char *bufp;
115 	char *pend;
116 
117 	char *xfile;
118 	FILE *xptr;
119 	Waitmsg *w;
120 
121 
122 /*	argument decoding	*/
123 
124 	xfile = DEFLTX;
125 	ARGBEGIN {
126 	case 'r':
127 		rflag = 1;
128 		break;
129 	case 'f':
130 		sortopt = fold;
131 		break;
132 	case 'w':
133 		if(lenarg)
134 			extra(ARGC());
135 		lenarg = ARGF();
136 		break;
137 	case 'c':
138 		if(cmds)
139 			extra(ARGC());
140 		cmds = ARGF();
141 	case 't':
142 		roff = troff;
143 		break;
144 	case 'g':
145 		if(gutarg)
146 			extra(ARGC());
147 		gutarg =  ARGF();
148 		break;
149 	case 'h':
150 		if(holarg)
151 			extra(ARGC());
152 		holarg =  ARGF();
153 		break;
154 
155 	case 'i':
156 		if(only|ignore)
157 			extra(ARGC());
158 		ignore++;
159 		xfile = ARGF();
160 		break;
161 
162 	case 'o':
163 		if(only|ignore)
164 			extra(ARGC());
165 		only++;
166 		xfile = ARGF();
167 		break;
168 
169 	case 'b':
170 		if(bfile)
171 			extra(ARGC());
172 		bfile = ARGF();
173 		break;
174 
175 	default:
176 		diag("Illegal argument:",*argv);
177 	} ARGEND
178 
179 	if(lenarg == 0)
180 		lenarg = troff? "100n": "72n";
181 	if(gutarg == 0)
182 		gutarg = "3n";
183 	if(holarg == 0)
184 		holarg = gutarg;
185 
186 	if(argc > 1)
187 		diag("Too many filenames",empty);
188 	if(argc == 1)
189 		infile = *argv;
190 
191 
192 	/* Default breaks of blank, tab and newline */
193 	btable[' '] = ON;
194 	btable['\t'] = ON;
195 	btable['\n'] = ON;
196 	if(bfile) {
197 		if((bptr = fopen(bfile,"r")) == NULL)
198 			diag("Cannot open break char file",bfile);
199 
200 		while((c = getc(bptr)) != EOF)
201 			btable[c] = ON;
202 	}
203 
204 /*	Allocate space for a buffer.  If only or ignore file present
205 	read it into buffer. Else read in default ignore file
206 	and put resulting words in buffer.
207 	*/
208 
209 
210 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
211 		diag("Out of memory space",empty);
212 	bufp = strtbufp;
213 	endbufp = strtbufp+MAX;
214 
215 	if((xptr = fopen(xfile,"r")) == NULL)
216 		diag("Cannot open  file",xfile);
217 
218 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
219 		if(isabreak(c)) {
220 			if(storeh(hash(strtbufp,bufp),strtbufp))
221 				diag("Too many words",xfile);
222 			*bufp++ = '\0';
223 			strtbufp = bufp;
224 		}
225 		else {
226 			*bufp++ = (isupper(c)?tolower(c):c);
227 		}
228 	}
229 	if (bufp >= endbufp)
230 		diag("Too many words in file",xfile);
231 	endbufp = --bufp;
232 
233 	/* open output file for sorting */
234 
235 	if((sortptr = fopen(sortfile, "w")) == NULL)
236 		diag("Cannot open output for sorting:",sortfile);
237 
238 /*	get a line of data and compare each word for
239 	inclusion or exclusion in the sort phase
240 */
241 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
242 		diag("Cannot open data: ",infile);
243 	while(pend=getline())
244 		cmpline(pend);
245 	fclose(sortptr);
246 
247 	if(fork()==0){
248 		execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
249 			sortfile, "-o", sortfile, 0);
250 		diag("Sort exec failed","");
251 	}
252 	if((w=wait())==nil || w->msg[0]!=0)
253 		diag("Sort failed","");
254 	free(w);
255 
256 	makek();
257 	if(fork()==0){
258 		if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
259 			diag("Cannot create width file:",wfile);
260 		execl(roff, roff, "-a", kfile, 0);
261 		diag("Sort exec failed","");
262 	}
263 	if((w=wait())==nil || w->msg[0]!=0)
264 		diag("Sort failed","");
265 	free(w);
266 
267 	getsort();
268 /*	remove(sortfile);
269 	remove(kfile);*/
270 fflush(0);
271 _exits(0);
272 /* I don't know what's wrong with the atexit func... */
273 /*	exits(0);	*/
274 }
275 
/*
 * Print a diagnostic of the form "ptx: <s> <arg>" on standard error.
 */
void
msg(char *s, char *arg)
{
	fprintf(stderr,"ptx: %s %s\n",s,arg);
}
282 
/*
 * Complain, through diag(), that option letter c was given more than once
 * or conflicts with an earlier option.
 */
void
extra(int c)
{
	char optname[] = "-x.";

	/* patch the real option letter into the template */
	optname[1] = c;
	diag("Extra option", optname);
}
290 
/*
 * Fatal error: print the message via msg() and terminate with s as the
 * exit status.  The temporary files are not removed here; that cleanup
 * is disabled.
 */
void
diag(char *s, char *arg)
{
	msg(s, arg);
	exits(s);
}
302 
303 
304 char*
305 getline(void)
306 {
307 
308 	int c;
309 	char *linep;
310 	char *endlinep;
311 
312 
313 	endlinep= line + mlen;
314 	linep = line;
315 	/* Throw away leading white space */
316 
317 	while(isspace(c=getc(inptr)))
318 		;
319 	if(c==EOF)
320 		return(0);
321 	ungetc(c,inptr);
322 	while(( c=getc(inptr)) != EOF) {
323 		switch (c) {
324 
325 			case '\t':
326 				if(linep<endlinep)
327 					*linep++ = ' ';
328 				break;
329 			case '\n':
330 				while(isspace(*--linep));
331 				*++linep = '\n';
332 				return(linep);
333 			default:
334 				if(linep < endlinep)
335 					*linep++ = c;
336 		}
337 	}
338 	return(0);
339 }
340 
341 void
342 cmpline(char *pend)
343 {
344 
345 	char *pstrt, *pchar, *cp;
346 	char **hp;
347 	int flag;
348 
349 	pchar = line;
350 	if(rflag)
351 		while(pchar<pend&&!isspace(*pchar))
352 			pchar++;
353 	while(pchar<pend){
354 	/* eliminate white space */
355 		if(isabreak(*pchar++))
356 			continue;
357 		pstrt = --pchar;
358 
359 		flag = 1;
360 		while(flag){
361 			if(isabreak(*pchar)) {
362 				hp = &hasht[hash(pstrt,pchar)];
363 				pchar--;
364 				while(cp = *hp++){
365 					if(hp == &hasht[MAXT])
366 						hp = hasht;
367 	/* possible match */
368 					if(cmpword(pstrt,pchar,cp)){
369 	/* exact match */
370 						if(!ignore && only)
371 							putline(pstrt,pend);
372 						flag = 0;
373 						break;
374 					}
375 				}
376 	/* no match */
377 				if(flag){
378 					if(ignore || !only)
379 						putline(pstrt,pend);
380 					flag = 0;
381 				}
382 			}
383 		pchar++;
384 		}
385 	}
386 }
387 
/*
 * Compare a word of the text with a stored (lower case) table word.
 *
 * cpp  - first character of the text word
 * pend - LAST character of the text word (not one past it)
 * hpp  - NUL-terminated table word
 *
 * Upper case letters of the text word are folded to lower case before
 * comparing.  Returns 1 when the text word equals the table word
 * exactly (same length), 0 otherwise.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	int c;

	for(; *hpp != '\0'; hpp++){
		/* ctype functions require an unsigned char value */
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if((char)c != *hpp)
			return(0);
	}
	/* all of hpp matched; the lengths agree only if we stopped just past pend */
	return(cpp - 1 == pend);
}
401 
402 void
403 putline(char *strt, char *end)
404 {
405 	char *cp;
406 
407 	for(cp=strt; cp<end; cp++)
408 		putc(*cp, sortptr);
409 	/* Add extra blank before TILDE to sort correctly
410 	   with -fd option */
411 	putc(' ',sortptr);
412 	putc(TILDE,sortptr);
413 	for (cp=line; cp<strt; cp++)
414 		putc(*cp,sortptr);
415 	putc('\n',sortptr);
416 }
417 
418 void
419 makek(void)
420 {
421 	int i, c;
422 	int nr = 0;
423 
424 	if((sortptr = fopen(sortfile,"r")) == NULL)
425 		diag("Cannot open sorted data:",sortfile);
426 	if((kptr = fopen(kfile,"w")) == NULL)
427 		diag("Cannot create mark file:",kfile);
428 	if(cmds)
429 		fprintf(kptr,"%s\n",cmds);
430 	fprintf(kptr,
431 		".nf\n"
432 		".pl 1\n"
433 		".tr %c\\&\n", TILDE);
434 	setlen();
435 
436 	while((c = getc(sortptr)) != EOF) {
437 		if(nr == 0) {
438 			fprintf(kptr,".di xx\n");
439 			nr++;
440 		}
441 		if(c == '\n') {
442 			fprintf(kptr,"\n.di\n");
443 			for(i=1; i<nr; i++)
444 				fprintf(kptr,"\\n(%.2d ",i);
445 			fprintf(kptr,"\n");
446 			nr = 0;
447 			continue;
448 		}
449 		if(isspace(c))
450 			fprintf(kptr,"\\k(%.2d",nr++);
451 		putc(c,kptr);
452 	}
453 	fclose(sortptr);
454 	fclose(kptr);
455 }
456 
457 
458 
459 void
460 getsort(void)
461 {
462 	char *tilde, *linep, *markp;
463 	int i0, i1, i2, i3, i4, i5, i6, i7;
464 	int w0, w6;
465 
466 	if((sortptr = fopen(sortfile,"r")) == NULL)
467 		diag("Cannot open sorted data:",sortfile);
468 	if((wptr = fopen(wfile,"r")) == NULL)
469 		diag("Cannot open width file:",wfile);
470 	getlen();
471 
472 	halflen = (llen-gutter)/2;
473 
474 	while(fgets(line,sizeof(line),sortptr) != 0) {
475 		if(fgets(mark,sizeof(mark),wptr) == 0)
476 			diag("Phase error 1","");
477 		linep = line;
478 		markp = mark;
479 		i3 = i7 = 0;
480 		word[i7].p = linep;
481 		word[i7].w = 0;
482 		for(linep=line; *linep; linep++) {
483 			if(*linep == TILDE)
484 				i3 = i7;
485 			else if(*linep == '\n')
486 				break;
487 			else if(isspace(*linep)) {
488 				i7++;
489 				word[i7].p = linep;
490 				if(!markp) {
491 					diag("Phase error 2","");
492 				}
493 				word[i7].w = atoi(markp);
494 				markp = strchr(markp+1,' ');
495 			}
496 		}
497 		i0 = 0;
498 		for(i1=i0; i1<i3; i1++)
499 			if(word[i1+1].w - word[i0].w >= halflen - spacesl)
500 				break;
501 		w0 = word[i1].w - word[i0].w;
502 		i4 = i3 + rflag;
503 		for(i6 = i7; i6>i4; i6--)
504 			if(word[i7].w - word[i6-1].w >= halflen)
505 				break;
506 		w6 = word[i7].w - word[i6].w - spacesl;
507 		for(i2=i1 ; i2<i3; i2++)
508 			if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
509 				break;
510 		for(i5=i6; i5>i4; i5--)
511 			if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
512 				break;
513 
514 		printf(".xx \"");
515 		putout(word[i1].p+1,word[i2].p);
516 		if(i1<i2 && i2<i3) putchar('/');
517 		printf("\" \"");
518 		if(i5>i4 && i6==i5) putchar('/');
519 		putout(word[i6].p+1+(i6==i3),word[i7].p);
520 		printf("\" \"");
521 		putout(word[i0].p,word[i1].p);
522 		if(i2<i3 && i1==i2) putchar('/');
523 		printf("\" \"");
524 		if(i5>i4 && i6>i5) putchar('/');
525 		putout(word[i5].p+1+(i5==i3),word[i6].p);
526 		if(rflag) {
527 			printf("\" \"");
528 			putout(word[i3].p+2,word[i4].p);
529 		}
530 		printf("\"\n");
531 	}
532 }
533 
534 void
535 putout(char *strt, char *end)
536 {
537 	char *cp;
538 
539 	for(cp=strt; cp<end; )
540 			putc(*cp++,outptr);
541 }
542 
543 void
544 setlen(void)
545 {
546 	fprintf(kptr,
547 		"\\w'\\h'%s''\n"
548 		"\\w' /'\n"
549 		"\\w'\\h'%s''\n"
550 		"\\w'\\h'%s''\n",lenarg,gutarg,holarg);
551 }
552 
553 void
554 getlen(void)
555 {
556 	char s[20];
557 	fgets(s,sizeof(s),kptr);
558 	llen = atoi(s);
559 
560 	fgets(s,sizeof(s),kptr);
561 	spacesl = atoi(s);
562 
563 	fgets(s,sizeof(s),kptr);
564 	gutter = atoi(s);
565 
566 	fgets(s,sizeof(s),kptr);
567 	hole = atoi(s);
568 	if(hole < 2*spacesl)
569 		hole = 2*spacesl;
570 }
571 
572 int
573 hash(char *strtp, char *endp)
574 {
575 	char *cp, c;
576 	int i, j, k;
577 
578 	/* Return zero hash number for single letter words */
579 	if((endp - strtp) == 1)
580 		return(0);
581 
582 	cp = strtp;
583 	c = *cp++;
584 	i = (isupper(c)?tolower(c):c);
585 	c = *cp;
586 	j = (isupper(c)?tolower(c):c);
587 	i = i*j;
588 	cp = --endp;
589 	c = *cp--;
590 	k = (isupper(c)?tolower(c):c);
591 	c = *cp;
592 	j = (isupper(c)?tolower(c):c);
593 	j = k*j;
594 
595 	k = (i ^ (j>>2)) & MASK;
596 	return(k);
597 }
598 
599 int
600 storeh(int num, char *strtp)
601 {
602 	int i;
603 
604 	for(i=num; i<MAXT; i++) {
605 		if(hasht[i] == 0) {
606 			hasht[i] = strtp;
607 			return(0);
608 		}
609 	}
610 	for(i=0; i<num; i++) {
611 		if(hasht[i] == 0) {
612 			hasht[i] = strtp;
613 			return(0);
614 		}
615 	}
616 	return(1);
617 }
618