xref: /plan9/sys/lib/man/permind/ptx1.c (revision b873a79ae9b6942791b873dd7fa2c27224798319)
/*
	permuted title index
	ptx [-t] [-i ignore] [-o only] [-w num] [-r]
	    [-c commands] [-g gap] [-h hole] [-b breakfile] [-f] [input]

	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternately it includes words in the only file.
	If neither is given it excludes the words in
	/sys/lib/man/permind/ignore.

	The width of the output line (except for -r field)
	can be changed to num,
	which is a troff width measure, ens by default.
	With no -w, num is 72n, or 100n under -t.
	The -f flag tells the program to fold the output.
	The -t flag says the output is for troff.
	Font specifier -F implies -t (the option switch in main has
	no -F case; there, -c falls through to -t).
	-g sets the gutter.
	-h sets the hole between wrapped segments.
	-b names a file whose characters are added to the break
	characters (blank, tab and newline by default).
	-r takes the first word on each line and makes it
	into a fifth field.
	-c inserts troff commands for font-setting etc at beginning.
 */
25 
26 #include <u.h>
27 #include <libc.h>
28 #include <stdio.h>
29 #include <ctype.h>
30 
/* Compile-time limits and helper macros. */
#define DEFLTX "/sys/lib/man/permind/ignore"	/* default ignore file */
#define TILDE	0177		/* actually RUBOUT, not ~ */
#define	N	30
#define	MAX	(N*BUFSIZ)	/* parenthesized so MAX is safe inside any expression */
#define LMAX	2048
#define MAXT	2048
#define MASK	03777
#define ON	1

/*
 * Nonzero when c is a break character.  The argument is reduced to
 * unsigned char so that a plain (possibly negative) char can never
 * index in front of btable.
 */
#define isabreak(c) (btable[(unsigned char)(c)])
41 
42 char *getline(void);
43 void msg(char *, char *);
44 void extra(int);
45 void diag(char *, char *);
46 void cmpline(char *);
47 int cmpword(char *, char *, char *);
48 void putline(char *, char *);
49 void makek(void);
50 void getsort(void);
51 char *rtrim(char *, char *, int);
52 char *ltrim(char *, char *, int);
53 void putout(char *, char *);
54 void setlen(void);
55 void getlen(void);
56 int hash(char *, char *);
57 int storeh(int, char *);
58 
59 int status;	/* not referenced in the part of the file shown here */
60 
61 char *hasht[MAXT];	/* hash table of words from the ignore/only file; filled by storeh, probed by cmpline */
62 char line[LMAX];	/* current input line (getline) and, later, current sorted line (getsort) */
63 char mark[LMAX];	/* line of widths read from the width file in getsort */
64 struct word {
65 	char *p;	/* position inside line[] */
66 	int w;		/* width value parsed with atoi from mark[] */
67 } word[LMAX/2];
68 char btable[256];	/* nonzero entry marks a break character (see isabreak) */
69 int ignore;	/* set by -i */
70 int only;	/* set by -o */
71 char *lenarg;	/* -w argument */
72 char *gutarg;	/* -g argument */
73 char *holarg;	/* -h argument */
74 int llen;	/* the next four values are read back in getlen */
75 int spacesl;
76 int gutter;
77 int hole;
78 int mlen = LMAX;	/* limit on characters getline stores into line[] */
79 int halflen;	/* (llen-gutter)/2, computed in getsort */
80 int rflag;	/* set by -r */
81 char *strtbufp, *endbufp;	/* start of the word being collected / end of the word buffer (main) */
82 
83 
84 char *empty = "";
85 char *font = "R";	/* not referenced in the part of the file shown here */
86 char *roff = "/bin/nroff";	/* formatter run by main; -t switches it to troff */
87 char *troff = "/bin/troff";
88 
89 char *infile = "/fd/0";	/* input used when no file name argument is given */
90 FILE *inptr;
91 
92 FILE *outptr = stdout;	/* stream written by putout */
93 
94 char *sortfile = "ptxsort";	/* output of sort program */
95 char nofold[] = {'-', 'd', 't', TILDE, 0};	/* sort option string "-dt" followed by TILDE */
96 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};	/* same with 'f' added; selected by -f */
97 char *sortopt = nofold;
98 FILE *sortptr;
99 
100 char *kfile = "ptxmark";	/* ptxsort + troff goo for widths */
101 FILE *kptr;
102 
103 char *wfile = "ptxwidth";	/* widths of words in ptxsort */
104 FILE *wptr;
105 
106 char *bfile;	/*contains user supplied break chars */
107 FILE *bptr;
108 
109 char *cmds;	/* -c argument; written at the top of kfile by makek */
110 
main(int argc,char ** argv)111 main(int argc, char **argv)
112 {
113 	int c;
114 	char *bufp;
115 	char *pend;
116 	char *xfile;
117 	FILE *xptr;
118 	Waitmsg *w;
119 
120 	/* argument decoding */
121 	xfile = DEFLTX;
122 	ARGBEGIN {
123 	case 'r':
124 		rflag = 1;
125 		break;
126 	case 'f':
127 		sortopt = fold;
128 		break;
129 	case 'w':
130 		if(lenarg)
131 			extra(ARGC());
132 		lenarg = ARGF();
133 		break;
134 	case 'c':
135 		if(cmds)
136 			extra(ARGC());
137 		cmds = ARGF();
138 	case 't':
139 		roff = troff;
140 		break;
141 	case 'g':
142 		if(gutarg)
143 			extra(ARGC());
144 		gutarg =  ARGF();
145 		break;
146 	case 'h':
147 		if(holarg)
148 			extra(ARGC());
149 		holarg =  ARGF();
150 		break;
151 
152 	case 'i':
153 		if(only|ignore)
154 			extra(ARGC());
155 		ignore++;
156 		xfile = ARGF();
157 		break;
158 
159 	case 'o':
160 		if(only|ignore)
161 			extra(ARGC());
162 		only++;
163 		xfile = ARGF();
164 		break;
165 
166 	case 'b':
167 		if(bfile)
168 			extra(ARGC());
169 		bfile = ARGF();
170 		break;
171 
172 	default:
173 		diag("Illegal argument:",*argv);
174 	} ARGEND
175 
176 	if(lenarg == 0)
177 		lenarg = troff? "100n": "72n";
178 	if(gutarg == 0)
179 		gutarg = "3n";
180 	if(holarg == 0)
181 		holarg = gutarg;
182 
183 	if(argc > 1)
184 		diag("Too many filenames",empty);
185 	if(argc == 1)
186 		infile = *argv;
187 
188 	/* Default breaks of blank, tab and newline */
189 	btable[' '] = ON;
190 	btable['\t'] = ON;
191 	btable['\n'] = ON;
192 	if(bfile) {
193 		if((bptr = fopen(bfile,"r")) == NULL)
194 			diag("Cannot open break char file",bfile);
195 
196 		while((c = getc(bptr)) != EOF)
197 			btable[c] = ON;
198 	}
199 
200 	/*
201 	Allocate space for a buffer.  If only or ignore file present
202 	read it into buffer. Else read in default ignore file
203 	and put resulting words in buffer.
204 	*/
205 
206 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
207 		diag("Out of memory space",empty);
208 	bufp = strtbufp;
209 	endbufp = strtbufp+MAX;
210 
211 	if((xptr = fopen(xfile,"r")) == NULL)
212 		diag("Cannot open  file",xfile);
213 
214 	while(bufp < endbufp && (c = getc(xptr)) != EOF)
215 		if(isabreak(c)) {
216 			if(storeh(hash(strtbufp,bufp),strtbufp))
217 				diag("Too many words",xfile);
218 			*bufp++ = '\0';
219 			strtbufp = bufp;
220 		} else
221 			*bufp++ = (isupper(c)?tolower(c):c);
222 	if (bufp >= endbufp)
223 		diag("Too many words in file",xfile);
224 	endbufp = --bufp;
225 
226 	/* open output file for sorting */
227 
228 	if((sortptr = fopen(sortfile, "w")) == NULL)
229 		diag("Cannot open output for sorting:",sortfile);
230 
231 	/*
232 	get a line of data and compare each word for
233 	inclusion or exclusion in the sort phase
234 	*/
235 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
236 		diag("Cannot open data: ",infile);
237 	while((pend = getline()) != NULL)
238 		cmpline(pend);
239 	fclose(sortptr);
240 
241 	if(fork()==0){
242 		execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
243 			sortfile, "-o", sortfile, 0);
244 		diag("Sort exec failed","");
245 	}
246 	if((w = wait()) == NULL || w->msg[0] != '\0')
247 		diag("Sort failed","");
248 	free(w);
249 
250 	makek();
251 	if(fork()==0){
252 		if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
253 			diag("Cannot create width file:",wfile);
254 		execl(roff, roff, "-a", kfile, 0);
255 		diag("Sort exec failed","");
256 	}
257 	if((w = wait()) == NULL || w->msg[0] != '\0')
258 		diag("Sort failed","");
259 	free(w);
260 
261 	getsort();
262 /*
263 	remove(sortfile);
264 	remove(kfile);
265  */
266 	fflush(0);
267 	_exits(0);
268 /* I don't know what's wrong with the atexit func... */
269 /*	exits(0);	*/
270 }
271 
/*
 * Print a diagnostic on standard error in the form
 * "ptx: <s> <arg>" followed by a newline.
 */
void
msg(char *s, char *arg)
{
	fprintf(stderr, "ptx: %s %s\n", s, arg);
}
277 
/*
 * Complain that option letter c was given more than once (or clashes
 * with another option) and quit through diag.  The reported text is
 * "-c." with c replaced by the option letter.
 */
void
extra(int c)
{
	char opt[4];

	opt[0] = '-';
	opt[1] = c;
	opt[2] = '.';
	opt[3] = '\0';
	diag("Extra option", opt);
}
286 
/*
 * Fatal error: report s and arg through msg, then terminate the
 * process with s as the exit string.  The temporary files are left
 * in place; the calls that would remove them are disabled:
 *	remove(sortfile);
 *	remove(kfile);
 */
void
diag(char *s, char *arg)
{
	msg(s, arg);
	exits(s);
}
297 
298 
299 char*
getline(void)300 getline(void)
301 {
302 	int c;
303 	char *linep;
304 	char *endlinep;
305 
306 	endlinep= line + mlen;
307 	linep = line;
308 	/* Throw away leading white space */
309 
310 	while(isspace(c = getc(inptr)))
311 		;
312 	if(c==EOF)
313 		return(0);
314 	ungetc(c,inptr);
315 	while((c = getc(inptr)) != EOF)
316 		switch (c) {
317 		case '\t':
318 			if(linep<endlinep)
319 				*linep++ = ' ';
320 			break;
321 		case '\n':
322 			while(isspace(*--linep))
323 				;
324 			*++linep = '\n';
325 			return(linep);
326 		default:
327 			if(linep < endlinep)
328 				*linep++ = c;
329 			break;
330 		}
331 	return(0);
332 }
333 
334 void
cmpline(char * pend)335 cmpline(char *pend)
336 {
337 	char *pstrt, *pchar, *cp;
338 	char **hp;
339 	int flag;
340 
341 	pchar = line;
342 	if(rflag)
343 		while(pchar < pend && !isspace(*pchar))
344 			pchar++;
345 	while(pchar < pend){
346 		/* eliminate white space */
347 		if(isabreak(*pchar++))
348 			continue;
349 		pstrt = --pchar;
350 
351 		flag = 1;
352 		while(flag){
353 			if(isabreak(*pchar)) {
354 				hp = &hasht[hash(pstrt,pchar)];
355 				pchar--;
356 				while(cp = *hp++){
357 					if(hp == &hasht[MAXT])
358 						hp = hasht;
359 					/* possible match */
360 					if(cmpword(pstrt,pchar,cp)){
361 						/* exact match */
362 						if(!ignore && only)
363 							putline(pstrt,pend);
364 						flag = 0;
365 						break;
366 					}
367 				}
368 				/* no match */
369 				if(flag){
370 					if(ignore || !only)
371 						putline(pstrt,pend);
372 					flag = 0;
373 				}
374 			}
375 			pchar++;
376 		}
377 	}
378 }
379 
/*
 * Compare the word starting at cpp, case folded to lower case, with
 * the NUL terminated word hpp.
 *
 * cpp   first character of the candidate word
 * pend  LAST character of the candidate word (not one past it)
 * hpp   stored word to compare against
 *
 * Returns 1 when every character of hpp matches and the candidate is
 * exactly as long as hpp, otherwise 0.  Characters are handled as
 * unsigned char on both sides so bytes above 0177 are never passed to
 * the ctype routines as negative values and still compare equal to
 * themselves.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	int c;

	while(*hpp != '\0'){
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* cpp is now one past the last compared character */
	return(cpp - 1 == pend);
}
394 
395 void
putline(char * strt,char * end)396 putline(char *strt, char *end)
397 {
398 	char *cp;
399 
400 	for(cp=strt; cp<end; cp++)
401 		putc(*cp, sortptr);
402 	/* Add extra blank before TILDE to sort correctly with -fd option */
403 	putc(' ',sortptr);
404 	putc(TILDE,sortptr);
405 	for (cp=line; cp<strt; cp++)
406 		putc(*cp,sortptr);
407 	putc('\n',sortptr);
408 }
409 
410 void
makek(void)411 makek(void)
412 {
413 	int i, c;
414 	int nr = 0;
415 
416 	if((sortptr = fopen(sortfile,"r")) == NULL)
417 		diag("Cannot open sorted data:",sortfile);
418 	if((kptr = fopen(kfile,"w")) == NULL)
419 		diag("Cannot create mark file:",kfile);
420 	if(cmds)
421 		fprintf(kptr,"%s\n",cmds);
422 	fprintf(kptr,
423 		".nf\n"
424 		".pl 1\n"
425 		".tr %c\\&\n", TILDE);
426 	setlen();
427 
428 	while((c = getc(sortptr)) != EOF) {
429 		if(nr == 0) {
430 			fprintf(kptr,".di xx\n");
431 			nr++;
432 		}
433 		if(c == '\n') {
434 			fprintf(kptr,"\n.di\n");
435 			for(i=1; i<nr; i++)
436 				fprintf(kptr,"\\n(%.2d ",i);
437 			fprintf(kptr,"\n");
438 			nr = 0;
439 			continue;
440 		}
441 		if(isspace(c))
442 			fprintf(kptr,"\\k(%.2d",nr++);
443 		putc(c,kptr);
444 	}
445 	fclose(sortptr);
446 	fclose(kptr);
447 }
448 
449 void
getsort(void)450 getsort(void)
451 {
452 	char *tilde, *linep, *markp;
453 	int i0, i1, i2, i3, i4, i5, i6, i7, w0, w6;
454 
455 	if((sortptr = fopen(sortfile, "r")) == NULL)
456 		diag("Cannot open sorted data:", sortfile);
457 	if((wptr = fopen(wfile, "r")) == NULL)
458 		diag("Cannot open width file:", wfile);
459 	getlen();
460 
461 	halflen = (llen-gutter)/2;
462 
463 	while(fgets(line, sizeof(line), sortptr) != NULL) {
464 		if(fgets(mark, sizeof(mark), wptr) == NULL)
465 			diag("Phase error 1: premature EOF on width file",
466 				wfile);
467 		linep = line;
468 		markp = mark;
469 		i3 = i7 = 0;
470 		word[i7].p = linep;
471 		word[i7].w = 0;
472 		for(linep=line; *linep; linep++) {
473 			if(*linep == TILDE)
474 				i3 = i7;
475 			else if(*linep == '\n')
476 				break;
477 			else if(isspace(*linep)) {
478 				i7++;
479 				word[i7].p = linep;
480 				if(!markp)
481 					diag("Phase error 2: no widths for summary",
482 						line);
483 				word[i7].w = atoi(markp);
484 				markp = strchr(markp+1, ' ');
485 			}
486 		}
487 		i0 = 0;
488 		for(i1=i0; i1<i3; i1++)
489 			if(word[i1+1].w - word[i0].w >= halflen - spacesl)
490 				break;
491 		w0 = word[i1].w - word[i0].w;
492 		i4 = i3 + rflag;
493 		for(i6 = i7; i6>i4; i6--)
494 			if(word[i7].w - word[i6-1].w >= halflen)
495 				break;
496 		w6 = word[i7].w - word[i6].w - spacesl;
497 		for(i2=i1 ; i2<i3; i2++)
498 			if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
499 				break;
500 		for(i5=i6; i5>i4; i5--)
501 			if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
502 				break;
503 
504 		printf(".xx \"");
505 		putout(word[i1].p+1,word[i2].p);
506 		if(i1<i2 && i2<i3) putchar('/');
507 		printf("\" \"");
508 		if(i5>i4 && i6==i5) putchar('/');
509 		putout(word[i6].p+1+(i6==i3),word[i7].p);
510 		printf("\" \"");
511 		putout(word[i0].p,word[i1].p);
512 		if(i2<i3 && i1==i2) putchar('/');
513 		printf("\" \"");
514 		if(i5>i4 && i6>i5) putchar('/');
515 		putout(word[i5].p+1+(i5==i3),word[i6].p);
516 		if(rflag) {
517 			printf("\" \"");
518 			putout(word[i3].p+2,word[i4].p);
519 		}
520 		printf("\"\n");
521 	}
522 }
523 
524 void
putout(char * strt,char * end)525 putout(char *strt, char *end)
526 {
527 	char *cp;
528 
529 	for(cp=strt; cp<end; )
530 		putc(*cp++,outptr);
531 }
532 
533 void
setlen(void)534 setlen(void)
535 {
536 	fprintf(kptr,
537 		"\\w'\\h'%s''\n"
538 		"\\w' /'\n"
539 		"\\w'\\h'%s''\n"
540 		"\\w'\\h'%s''\n",lenarg,gutarg,holarg);
541 }
542 
543 void
getlen(void)544 getlen(void)
545 {
546 	char s[128];
547 
548 	s[0] = '\0';
549 	fgets(s,sizeof(s),kptr);
550 	llen = atoi(s);
551 
552 	fgets(s,sizeof(s),kptr);
553 	spacesl = atoi(s);
554 
555 	fgets(s,sizeof(s),kptr);
556 	gutter = atoi(s);
557 
558 	fgets(s,sizeof(s),kptr);
559 	hole = atoi(s);
560 	if(hole < 2*spacesl)
561 		hole = 2*spacesl;
562 }
563 
564 int
hash(char * strtp,char * endp)565 hash(char *strtp, char *endp)
566 {
567 	char *cp, c;
568 	int i, j, k;
569 
570 	/* Return zero hash number for single letter words */
571 	if((endp - strtp) == 1)
572 		return(0);
573 
574 	cp = strtp;
575 	c = *cp++;
576 	i = (isupper(c)?tolower(c):c);
577 	c = *cp;
578 	j = (isupper(c)?tolower(c):c);
579 	i = i*j;
580 	cp = --endp;
581 	c = *cp--;
582 	k = (isupper(c)?tolower(c):c);
583 	c = *cp;
584 	j = (isupper(c)?tolower(c):c);
585 	j = k*j;
586 	return (i ^ (j>>2)) & MASK;
587 }
588 
589 int
storeh(int num,char * strtp)590 storeh(int num, char *strtp)
591 {
592 	int i;
593 
594 	for(i=num; i<MAXT; i++)
595 		if(hasht[i] == 0) {
596 			hasht[i] = strtp;
597 			return(0);
598 		}
599 	for(i=0; i<num; i++)
600 		if(hasht[i] == 0) {
601 			hasht[i] = strtp;
602 			return(0);
603 		}
604 	return(1);
605 }
606