xref: /plan9-contrib/sys/lib/man/permind/ptx1.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 
2 /*	permuted title index
	ptx [-t] [-i ignore] [-o only] [-w num] [-r]
	    [-c commands] [-g gap] [-h hole] [-b breakfile] [-f] [input]
5 	Ptx reads the input file and permutes on words in it.
6 	It excludes all words in the ignore file.
7 	Alternately it includes words in the only file.
	If neither is given it excludes the words in
	/sys/lib/man/permind/ignore.
10 
11 	The width of the output line (except for -r field)
12 	can be changed to num,
13 	which is a troff width measure, ens by default.
14 	with no -w, num is 72n, or 100n under -t.
15 	the -f flag tells the program to fold the output
16 	the -t flag says the output is for troff
	-c (troff commands, see below) implies -t.
	-b names a file whose characters are added to the word breaks
18 	-g sets the gutter
19 	-h sets the hole between wrapped segments
20 	-r takes the first word on each line and makes it
21 	into a fifth field.
22 	-c inserts troff commands for font-setting etc at beginning
23 
24 	*/
25 
26 #include <u.h>
27 #include <libc.h>
28 #include <stdio.h>
29 #include <ctype.h>
/* Word list used when neither -i nor -o is given. */
#define DEFLTX "/sys/lib/man/permind/ignore"
/* Byte written between the rotated tail and head of a line in the sort file. */
#define TILDE 0177
/* The ignore/only word buffer holds N stdio buffers' worth of bytes. */
#define	N 30
#define	MAX	(N*BUFSIZ)	/* parenthesized so it is safe inside any expression */
#define LMAX	2048		/* size of the line and mark buffers */
#define MAXT	2048		/* hash table slots; must stay a power of two */
#define MASK	(MAXT-1)	/* reduces a hash value to a table index */
#define ON	1

/*
 * Non-zero when c is a word-break character.  The argument is converted
 * to unsigned char so that a plain (possibly signed) char with the high
 * bit set cannot index in front of btable.
 */
#define isabreak(c) (btable[(unsigned char)(c)])
40 
/* diagnostics */
void msg(char *s, char *arg);
void diag(char *s, char *arg);
void extra(int c);

/* pass 1: read the input and write rotated lines to the sort file */
char *getline(void);
void cmpline(char *pend);
int cmpword(char *cpp, char *pend, char *hpp);
void putline(char *strt, char *end);

/* pass 2: have the formatter measure the sorted lines */
void makek(void);
void setlen(void);

/* pass 3: split each sorted line into output fields */
void getsort(void);
void getlen(void);
void putout(char *strt, char *end);
char *rtrim(char *, char *, int);
char *ltrim(char *, char *, int);

/* ignore/only word table */
int hash(char *strtp, char *endp);
int storeh(int num, char *strtp);
57 
58 int status;
59 
60 
61 char *hasht[MAXT];
62 char line[LMAX];
63 char mark[LMAX];
64 struct word {
65 	char *p;
66 	int w;
67 } word[LMAX/2];
68 char btable[256];
69 int ignore;
70 int only;
71 char *lenarg;
72 char *gutarg;
73 char *holarg;
74 int llen;
75 int spacesl;
76 int gutter;
77 int hole;
78 int mlen = LMAX;
79 int halflen;
80 int rflag;
81 char *strtbufp, *endbufp;
82 
83 
84 char *empty = "";
85 char *font = "R";
86 char *roff = "/bin/nroff";
87 char *troff = "/bin/troff";
88 
89 char *infile = "/fd/0";
90 FILE *inptr;
91 
92 FILE *outptr = stdout;
93 
94 char *sortfile = "ptxsort";	/* output of sort program */
95 char nofold[] = {'-', 'd', 't', TILDE, 0};
96 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
97 char *sortopt = nofold;
98 FILE *sortptr;
99 
100 char *kfile = "ptxmark";	/* ptxsort + troff goo for widths */
101 FILE *kptr;
102 
103 char *wfile = "ptxwidth";	/* widths of words in ptxsort */
104 FILE *wptr;
105 
106 char *bfile;	/*contains user supplied break chars */
107 FILE *bptr;
108 
109 char *cmds;
110 
111 main(int argc, char **argv)
112 {
113 	int c;
114 	char *bufp;
115 	char *pend;
116 
117 	char *xfile;
118 	FILE *xptr;
119 	Waitmsg w;
120 
121 
122 /*	argument decoding	*/
123 
124 	xfile = DEFLTX;
125 	ARGBEGIN {
126 	case 'r':
127 		rflag = 1;
128 		break;
129 	case 'f':
130 		sortopt = fold;
131 		break;
132 	case 'w':
133 		if(lenarg)
134 			extra(ARGC());
135 		lenarg = ARGF();
136 		break;
137 	case 'c':
138 		if(cmds)
139 			extra(ARGC());
140 		cmds = ARGF();
141 	case 't':
142 		roff = troff;
143 		break;
144 	case 'g':
145 		if(gutarg)
146 			extra(ARGC());
147 		gutarg =  ARGF();
148 		break;
149 	case 'h':
150 		if(holarg)
151 			extra(ARGC());
152 		holarg =  ARGF();
153 		break;
154 
155 	case 'i':
156 		if(only|ignore)
157 			extra(ARGC());
158 		ignore++;
159 		xfile = ARGF();
160 		break;
161 
162 	case 'o':
163 		if(only|ignore)
164 			extra(ARGC());
165 		only++;
166 		xfile = ARGF();
167 		break;
168 
169 	case 'b':
170 		if(bfile)
171 			extra(ARGC());
172 		bfile = ARGF();
173 		break;
174 
175 	default:
176 		diag("Illegal argument:",*argv);
177 	} ARGEND
178 
179 	if(lenarg == 0)
180 		lenarg = troff? "100n": "72n";
181 	if(gutarg == 0)
182 		gutarg = "3n";
183 	if(holarg == 0)
184 		holarg = gutarg;
185 
186 	if(argc > 1)
187 		diag("Too many filenames",empty);
188 	if(argc == 1)
189 		infile = *argv;
190 
191 
192 	/* Default breaks of blank, tab and newline */
193 	btable[' '] = ON;
194 	btable['\t'] = ON;
195 	btable['\n'] = ON;
196 	if(bfile) {
197 		if((bptr = fopen(bfile,"r")) == NULL)
198 			diag("Cannot open break char file",bfile);
199 
200 		while((c = getc(bptr)) != EOF)
201 			btable[c] = ON;
202 	}
203 
204 /*	Allocate space for a buffer.  If only or ignore file present
205 	read it into buffer. Else read in default ignore file
206 	and put resulting words in buffer.
207 	*/
208 
209 
210 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
211 		diag("Out of memory space",empty);
212 	bufp = strtbufp;
213 	endbufp = strtbufp+MAX;
214 
215 	if((xptr = fopen(xfile,"r")) == NULL)
216 		diag("Cannot open  file",xfile);
217 
218 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
219 		if(isabreak(c)) {
220 			if(storeh(hash(strtbufp,bufp),strtbufp))
221 				diag("Too many words",xfile);
222 			*bufp++ = '\0';
223 			strtbufp = bufp;
224 		}
225 		else {
226 			*bufp++ = (isupper(c)?tolower(c):c);
227 		}
228 	}
229 	if (bufp >= endbufp)
230 		diag("Too many words in file",xfile);
231 	endbufp = --bufp;
232 
233 	/* open output file for sorting */
234 
235 	if((sortptr = fopen(sortfile, "w")) == NULL)
236 		diag("Cannot open output for sorting:",sortfile);
237 
238 /*	get a line of data and compare each word for
239 	inclusion or exclusion in the sort phase
240 */
241 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
242 		diag("Cannot open data: ",infile);
243 	while(pend=getline())
244 		cmpline(pend);
245 	fclose(sortptr);
246 
247 	if(fork()==0){
248 		execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
249 			sortfile, "-o", sortfile, 0);
250 		diag("Sort exec failed","");
251 	}
252 	if(wait(&w)<0 || w.msg[0]!=0)
253 		diag("Sort failed","");
254 
255 	makek();
256 	if(fork()==0){
257 		if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
258 			diag("Cannot create width file:",wfile);
259 		execl(roff, roff, "-a", kfile, 0);
260 		diag("Sort exec failed","");
261 	}
262 	if(wait(&w)<0 || w.msg[0]!=0)
263 		diag("Sort failed","");
264 
265 	getsort();
266 /*	remove(sortfile);
267 	remove(kfile);*/
268 fflush(0);
269 _exits(0);
270 /* I don't know what's wrong with the atexit func... */
271 /*	exits(0);	*/
272 }
273 
/*
 * Print a diagnostic of the form "ptx: s arg" on standard error.
 */
void
msg(char *s, char *arg)
{
	fprintf(stderr, "ptx: %s %s\n", s, arg);
}
280 
/*
 * Complain that option letter c was given more than it may be.
 * Builds the text "-c." and hands it to diag.
 */
void
extra(int c)
{
	char opt[4];

	opt[0] = '-';
	opt[1] = c;
	opt[2] = '.';
	opt[3] = '\0';
	diag("Extra option", opt);
}
288 
/*
 * Fatal error: print the message through msg, then terminate with s as
 * the exit status string.  The work files are deliberately left behind
 * (their removal was disabled in the original source).
 */
void
diag(char *s, char *arg)
{
	msg(s, arg);
	exits(s);
}
300 
301 
302 char*
303 getline(void)
304 {
305 
306 	int c;
307 	char *linep;
308 	char *endlinep;
309 
310 
311 	endlinep= line + mlen;
312 	linep = line;
313 	/* Throw away leading white space */
314 
315 	while(isspace(c=getc(inptr)))
316 		;
317 	if(c==EOF)
318 		return(0);
319 	ungetc(c,inptr);
320 	while(( c=getc(inptr)) != EOF) {
321 		switch (c) {
322 
323 			case '\t':
324 				if(linep<endlinep)
325 					*linep++ = ' ';
326 				break;
327 			case '\n':
328 				while(isspace(*--linep));
329 				*++linep = '\n';
330 				return(linep);
331 			default:
332 				if(linep < endlinep)
333 					*linep++ = c;
334 		}
335 	}
336 	return(0);
337 }
338 
339 void
340 cmpline(char *pend)
341 {
342 
343 	char *pstrt, *pchar, *cp;
344 	char **hp;
345 	int flag;
346 
347 	pchar = line;
348 	if(rflag)
349 		while(pchar<pend&&!isspace(*pchar))
350 			pchar++;
351 	while(pchar<pend){
352 	/* eliminate white space */
353 		if(isabreak(*pchar++))
354 			continue;
355 		pstrt = --pchar;
356 
357 		flag = 1;
358 		while(flag){
359 			if(isabreak(*pchar)) {
360 				hp = &hasht[hash(pstrt,pchar)];
361 				pchar--;
362 				while(cp = *hp++){
363 					if(hp == &hasht[MAXT])
364 						hp = hasht;
365 	/* possible match */
366 					if(cmpword(pstrt,pchar,cp)){
367 	/* exact match */
368 						if(!ignore && only)
369 							putline(pstrt,pend);
370 						flag = 0;
371 						break;
372 					}
373 				}
374 	/* no match */
375 				if(flag){
376 					if(ignore || !only)
377 						putline(pstrt,pend);
378 					flag = 0;
379 				}
380 			}
381 		pchar++;
382 		}
383 	}
384 }
385 
/*
 * Compare a word of the text with a table entry, ignoring the case of
 * the text.
 *
 *	cpp	first character of the word in the text
 *	pend	last character of that word
 *	hpp	NUL-terminated table entry, already in lower case
 *
 * Returns 1 when the entry matches the whole word, 0 otherwise.
 * Characters are handled as unsigned char so that bytes with the high
 * bit set are legal arguments to isupper/tolower.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	int c;

	for(; *hpp != '\0'; hpp++){
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp)
			return(0);
	}
	/* every entry character matched; the word must end here as well */
	return(cpp - 1 == pend);
}
399 
400 void
401 putline(char *strt, char *end)
402 {
403 	char *cp;
404 
405 	for(cp=strt; cp<end; cp++)
406 		putc(*cp, sortptr);
407 	/* Add extra blank before TILDE to sort correctly
408 	   with -fd option */
409 	putc(' ',sortptr);
410 	putc(TILDE,sortptr);
411 	for (cp=line; cp<strt; cp++)
412 		putc(*cp,sortptr);
413 	putc('\n',sortptr);
414 }
415 
416 void
417 makek(void)
418 {
419 	int i, c;
420 	int nr = 0;
421 
422 	if((sortptr = fopen(sortfile,"r")) == NULL)
423 		diag("Cannot open sorted data:",sortfile);
424 	if((kptr = fopen(kfile,"w")) == NULL)
425 		diag("Cannot create mark file:",kfile);
426 	if(cmds)
427 		fprintf(kptr,"%s\n",cmds);
428 	fprintf(kptr,
429 		".nf\n"
430 		".pl 1\n"
431 		".tr %c\\&\n", TILDE);
432 	setlen();
433 
434 	while((c = getc(sortptr)) != EOF) {
435 		if(nr == 0) {
436 			fprintf(kptr,".di xx\n");
437 			nr++;
438 		}
439 		if(c == '\n') {
440 			fprintf(kptr,"\n.di\n");
441 			for(i=1; i<nr; i++)
442 				fprintf(kptr,"\\n(%.2d ",i);
443 			fprintf(kptr,"\n");
444 			nr = 0;
445 			continue;
446 		}
447 		if(isspace(c))
448 			fprintf(kptr,"\\k(%.2d",nr++);
449 		putc(c,kptr);
450 	}
451 	fclose(sortptr);
452 	fclose(kptr);
453 }
454 
455 
456 
457 void
458 getsort(void)
459 {
460 	char *tilde, *linep, *markp;
461 	int i0, i1, i2, i3, i4, i5, i6, i7;
462 	int w0, w6;
463 
464 	if((sortptr = fopen(sortfile,"r")) == NULL)
465 		diag("Cannot open sorted data:",sortfile);
466 	if((wptr = fopen(wfile,"r")) == NULL)
467 		diag("Cannot open width file:",wfile);
468 	getlen();
469 
470 	halflen = (llen-gutter)/2;
471 
472 	while(fgets(line,sizeof(line),sortptr) != 0) {
473 		if(fgets(mark,sizeof(mark),wptr) == 0)
474 			diag("Phase error 1","");
475 		linep = line;
476 		markp = mark;
477 		i3 = i7 = 0;
478 		word[i7].p = linep;
479 		word[i7].w = 0;
480 		for(linep=line; *linep; linep++) {
481 			if(*linep == TILDE)
482 				i3 = i7;
483 			else if(*linep == '\n')
484 				break;
485 			else if(isspace(*linep)) {
486 				i7++;
487 				word[i7].p = linep;
488 				if(!markp) {
489 					diag("Phase error 2","");
490 				}
491 				word[i7].w = atoi(markp);
492 				markp = strchr(markp+1,' ');
493 			}
494 		}
495 		i0 = 0;
496 		for(i1=i0; i1<i3; i1++)
497 			if(word[i1+1].w - word[i0].w >= halflen - spacesl)
498 				break;
499 		w0 = word[i1].w - word[i0].w;
500 		i4 = i3 + rflag;
501 		for(i6 = i7; i6>i4; i6--)
502 			if(word[i7].w - word[i6-1].w >= halflen)
503 				break;
504 		w6 = word[i7].w - word[i6].w - spacesl;
505 		for(i2=i1 ; i2<i3; i2++)
506 			if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
507 				break;
508 		for(i5=i6; i5>i4; i5--)
509 			if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
510 				break;
511 
512 		printf(".xx \"");
513 		putout(word[i1].p+1,word[i2].p);
514 		if(i1<i2 && i2<i3) putchar('/');
515 		printf("\" \"");
516 		if(i5>i4 && i6==i5) putchar('/');
517 		putout(word[i6].p+1+(i6==i3),word[i7].p);
518 		printf("\" \"");
519 		putout(word[i0].p,word[i1].p);
520 		if(i2<i3 && i1==i2) putchar('/');
521 		printf("\" \"");
522 		if(i5>i4 && i6>i5) putchar('/');
523 		putout(word[i5].p+1+(i5==i3),word[i6].p);
524 		if(rflag) {
525 			printf("\" \"");
526 			putout(word[i3].p+2,word[i4].p);
527 		}
528 		printf("\"\n");
529 	}
530 }
531 
532 void
533 putout(char *strt, char *end)
534 {
535 	char *cp;
536 
537 	for(cp=strt; cp<end; )
538 			putc(*cp++,outptr);
539 }
540 
541 void
542 setlen(void)
543 {
544 	fprintf(kptr,
545 		"\\w'\\h'%s''\n"
546 		"\\w' /'\n"
547 		"\\w'\\h'%s''\n"
548 		"\\w'\\h'%s''\n",lenarg,gutarg,holarg);
549 }
550 
551 void
552 getlen(void)
553 {
554 	char s[20];
555 	fgets(s,sizeof(s),kptr);
556 	llen = atoi(s);
557 
558 	fgets(s,sizeof(s),kptr);
559 	spacesl = atoi(s);
560 
561 	fgets(s,sizeof(s),kptr);
562 	gutter = atoi(s);
563 
564 	fgets(s,sizeof(s),kptr);
565 	hole = atoi(s);
566 	if(hole < 2*spacesl)
567 		hole = 2*spacesl;
568 }
569 
570 int
571 hash(char *strtp, char *endp)
572 {
573 	char *cp, c;
574 	int i, j, k;
575 
576 	/* Return zero hash number for single letter words */
577 	if((endp - strtp) == 1)
578 		return(0);
579 
580 	cp = strtp;
581 	c = *cp++;
582 	i = (isupper(c)?tolower(c):c);
583 	c = *cp;
584 	j = (isupper(c)?tolower(c):c);
585 	i = i*j;
586 	cp = --endp;
587 	c = *cp--;
588 	k = (isupper(c)?tolower(c):c);
589 	c = *cp;
590 	j = (isupper(c)?tolower(c):c);
591 	j = k*j;
592 
593 	k = (i ^ (j>>2)) & MASK;
594 	return(k);
595 }
596 
597 int
598 storeh(int num, char *strtp)
599 {
600 	int i;
601 
602 	for(i=num; i<MAXT; i++) {
603 		if(hasht[i] == 0) {
604 			hasht[i] = strtp;
605 			return(0);
606 		}
607 	}
608 	for(i=0; i<num; i++) {
609 		if(hasht[i] == 0) {
610 			hasht[i] = strtp;
611 			return(0);
612 		}
613 	}
614 	return(1);
615 }
616