/*
	permuted title index
	ptx [-t] [-i ignore] [-o only] [-w num] [-r]
	    [-c commands] [-g gap] [-h hole] [-b breakfile] [-f] [input]

	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternatively it includes only the words in the only file.
	If neither is given it excludes the words in
	/sys/lib/man/permind/ignore.

	The width of the output line (except for the -r field)
	can be changed to num,
	which is a troff width measure, ens by default.
	With no -w, num is 72n, or 100n under -t.
	The -f flag tells the program to fold the output.
	The -t flag says the output is for troff.
	Font specifier -F implies -t (no -F option is decoded in this
	file; here it is -c that implies -t).
	-g sets the gutter.
	-h sets the hole between wrapped segments.
	-b names a file whose characters are added to the set of
		word-break characters (blank, tab and newline).
	-r takes the first word on each line and makes it
		into a fifth field.
	-c inserts troff commands for font-setting etc. at the beginning.
*/
25
26 #include <u.h>
27 #include <libc.h>
28 #include <stdio.h>
29 #include <ctype.h>
30
#define DEFLTX	"/sys/lib/man/permind/ignore"	/* default ignore list */
#define TILDE	0177		/* actually RUBOUT, not ~ */
#define N	30
#define MAX	(N*BUFSIZ)	/* size of the word-list buffer; parenthesized so it is safe in any expression */
#define LMAX	2048		/* size of line[] and mark[] */
#define MAXT	2048		/* number of hash table slots */
#define MASK	03777		/* MAXT-1: reduces a hash value to a slot index */
#define ON	1

/*
 * Non-zero when c is a word-break character.  The index is forced to
 * unsigned char so that bytes with the high bit set, read through a
 * plain (possibly signed) char, cannot index before the table.
 */
#define isabreak(c)	(btable[(unsigned char)(c)])
41
42 char *getline(void);
43 void msg(char *, char *);
44 void extra(int);
45 void diag(char *, char *);
46 void cmpline(char *);
47 int cmpword(char *, char *, char *);
48 void putline(char *, char *);
49 void makek(void);
50 void getsort(void);
51 char *rtrim(char *, char *, int);
52 char *ltrim(char *, char *, int);
53 void putout(char *, char *);
54 void setlen(void);
55 void getlen(void);
56 int hash(char *, char *);
57 int storeh(int, char *);
58
59 int status;
60
61 char *hasht[MAXT];
62 char line[LMAX];
63 char mark[LMAX];
/*
 * Word boundaries of the sorted line currently being formatted.
 * getsort() fills one entry for the start of the line and one for
 * each white-space character it meets.
 */
struct word {
	char	*p;	/* position in line[]: the line start or a separator */
	int	w;	/* number parsed from the matching field of the width line */
} word[LMAX/2];
68 char btable[256];
69 int ignore;
70 int only;
71 char *lenarg;
72 char *gutarg;
73 char *holarg;
74 int llen;
75 int spacesl;
76 int gutter;
77 int hole;
78 int mlen = LMAX;
79 int halflen;
80 int rflag;
81 char *strtbufp, *endbufp;
82
83
84 char *empty = "";
85 char *font = "R";
86 char *roff = "/bin/nroff";
87 char *troff = "/bin/troff";
88
89 char *infile = "/fd/0";
90 FILE *inptr;
91
92 FILE *outptr = stdout;
93
94 char *sortfile = "ptxsort"; /* output of sort program */
95 char nofold[] = {'-', 'd', 't', TILDE, 0};
96 char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
97 char *sortopt = nofold;
98 FILE *sortptr;
99
100 char *kfile = "ptxmark"; /* ptxsort + troff goo for widths */
101 FILE *kptr;
102
103 char *wfile = "ptxwidth"; /* widths of words in ptxsort */
104 FILE *wptr;
105
106 char *bfile; /*contains user supplied break chars */
107 FILE *bptr;
108
109 char *cmds;
110
main(int argc,char ** argv)111 main(int argc, char **argv)
112 {
113 int c;
114 char *bufp;
115 char *pend;
116 char *xfile;
117 FILE *xptr;
118 Waitmsg *w;
119
120 /* argument decoding */
121 xfile = DEFLTX;
122 ARGBEGIN {
123 case 'r':
124 rflag = 1;
125 break;
126 case 'f':
127 sortopt = fold;
128 break;
129 case 'w':
130 if(lenarg)
131 extra(ARGC());
132 lenarg = ARGF();
133 break;
134 case 'c':
135 if(cmds)
136 extra(ARGC());
137 cmds = ARGF();
138 case 't':
139 roff = troff;
140 break;
141 case 'g':
142 if(gutarg)
143 extra(ARGC());
144 gutarg = ARGF();
145 break;
146 case 'h':
147 if(holarg)
148 extra(ARGC());
149 holarg = ARGF();
150 break;
151
152 case 'i':
153 if(only|ignore)
154 extra(ARGC());
155 ignore++;
156 xfile = ARGF();
157 break;
158
159 case 'o':
160 if(only|ignore)
161 extra(ARGC());
162 only++;
163 xfile = ARGF();
164 break;
165
166 case 'b':
167 if(bfile)
168 extra(ARGC());
169 bfile = ARGF();
170 break;
171
172 default:
173 diag("Illegal argument:",*argv);
174 } ARGEND
175
176 if(lenarg == 0)
177 lenarg = troff? "100n": "72n";
178 if(gutarg == 0)
179 gutarg = "3n";
180 if(holarg == 0)
181 holarg = gutarg;
182
183 if(argc > 1)
184 diag("Too many filenames",empty);
185 if(argc == 1)
186 infile = *argv;
187
188 /* Default breaks of blank, tab and newline */
189 btable[' '] = ON;
190 btable['\t'] = ON;
191 btable['\n'] = ON;
192 if(bfile) {
193 if((bptr = fopen(bfile,"r")) == NULL)
194 diag("Cannot open break char file",bfile);
195
196 while((c = getc(bptr)) != EOF)
197 btable[c] = ON;
198 }
199
200 /*
201 Allocate space for a buffer. If only or ignore file present
202 read it into buffer. Else read in default ignore file
203 and put resulting words in buffer.
204 */
205
206 if((strtbufp = calloc(N,BUFSIZ)) == NULL)
207 diag("Out of memory space",empty);
208 bufp = strtbufp;
209 endbufp = strtbufp+MAX;
210
211 if((xptr = fopen(xfile,"r")) == NULL)
212 diag("Cannot open file",xfile);
213
214 while(bufp < endbufp && (c = getc(xptr)) != EOF)
215 if(isabreak(c)) {
216 if(storeh(hash(strtbufp,bufp),strtbufp))
217 diag("Too many words",xfile);
218 *bufp++ = '\0';
219 strtbufp = bufp;
220 } else
221 *bufp++ = (isupper(c)?tolower(c):c);
222 if (bufp >= endbufp)
223 diag("Too many words in file",xfile);
224 endbufp = --bufp;
225
226 /* open output file for sorting */
227
228 if((sortptr = fopen(sortfile, "w")) == NULL)
229 diag("Cannot open output for sorting:",sortfile);
230
231 /*
232 get a line of data and compare each word for
233 inclusion or exclusion in the sort phase
234 */
235 if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
236 diag("Cannot open data: ",infile);
237 while((pend = getline()) != NULL)
238 cmpline(pend);
239 fclose(sortptr);
240
241 if(fork()==0){
242 execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1",
243 sortfile, "-o", sortfile, 0);
244 diag("Sort exec failed","");
245 }
246 if((w = wait()) == NULL || w->msg[0] != '\0')
247 diag("Sort failed","");
248 free(w);
249
250 makek();
251 if(fork()==0){
252 if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
253 diag("Cannot create width file:",wfile);
254 execl(roff, roff, "-a", kfile, 0);
255 diag("Sort exec failed","");
256 }
257 if((w = wait()) == NULL || w->msg[0] != '\0')
258 diag("Sort failed","");
259 free(w);
260
261 getsort();
262 /*
263 remove(sortfile);
264 remove(kfile);
265 */
266 fflush(0);
267 _exits(0);
268 /* I don't know what's wrong with the atexit func... */
269 /* exits(0); */
270 }
271
/*
 * Print a diagnostic on standard error in the form
 * "ptx: <s> <arg>".
 */
void
msg(char *s, char *arg)
{
	FILE *err;

	err = stderr;
	fprintf(err, "ptx: %s %s\n", s, arg);
}
277
/*
 * Complain, through diag(), that option letter c was given more
 * than once or conflicts with another option.
 */
void
extra(int c)
{
	char opt[] = "-x.";

	/* replace the placeholder letter with the offending option */
	opt[1] = c;
	diag("Extra option", opt);
}
286
/*
 * Fatal error: report s and arg with msg(), then terminate via
 * exits() with s as the exit status string.
 */
void
diag(char *s, char *arg)
{
	msg(s, arg);

	/* removal of the temporary files is disabled:
	remove(sortfile);
	remove(kfile);
	 */

	exits(s);
}
297
298
299 char*
getline(void)300 getline(void)
301 {
302 int c;
303 char *linep;
304 char *endlinep;
305
306 endlinep= line + mlen;
307 linep = line;
308 /* Throw away leading white space */
309
310 while(isspace(c = getc(inptr)))
311 ;
312 if(c==EOF)
313 return(0);
314 ungetc(c,inptr);
315 while((c = getc(inptr)) != EOF)
316 switch (c) {
317 case '\t':
318 if(linep<endlinep)
319 *linep++ = ' ';
320 break;
321 case '\n':
322 while(isspace(*--linep))
323 ;
324 *++linep = '\n';
325 return(linep);
326 default:
327 if(linep < endlinep)
328 *linep++ = c;
329 break;
330 }
331 return(0);
332 }
333
334 void
cmpline(char * pend)335 cmpline(char *pend)
336 {
337 char *pstrt, *pchar, *cp;
338 char **hp;
339 int flag;
340
341 pchar = line;
342 if(rflag)
343 while(pchar < pend && !isspace(*pchar))
344 pchar++;
345 while(pchar < pend){
346 /* eliminate white space */
347 if(isabreak(*pchar++))
348 continue;
349 pstrt = --pchar;
350
351 flag = 1;
352 while(flag){
353 if(isabreak(*pchar)) {
354 hp = &hasht[hash(pstrt,pchar)];
355 pchar--;
356 while(cp = *hp++){
357 if(hp == &hasht[MAXT])
358 hp = hasht;
359 /* possible match */
360 if(cmpword(pstrt,pchar,cp)){
361 /* exact match */
362 if(!ignore && only)
363 putline(pstrt,pend);
364 flag = 0;
365 break;
366 }
367 }
368 /* no match */
369 if(flag){
370 if(ignore || !only)
371 putline(pstrt,pend);
372 flag = 0;
373 }
374 }
375 pchar++;
376 }
377 }
378 }
379
/*
 * Compare the word that starts at cpp and whose last character is at
 * pend with the NUL-terminated, lower-case table entry hpp.  Upper-case
 * letters in the word are folded to lower case before comparing.
 *
 * Returns 1 when the word equals hpp exactly (same length), else 0.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	int c;

	for(; *hpp != '\0'; hpp++, cpp++){
		/* ctype functions need an unsigned char value */
		c = (unsigned char)*cpp;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp)
			return(0);
	}
	/* every table character matched; the word must end right here */
	return(cpp == pend+1);
}
394
395 void
putline(char * strt,char * end)396 putline(char *strt, char *end)
397 {
398 char *cp;
399
400 for(cp=strt; cp<end; cp++)
401 putc(*cp, sortptr);
402 /* Add extra blank before TILDE to sort correctly with -fd option */
403 putc(' ',sortptr);
404 putc(TILDE,sortptr);
405 for (cp=line; cp<strt; cp++)
406 putc(*cp,sortptr);
407 putc('\n',sortptr);
408 }
409
410 void
makek(void)411 makek(void)
412 {
413 int i, c;
414 int nr = 0;
415
416 if((sortptr = fopen(sortfile,"r")) == NULL)
417 diag("Cannot open sorted data:",sortfile);
418 if((kptr = fopen(kfile,"w")) == NULL)
419 diag("Cannot create mark file:",kfile);
420 if(cmds)
421 fprintf(kptr,"%s\n",cmds);
422 fprintf(kptr,
423 ".nf\n"
424 ".pl 1\n"
425 ".tr %c\\&\n", TILDE);
426 setlen();
427
428 while((c = getc(sortptr)) != EOF) {
429 if(nr == 0) {
430 fprintf(kptr,".di xx\n");
431 nr++;
432 }
433 if(c == '\n') {
434 fprintf(kptr,"\n.di\n");
435 for(i=1; i<nr; i++)
436 fprintf(kptr,"\\n(%.2d ",i);
437 fprintf(kptr,"\n");
438 nr = 0;
439 continue;
440 }
441 if(isspace(c))
442 fprintf(kptr,"\\k(%.2d",nr++);
443 putc(c,kptr);
444 }
445 fclose(sortptr);
446 fclose(kptr);
447 }
448
449 void
getsort(void)450 getsort(void)
451 {
452 char *tilde, *linep, *markp;
453 int i0, i1, i2, i3, i4, i5, i6, i7, w0, w6;
454
455 if((sortptr = fopen(sortfile, "r")) == NULL)
456 diag("Cannot open sorted data:", sortfile);
457 if((wptr = fopen(wfile, "r")) == NULL)
458 diag("Cannot open width file:", wfile);
459 getlen();
460
461 halflen = (llen-gutter)/2;
462
463 while(fgets(line, sizeof(line), sortptr) != NULL) {
464 if(fgets(mark, sizeof(mark), wptr) == NULL)
465 diag("Phase error 1: premature EOF on width file",
466 wfile);
467 linep = line;
468 markp = mark;
469 i3 = i7 = 0;
470 word[i7].p = linep;
471 word[i7].w = 0;
472 for(linep=line; *linep; linep++) {
473 if(*linep == TILDE)
474 i3 = i7;
475 else if(*linep == '\n')
476 break;
477 else if(isspace(*linep)) {
478 i7++;
479 word[i7].p = linep;
480 if(!markp)
481 diag("Phase error 2: no widths for summary",
482 line);
483 word[i7].w = atoi(markp);
484 markp = strchr(markp+1, ' ');
485 }
486 }
487 i0 = 0;
488 for(i1=i0; i1<i3; i1++)
489 if(word[i1+1].w - word[i0].w >= halflen - spacesl)
490 break;
491 w0 = word[i1].w - word[i0].w;
492 i4 = i3 + rflag;
493 for(i6 = i7; i6>i4; i6--)
494 if(word[i7].w - word[i6-1].w >= halflen)
495 break;
496 w6 = word[i7].w - word[i6].w - spacesl;
497 for(i2=i1 ; i2<i3; i2++)
498 if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
499 break;
500 for(i5=i6; i5>i4; i5--)
501 if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
502 break;
503
504 printf(".xx \"");
505 putout(word[i1].p+1,word[i2].p);
506 if(i1<i2 && i2<i3) putchar('/');
507 printf("\" \"");
508 if(i5>i4 && i6==i5) putchar('/');
509 putout(word[i6].p+1+(i6==i3),word[i7].p);
510 printf("\" \"");
511 putout(word[i0].p,word[i1].p);
512 if(i2<i3 && i1==i2) putchar('/');
513 printf("\" \"");
514 if(i5>i4 && i6>i5) putchar('/');
515 putout(word[i5].p+1+(i5==i3),word[i6].p);
516 if(rflag) {
517 printf("\" \"");
518 putout(word[i3].p+2,word[i4].p);
519 }
520 printf("\"\n");
521 }
522 }
523
524 void
putout(char * strt,char * end)525 putout(char *strt, char *end)
526 {
527 char *cp;
528
529 for(cp=strt; cp<end; )
530 putc(*cp++,outptr);
531 }
532
533 void
setlen(void)534 setlen(void)
535 {
536 fprintf(kptr,
537 "\\w'\\h'%s''\n"
538 "\\w' /'\n"
539 "\\w'\\h'%s''\n"
540 "\\w'\\h'%s''\n",lenarg,gutarg,holarg);
541 }
542
543 void
getlen(void)544 getlen(void)
545 {
546 char s[128];
547
548 s[0] = '\0';
549 fgets(s,sizeof(s),kptr);
550 llen = atoi(s);
551
552 fgets(s,sizeof(s),kptr);
553 spacesl = atoi(s);
554
555 fgets(s,sizeof(s),kptr);
556 gutter = atoi(s);
557
558 fgets(s,sizeof(s),kptr);
559 hole = atoi(s);
560 if(hole < 2*spacesl)
561 hole = 2*spacesl;
562 }
563
564 int
hash(char * strtp,char * endp)565 hash(char *strtp, char *endp)
566 {
567 char *cp, c;
568 int i, j, k;
569
570 /* Return zero hash number for single letter words */
571 if((endp - strtp) == 1)
572 return(0);
573
574 cp = strtp;
575 c = *cp++;
576 i = (isupper(c)?tolower(c):c);
577 c = *cp;
578 j = (isupper(c)?tolower(c):c);
579 i = i*j;
580 cp = --endp;
581 c = *cp--;
582 k = (isupper(c)?tolower(c):c);
583 c = *cp;
584 j = (isupper(c)?tolower(c):c);
585 j = k*j;
586 return (i ^ (j>>2)) & MASK;
587 }
588
589 int
storeh(int num,char * strtp)590 storeh(int num, char *strtp)
591 {
592 int i;
593
594 for(i=num; i<MAXT; i++)
595 if(hasht[i] == 0) {
596 hasht[i] = strtp;
597 return(0);
598 }
599 for(i=0; i<num; i++)
600 if(hasht[i] == 0) {
601 hasht[i] = strtp;
602 return(0);
603 }
604 return(1);
605 }
606