static char *sccsid = "@(#)ptx.c 4.1 (Berkeley) 10/01/80";
#

/*	permuted title index
	ptx [-t] [-r] [-f] [-w num] [-g num] [-i ignore] [-o only]
	    [-b break] [input] [output]
	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternately it includes words in the only file.
	If neither is given it excludes the words in /usr/lib/eign.

	The width of the output line can be changed to num
	characters.  If omitted 72 is the default, unless the output
	is for troff, in which case it is 100.
	the -f flag tells the program to fold the output
	the -t flag says the output is for troff and the
	output is then wider.
	the -r flag says the first word of every input line is a
	reference: it is not indexed itself and is appended as an
	extra field to every output line made from that input line.
	the -g flag sets the gap (and gutter) width to num.
	the -b flag names a file whose characters are added to the
	default word break characters (blank, tab and newline).

	make: cc ptx.c -lS
	*/

/*
 * Ask for the X/Open 5 interfaces: that is enough to get mkstemp()
 * and fdopen() declared.  NOTE(review): it is also meant to keep the
 * newer POSIX getline() out of <stdio.h>, because this file has its
 * own, unrelated getline() -- confirm on the target C library.
 */
#define _XOPEN_SOURCE 500

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>
#define DEFLTX "/usr/lib/eign"
#define TILDE 0177
#define SORT "/usr/bin/sort"
#define	N 30
#define	MAX	(N*BUFSIZ)
#define LMAX	200
#define MAXT	2048
#define MASK	03777
#define SET	1

/* index through unsigned char so that no byte value can land outside btable */
#define isabreak(c) (btable[(unsigned char)(c)])

extern char *getline();
int msg(), diag(), cmpline(), getsort(), onintr(), hash(), storeh();
int status;


char *hasht[MAXT];		/* open-addressed table of ignore/only words */
char line[LMAX];		/* current input line */
char btable[256];		/* non-zero entry: that byte separates words */
int ignore;
int only;
int llen = 72;
int gap = 3;
int gutter = 3;
int mlen = LMAX;
int wlen;
int rflag;
int halflen;
char *strtbufp, *endbufp;
char *empty = "";

char *infile;
FILE *inptr;			/* set to stdin at the top of main() */

char *outfile;
FILE *outptr;			/* set to stdout at the top of main() */

char *sortfile = "";		/* output of sort program; "" until created */
char nofold[] = {'-', 'd', 't', TILDE, 0};
char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
char *sortopt = nofold;
FILE *sortptr;

char *bfile;	/*contains user supplied break chars */
FILE *bptr;

/*
 * Signal handler with the type signal() expects; the real cleanup
 * (remove the temporary file, exit) is done by onintr().
 */
static void
catchsig(signo)
int signo;
{
	onintr();
}

/*
 * Abort with a diagnostic unless the option named by opt is followed by
 * another argument.  argc counts the program name, the option itself and
 * whatever follows, so a value is present only when argc is at least 3.
 */
static void
needarg(argc,opt)
int argc;
char *opt;
{
	if(argc < 3)
		diag("Missing value for option:",opt);
}

/*
 * Decode the arguments, load the break characters and the ignore/only
 * word list, write one rotated record per keyword to a temporary file,
 * have sort(1) order that file in place and finally format it onto the
 * output.  Exits 0 on success; every failure goes through diag().
 */
int
main(argc,argv)
int argc;
char **argv;
{
	register int c;
	register char *bufp;
	int pid, w, fd;
	char *pend;
	char *xfile;
	FILE *xptr;
	/* mkstemp() rewrites its template, so it must be a writable array */
	static char sorttmp[] = "/tmp/ptxsXXXXXX";

	inptr = stdin;
	outptr = stdout;

	if(signal(SIGHUP,catchsig)==SIG_IGN)
		signal(SIGHUP,SIG_IGN);
	if(signal(SIGINT,catchsig)==SIG_IGN)
		signal(SIGINT,SIG_IGN);
	signal(SIGPIPE,catchsig);
	signal(SIGTERM,catchsig);

/*	argument decoding	*/

	xfile = DEFLTX;
	argv++;
	while(argc>1 && **argv == '-') {
		switch (*++*argv){

		case 'r':
			rflag++;
			break;
		case 'f':
			sortopt = fold;
			break;

		case 'w':
			needarg(argc,*argv);
			argc--;
			wlen++;
			llen = atoi(*++argv);
			if(llen <= 0)
				diag("Wrong width:",*argv);
			if(llen > LMAX) {
				llen = LMAX;
				msg("Lines truncated to 200 chars.",empty);
			}
			break;

		case 't':
			/* an explicit -w wins over the troff default */
			if(wlen == 0)
				llen = 100;
			break;
		case 'g':
			needarg(argc,*argv);
			argc--;
			gap = gutter = atoi(*++argv);
			break;

		case 'i':
			if(only)
				diag("Only file already given.",empty);
			needarg(argc,*argv);
			argc--;
			ignore++;
			xfile = *++argv;
			break;

		case 'o':
			if(ignore)
				diag("Ignore file already given",empty);
			needarg(argc,*argv);
			only++;
			argc--;
			xfile = *++argv;
			break;

		case 'b':
			needarg(argc,*argv);
			argc--;
			bfile = *++argv;
			break;

		default:
			msg("Illegal argument:",*argv);
		}
		argc--;
		argv++;
	}

	if(argc>3)
		diag("Too many filenames",empty);
	else if(argc==3){
		infile = *argv++;
		outfile = *argv;
		if((outptr = fopen(outfile,"w")) == NULL)
			diag("Cannot open output file:",outfile);
	} else if(argc==2) {
		infile = *argv;
		outfile = 0;
	}


	/* Default breaks of blank, tab and newline */
	btable[' '] = SET;
	btable['\t'] = SET;
	btable['\n'] = SET;
	if(bfile) {
		if((bptr = fopen(bfile,"r")) == NULL)
			diag("Cannot open break char file",bfile);

		while((c = getc(bptr)) != EOF)
			btable[c] = SET;
		fclose(bptr);
	}

/*	Allocate space for a buffer.  If only or ignore file present
	read it into buffer. Else read in default ignore file
	and put resulting words in buffer.
	*/


	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
		diag("Out of memory space",empty);
	bufp = strtbufp;
	endbufp = strtbufp+MAX;

	if((xptr = fopen(xfile,"r")) == NULL)
		diag("Cannot open file",xfile);

	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
		if(isabreak(c)) {
			/* consecutive breaks: there is no word to store */
			if(bufp == strtbufp)
				continue;
			if(storeh(hash(strtbufp,bufp),strtbufp))
				diag("Too many words",xfile);
			*bufp++ = '\0';
			strtbufp = bufp;
		}
		else {
			*bufp++ = (isupper(c)?tolower(c):c);
		}
	}
	if (bufp >= endbufp)
		diag("Too many words in file",xfile);
	/* keep a last word that is not followed by a break character */
	if(bufp > strtbufp) {
		if(storeh(hash(strtbufp,bufp),strtbufp))
			diag("Too many words",xfile);
		*bufp++ = '\0';
		strtbufp = bufp;
	}
	fclose(xptr);
	endbufp = --bufp;

/*	open the data before the temporary file exists, so that a bad
	input name does not leave a temporary file behind
	*/

	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
		diag("Cannot open data: ",infile);

/* open output file for sorting */

	if((fd = mkstemp(sorttmp)) < 0)
		diag("Cannot open output for sorting:",sorttmp);
	sortfile = sorttmp;
	if((sortptr = fdopen(fd, "w")) == NULL) {
		unlink(sortfile);
		diag("Cannot open output for sorting:",sortfile);
	}

/*	get a line of data and compare each word for
	inclusion or exclusion in the sort phase
	*/

	while(pend=getline())
		cmpline(pend);
	fclose(sortptr);

	switch (pid = fork()){

	case -1:	/* cannot fork */
		unlink(sortfile);
		diag("Cannot fork",empty);

	case 0:		/* child */
		/* -k1,1 -k2 is the current spelling of the old "+0 -1 +1" */
		execl(SORT, SORT, sortopt, "-k1,1", "-k2",
			sortfile, "-o", sortfile, (char *)0);
		/* exec failed: never fall into the parent's code */
		_exit(127);

	default:	/* parent */
		while((w = wait(&status)) != pid && w != -1)
			;
	}
	if(status != 0) {
		unlink(sortfile);
		diag("Sort failed",empty);
	}


	getsort();
	unlink(sortfile);
	exit(0);
}

msg(s,arg) 252*1071Sbill char *s; 253*1071Sbill char *arg; 254*1071Sbill { 255*1071Sbill fprintf(stderr,"%s %s\n",s,arg); 256*1071Sbill return; 257*1071Sbill } 258*1071Sbill diag(s,arg) 259*1071Sbill char *s, *arg; 260*1071Sbill { 261*1071Sbill 262*1071Sbill msg(s,arg); 263*1071Sbill exit(1); 264*1071Sbill } 265*1071Sbill 266*1071Sbill 267*1071Sbill char *getline() 268*1071Sbill { 269*1071Sbill 270*1071Sbill register c; 271*1071Sbill register char *linep; 272*1071Sbill char *endlinep; 273*1071Sbill 274*1071Sbill 275*1071Sbill endlinep= line + mlen; 276*1071Sbill linep = line; 277*1071Sbill /* Throw away leading white space */ 278*1071Sbill 279*1071Sbill while(isspace(c=getc(inptr))) 280*1071Sbill ; 281*1071Sbill if(c==EOF) 282*1071Sbill return(0); 283*1071Sbill ungetc(c,inptr); 284*1071Sbill while(( c=getc(inptr)) != EOF) { 285*1071Sbill switch (c) { 286*1071Sbill 287*1071Sbill case '\t': 288*1071Sbill if(linep<endlinep) 289*1071Sbill *linep++ = ' '; 290*1071Sbill break; 291*1071Sbill case '\n': 292*1071Sbill while(isspace(*--linep)); 293*1071Sbill *++linep = '\n'; 294*1071Sbill return(linep); 295*1071Sbill default: 296*1071Sbill if(linep < endlinep) 297*1071Sbill *linep++ = c; 298*1071Sbill } 299*1071Sbill } 300*1071Sbill return(0); 301*1071Sbill } 302*1071Sbill 303*1071Sbill cmpline(pend) 304*1071Sbill char *pend; 305*1071Sbill { 306*1071Sbill 307*1071Sbill char *pstrt, *pchar, *cp; 308*1071Sbill char **hp; 309*1071Sbill int flag; 310*1071Sbill 311*1071Sbill pchar = line; 312*1071Sbill if(rflag) 313*1071Sbill while(pchar<pend&&!isspace(*pchar)) 314*1071Sbill pchar++; 315*1071Sbill while(pchar<pend){ 316*1071Sbill /* eliminate white space */ 317*1071Sbill if(isabreak(*pchar++)) 318*1071Sbill continue; 319*1071Sbill pstrt = --pchar; 320*1071Sbill 321*1071Sbill flag = 1; 322*1071Sbill while(flag){ 323*1071Sbill if(isabreak(*pchar)) { 324*1071Sbill hp = &hasht[hash(pstrt,pchar)]; 325*1071Sbill pchar--; 326*1071Sbill while(cp = *hp++){ 327*1071Sbill if(hp == 
&hasht[MAXT]) 328*1071Sbill hp = hasht; 329*1071Sbill /* possible match */ 330*1071Sbill if(cmpword(pstrt,pchar,cp)){ 331*1071Sbill /* exact match */ 332*1071Sbill if(!ignore && only) 333*1071Sbill putline(pstrt,pend); 334*1071Sbill flag = 0; 335*1071Sbill break; 336*1071Sbill } 337*1071Sbill } 338*1071Sbill /* no match */ 339*1071Sbill if(flag){ 340*1071Sbill if(ignore || !only) 341*1071Sbill putline(pstrt,pend); 342*1071Sbill flag = 0; 343*1071Sbill } 344*1071Sbill } 345*1071Sbill pchar++; 346*1071Sbill } 347*1071Sbill } 348*1071Sbill } 349*1071Sbill 350*1071Sbill cmpword(cpp,pend,hpp) 351*1071Sbill char *cpp, *pend, *hpp; 352*1071Sbill { 353*1071Sbill char c; 354*1071Sbill 355*1071Sbill while(*hpp != '\0'){ 356*1071Sbill c = *cpp++; 357*1071Sbill if((isupper(c)?tolower(c):c) != *hpp++) 358*1071Sbill return(0); 359*1071Sbill } 360*1071Sbill if(--cpp == pend) return(1); 361*1071Sbill return(0); 362*1071Sbill } 363*1071Sbill 364*1071Sbill putline(strt, end) 365*1071Sbill char *strt, *end; 366*1071Sbill { 367*1071Sbill char *cp; 368*1071Sbill 369*1071Sbill for(cp=strt; cp<end; cp++) 370*1071Sbill putc(*cp, sortptr); 371*1071Sbill /* Add extra blank before TILDE to sort correctly 372*1071Sbill with -fd option */ 373*1071Sbill putc(' ',sortptr); 374*1071Sbill putc(TILDE,sortptr); 375*1071Sbill for (cp=line; cp<strt; cp++) 376*1071Sbill putc(*cp,sortptr); 377*1071Sbill putc('\n',sortptr); 378*1071Sbill } 379*1071Sbill 380*1071Sbill getsort() 381*1071Sbill { 382*1071Sbill register c; 383*1071Sbill register char *tilde, *linep, *ref; 384*1071Sbill char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b; 385*1071Sbill int w; 386*1071Sbill char *rtrim(), *ltrim(); 387*1071Sbill 388*1071Sbill if((sortptr = fopen(sortfile,"r")) == NULL) 389*1071Sbill diag("Cannot open sorted data:",sortfile); 390*1071Sbill 391*1071Sbill halflen = (llen-gutter)/2; 392*1071Sbill linep = line; 393*1071Sbill while((c = getc(sortptr)) != EOF) { 394*1071Sbill switch(c) { 395*1071Sbill 396*1071Sbill case 
TILDE: 397*1071Sbill tilde = linep; 398*1071Sbill break; 399*1071Sbill 400*1071Sbill case '\n': 401*1071Sbill while(isspace(linep[-1])) 402*1071Sbill linep--; 403*1071Sbill ref = tilde; 404*1071Sbill if(rflag) { 405*1071Sbill while(ref<linep&&!isspace(*ref)) 406*1071Sbill ref++; 407*1071Sbill *ref++ = 0; 408*1071Sbill } 409*1071Sbill /* the -1 is an overly conservative test to leave 410*1071Sbill space for the / that signifies truncation*/ 411*1071Sbill p3b = rtrim(p3a=line,tilde,halflen-1); 412*1071Sbill if(p3b-p3a>halflen-1) 413*1071Sbill p3b = p3a+halflen-1; 414*1071Sbill p2a = ltrim(ref,p2b=linep,halflen-1); 415*1071Sbill if(p2b-p2a>halflen-1) 416*1071Sbill p2a = p2b-halflen-1; 417*1071Sbill p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde, 418*1071Sbill w=halflen-(p2b-p2a)-gap); 419*1071Sbill if(p1b-p1a>w) 420*1071Sbill p1b = p1a; 421*1071Sbill p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0), 422*1071Sbill w=halflen-(p3b-p3a)-gap); 423*1071Sbill if(p4b-p4a>w) 424*1071Sbill p4a = p4b; 425*1071Sbill fprintf(outptr,".xx \""); 426*1071Sbill putout(p1a,p1b); 427*1071Sbill /* tilde-1 to account for extra space before TILDE */ 428*1071Sbill if(p1b!=(tilde-1) && p1a!=p1b) 429*1071Sbill fprintf(outptr,"/"); 430*1071Sbill fprintf(outptr,"\" \""); 431*1071Sbill if(p4a==p4b && p2a!=ref && p2a!=p2b) 432*1071Sbill fprintf(outptr,"/"); 433*1071Sbill putout(p2a,p2b); 434*1071Sbill fprintf(outptr,"\" \""); 435*1071Sbill putout(p3a,p3b); 436*1071Sbill /* ++p3b to account for extra blank after TILDE */ 437*1071Sbill /* ++p3b to account for extra space before TILDE */ 438*1071Sbill if(p1a==p1b && ++p3b!=tilde) 439*1071Sbill fprintf(outptr,"/"); 440*1071Sbill fprintf(outptr,"\" \""); 441*1071Sbill if(p1a==p1b && p4a!=ref && p4a!=p4b) 442*1071Sbill fprintf(outptr,"/"); 443*1071Sbill putout(p4a,p4b); 444*1071Sbill if(rflag) 445*1071Sbill fprintf(outptr,"\" %s\n",tilde); 446*1071Sbill else 447*1071Sbill fprintf(outptr,"\"\n"); 448*1071Sbill linep = line; 449*1071Sbill break; 
450*1071Sbill 451*1071Sbill case '"': 452*1071Sbill /* put double " for " */ 453*1071Sbill *linep++ = c; 454*1071Sbill default: 455*1071Sbill *linep++ = c; 456*1071Sbill } 457*1071Sbill } 458*1071Sbill } 459*1071Sbill 460*1071Sbill char *rtrim(a,c,d) 461*1071Sbill char *a,*c; 462*1071Sbill { 463*1071Sbill char *b,*x; 464*1071Sbill b = c; 465*1071Sbill for(x=a+1; x<=c&&x-a<=d; x++) 466*1071Sbill if((x==c||isspace(x[0]))&&!isspace(x[-1])) 467*1071Sbill b = x; 468*1071Sbill if(b<c&&!isspace(b[0])) 469*1071Sbill b++; 470*1071Sbill return(b); 471*1071Sbill } 472*1071Sbill 473*1071Sbill char *ltrim(c,b,d) 474*1071Sbill char *c,*b; 475*1071Sbill { 476*1071Sbill char *a,*x; 477*1071Sbill a = c; 478*1071Sbill for(x=b-1; x>=c&&b-x<=d; x--) 479*1071Sbill if(!isspace(x[0])&&(x==c||isspace(x[-1]))) 480*1071Sbill a = x; 481*1071Sbill if(a>c&&!isspace(a[-1])) 482*1071Sbill a--; 483*1071Sbill return(a); 484*1071Sbill } 485*1071Sbill 486*1071Sbill putout(strt,end) 487*1071Sbill char *strt, *end; 488*1071Sbill { 489*1071Sbill char *cp; 490*1071Sbill 491*1071Sbill cp = strt; 492*1071Sbill 493*1071Sbill for(cp=strt; cp<end; cp++) { 494*1071Sbill putc(*cp,outptr); 495*1071Sbill } 496*1071Sbill } 497*1071Sbill 498*1071Sbill onintr() 499*1071Sbill { 500*1071Sbill 501*1071Sbill if(*sortfile) 502*1071Sbill unlink(sortfile); 503*1071Sbill exit(1); 504*1071Sbill } 505*1071Sbill 506*1071Sbill hash(strtp,endp) 507*1071Sbill char *strtp, *endp; 508*1071Sbill { 509*1071Sbill char *cp, c; 510*1071Sbill int i, j, k; 511*1071Sbill 512*1071Sbill /* Return zero hash number for single letter words */ 513*1071Sbill if((endp - strtp) == 1) 514*1071Sbill return(0); 515*1071Sbill 516*1071Sbill cp = strtp; 517*1071Sbill c = *cp++; 518*1071Sbill i = (isupper(c)?tolower(c):c); 519*1071Sbill c = *cp; 520*1071Sbill j = (isupper(c)?tolower(c):c); 521*1071Sbill i = i*j; 522*1071Sbill cp = --endp; 523*1071Sbill c = *cp--; 524*1071Sbill k = (isupper(c)?tolower(c):c); 525*1071Sbill c = *cp; 526*1071Sbill j = 
(isupper(c)?tolower(c):c); 527*1071Sbill j = k*j; 528*1071Sbill 529*1071Sbill k = (i ^ (j>>2)) & MASK; 530*1071Sbill return(k); 531*1071Sbill } 532*1071Sbill 533*1071Sbill storeh(num,strtp) 534*1071Sbill int num; 535*1071Sbill char *strtp; 536*1071Sbill { 537*1071Sbill int i; 538*1071Sbill 539*1071Sbill for(i=num; i<MAXT; i++) { 540*1071Sbill if(hasht[i] == 0) { 541*1071Sbill hasht[i] = strtp; 542*1071Sbill return(0); 543*1071Sbill } 544*1071Sbill } 545*1071Sbill for(i=0; i<num; i++) { 546*1071Sbill if(hasht[i] == 0) { 547*1071Sbill hasht[i] = strtp; 548*1071Sbill return(0); 549*1071Sbill } 550*1071Sbill } 551*1071Sbill return(1); 552*1071Sbill } 553