1*32744Sbostic #ifndef lint 2*32744Sbostic static char *sccsid = "@(#)ptx.c 4.3 (Berkeley) 12/02/87"; 3*32744Sbostic #endif /* not lint */ 41071Sbill 51071Sbill /* permuted title index 61071Sbill ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output] 71071Sbill Ptx reads the input file and permutes on words in it. 81071Sbill It excludes all words in the ignore file. 91071Sbill Alternately it includes words in the only file. 101071Sbill if neither is given it excludes the words in /usr/lib/eign. 111071Sbill 121071Sbill The width of the output line can be changed to num 131071Sbill characters. If omitted 72 is default unless troff than 100. 141071Sbill the -f flag tells the program to fold the output 151071Sbill the -t flag says the output is for troff and the 161071Sbill output is then wider. 171071Sbill 181071Sbill */ 191071Sbill 201071Sbill #include <stdio.h> 211071Sbill #include <ctype.h> 221071Sbill #include <signal.h> 231071Sbill #define DEFLTX "/usr/lib/eign" 241071Sbill #define TILDE 0177 251071Sbill #define SORT "/usr/bin/sort" 261071Sbill #define N 30 271071Sbill #define MAX N*BUFSIZ 281071Sbill #define LMAX 200 291071Sbill #define MAXT 2048 301071Sbill #define MASK 03777 311071Sbill #define SET 1 321071Sbill 331071Sbill #define isabreak(c) (btable[c]) 341071Sbill 351071Sbill extern char *calloc(), *mktemp(); 361071Sbill extern char *getline(); 371071Sbill int status; 381071Sbill 391071Sbill 401071Sbill char *hasht[MAXT]; 411071Sbill char line[LMAX]; 421071Sbill char btable[128]; 431071Sbill int ignore; 441071Sbill int only; 451071Sbill int llen = 72; 461071Sbill int gap = 3; 471071Sbill int gutter = 3; 481071Sbill int mlen = LMAX; 491071Sbill int wlen; 501071Sbill int rflag; 511071Sbill int halflen; 521071Sbill char *strtbufp, *endbufp; 531071Sbill char *empty = ""; 541071Sbill 551071Sbill char *infile; 561071Sbill FILE *inptr = stdin; 571071Sbill 581071Sbill char *outfile; 591071Sbill FILE *outptr = stdout; 601071Sbill 611071Sbill char *sortfile; /* output of sort program */ 621071Sbill char nofold[] = {'-', 'd', 't', TILDE, 0}; 631071Sbill char fold[] = {'-', 'd', 'f', 't', TILDE, 0}; 641071Sbill char *sortopt = nofold; 651071Sbill FILE *sortptr; 661071Sbill 671071Sbill char *bfile; /*contains user supplied break chars */ 681071Sbill FILE *bptr; 691071Sbill 701071Sbill main(argc,argv) 711071Sbill int argc; 721071Sbill char **argv; 731071Sbill { 741071Sbill register int c; 751071Sbill register char *bufp; 761071Sbill int pid; 771071Sbill char *pend; 781071Sbill extern onintr(); 791071Sbill 801071Sbill char *xfile; 811071Sbill FILE *xptr; 821071Sbill 831071Sbill if(signal(SIGHUP,onintr)==SIG_IGN) 841071Sbill signal(SIGHUP,SIG_IGN); 851071Sbill if(signal(SIGINT,onintr)==SIG_IGN) 861071Sbill signal(SIGINT,SIG_IGN); 871071Sbill signal(SIGPIPE,onintr); 881071Sbill signal(SIGTERM,onintr); 891071Sbill 901071Sbill /* argument decoding */ 911071Sbill 921071Sbill xfile = DEFLTX; 931071Sbill argv++; 941071Sbill while(argc>1 && **argv == '-') { 951071Sbill switch (*++*argv){ 961071Sbill 971071Sbill case 'r': 981071Sbill rflag++; 991071Sbill break; 1001071Sbill case 'f': 1011071Sbill sortopt = fold; 1021071Sbill break; 1031071Sbill 1041071Sbill case 'w': 1051071Sbill if(argc >= 2) { 1061071Sbill argc--; 1071071Sbill wlen++; 1081071Sbill llen = atoi(*++argv); 1091071Sbill if(llen == 0) 1101071Sbill diag("Wrong width:",*argv); 1111071Sbill if(llen > LMAX) { 1121071Sbill llen = LMAX; 1131071Sbill msg("Lines truncated to 200 chars.",empty); 1141071Sbill } 1151071Sbill break; 1161071Sbill } 1171071Sbill 1181071Sbill case 't': 1191071Sbill if(wlen == 0) 1201071Sbill llen = 100; 1211071Sbill break; 1221071Sbill case 'g': 1231071Sbill if(argc >=2) { 1241071Sbill argc--; 1251071Sbill gap = gutter = atoi(*++argv); 1261071Sbill } 1271071Sbill break; 1281071Sbill 1291071Sbill case 'i': 1301071Sbill if(only) 1311071Sbill diag("Only file already given.",empty); 1321071Sbill if (argc>=2){ 1331071Sbill argc--; 1341071Sbill ignore++; 1351071Sbill xfile = *++argv; 1361071Sbill } 1371071Sbill break; 1381071Sbill 1391071Sbill case 'o': 1401071Sbill if(ignore) 1411071Sbill diag("Ignore file already given",empty); 1421071Sbill if (argc>=2){ 1431071Sbill only++; 1441071Sbill argc--; 1451071Sbill xfile = *++argv; 1461071Sbill } 1471071Sbill break; 1481071Sbill 1491071Sbill case 'b': 1501071Sbill if(argc>=2) { 1511071Sbill argc--; 1521071Sbill bfile = *++argv; 1531071Sbill } 1541071Sbill break; 1551071Sbill 1561071Sbill default: 1571071Sbill msg("Illegal argument:",*argv); 1581071Sbill } 1591071Sbill argc--; 1601071Sbill argv++; 1611071Sbill } 1621071Sbill 1631071Sbill if(argc>3) 1641071Sbill diag("Too many filenames",empty); 1651071Sbill else if(argc==3){ 1661071Sbill infile = *argv++; 1671071Sbill outfile = *argv; 1681071Sbill if((outptr = fopen(outfile,"w")) == NULL) 1691071Sbill diag("Cannot open output file:",outfile); 1701071Sbill } else if(argc==2) { 1711071Sbill infile = *argv; 1721071Sbill outfile = 0; 1731071Sbill } 1741071Sbill 1751071Sbill 1761071Sbill /* Default breaks of blank, tab and newline */ 1771071Sbill btable[' '] = SET; 1781071Sbill btable['\t'] = SET; 1791071Sbill btable['\n'] = SET; 1801071Sbill if(bfile) { 1811071Sbill if((bptr = fopen(bfile,"r")) == NULL) 1821071Sbill diag("Cannot open break char file",bfile); 1831071Sbill 1841071Sbill while((c = getc(bptr)) != EOF) 1851071Sbill btable[c] = SET; 1861071Sbill } 1871071Sbill 1881071Sbill /* Allocate space for a buffer. If only or ignore file present 1891071Sbill read it into buffer. Else read in default ignore file 1901071Sbill and put resulting words in buffer. 1911071Sbill */ 1921071Sbill 1931071Sbill 1941071Sbill if((strtbufp = calloc(N,BUFSIZ)) == NULL) 1951071Sbill diag("Out of memory space",empty); 1961071Sbill bufp = strtbufp; 1971071Sbill endbufp = strtbufp+MAX; 1981071Sbill 1991071Sbill if((xptr = fopen(xfile,"r")) == NULL) 2001071Sbill diag("Cannot open file",xfile); 2011071Sbill 2021071Sbill while(bufp < endbufp && (c = getc(xptr)) != EOF) { 2031071Sbill if(isabreak(c)) { 2041071Sbill if(storeh(hash(strtbufp,bufp),strtbufp)) 2051071Sbill diag("Too many words",xfile); 2061071Sbill *bufp++ = '\0'; 2071071Sbill strtbufp = bufp; 2081071Sbill } 2091071Sbill else { 2101071Sbill *bufp++ = (isupper(c)?tolower(c):c); 2111071Sbill } 2121071Sbill } 2131071Sbill if (bufp >= endbufp) 2141071Sbill diag("Too many words in file",xfile); 2151071Sbill endbufp = --bufp; 2161071Sbill 2171071Sbill /* open output file for sorting */ 2181071Sbill 2191071Sbill sortfile = mktemp("/tmp/ptxsXXXXX"); 2201071Sbill if((sortptr = fopen(sortfile, "w")) == NULL) 2211071Sbill diag("Cannot open output for sorting:",sortfile); 2221071Sbill 2231071Sbill /* get a line of data and compare each word for 2241071Sbill inclusion or exclusion in the sort phase 2251071Sbill */ 2261071Sbill 2271071Sbill if (infile!=0 && (inptr = fopen(infile,"r")) == NULL) 2281071Sbill diag("Cannot open data: ",infile); 2291071Sbill while(pend=getline()) 2301071Sbill cmpline(pend); 2311071Sbill fclose(sortptr); 2321071Sbill 2331071Sbill switch (pid = fork()){ 2341071Sbill 2351071Sbill case -1: /* cannot fork */ 2361071Sbill diag("Cannot fork",empty); 2371071Sbill 2381071Sbill case 0: /* child */ 2391071Sbill execl(SORT, SORT, sortopt, "+0", "-1", "+1", 2401071Sbill sortfile, "-o", sortfile, 0); 2411071Sbill 2421071Sbill default: /* parent */ 2431071Sbill while(wait(&status) != pid); 2441071Sbill } 2451071Sbill 2461071Sbill 2471071Sbill getsort(); 248*32744Sbostic if(sortfile) 24925024Sbloom unlink(sortfile); 25025024Sbloom exit(0); 2511071Sbill } 2521071Sbill 2531071Sbill msg(s,arg) 2541071Sbill char *s; 2551071Sbill char *arg; 2561071Sbill { 2571071Sbill fprintf(stderr,"%s %s\n",s,arg); 2581071Sbill return; 2591071Sbill } 2601071Sbill diag(s,arg) 2611071Sbill char *s, *arg; 2621071Sbill { 2631071Sbill 2641071Sbill msg(s,arg); 2651071Sbill exit(1); 2661071Sbill } 2671071Sbill 2681071Sbill 2691071Sbill char *getline() 2701071Sbill { 2711071Sbill 2721071Sbill register c; 2731071Sbill register char *linep; 2741071Sbill char *endlinep; 2751071Sbill 2761071Sbill 2771071Sbill endlinep= line + mlen; 2781071Sbill linep = line; 2791071Sbill /* Throw away leading white space */ 2801071Sbill 2811071Sbill while(isspace(c=getc(inptr))) 2821071Sbill ; 2831071Sbill if(c==EOF) 2841071Sbill return(0); 2851071Sbill ungetc(c,inptr); 2861071Sbill while(( c=getc(inptr)) != EOF) { 2871071Sbill switch (c) { 2881071Sbill 2891071Sbill case '\t': 2901071Sbill if(linep<endlinep) 2911071Sbill *linep++ = ' '; 2921071Sbill break; 2931071Sbill case '\n': 2941071Sbill while(isspace(*--linep)); 2951071Sbill *++linep = '\n'; 2961071Sbill return(linep); 2971071Sbill default: 2981071Sbill if(linep < endlinep) 2991071Sbill *linep++ = c; 3001071Sbill } 3011071Sbill } 3021071Sbill return(0); 3031071Sbill } 3041071Sbill 3051071Sbill cmpline(pend) 3061071Sbill char *pend; 3071071Sbill { 3081071Sbill 3091071Sbill char *pstrt, *pchar, *cp; 3101071Sbill char **hp; 3111071Sbill int flag; 3121071Sbill 3131071Sbill pchar = line; 3141071Sbill if(rflag) 3151071Sbill while(pchar<pend&&!isspace(*pchar)) 3161071Sbill pchar++; 3171071Sbill while(pchar<pend){ 3181071Sbill /* eliminate white space */ 3191071Sbill if(isabreak(*pchar++)) 3201071Sbill continue; 3211071Sbill pstrt = --pchar; 3221071Sbill 3231071Sbill flag = 1; 3241071Sbill while(flag){ 3251071Sbill if(isabreak(*pchar)) { 3261071Sbill hp = &hasht[hash(pstrt,pchar)]; 3271071Sbill pchar--; 3281071Sbill while(cp = *hp++){ 3291071Sbill if(hp == &hasht[MAXT]) 3301071Sbill hp = hasht; 3311071Sbill /* possible match */ 3321071Sbill if(cmpword(pstrt,pchar,cp)){ 3331071Sbill /* exact match */ 3341071Sbill if(!ignore && only) 3351071Sbill putline(pstrt,pend); 3361071Sbill flag = 0; 3371071Sbill break; 3381071Sbill } 3391071Sbill } 3401071Sbill /* no match */ 3411071Sbill if(flag){ 3421071Sbill if(ignore || !only) 3431071Sbill putline(pstrt,pend); 3441071Sbill flag = 0; 3451071Sbill } 3461071Sbill } 3471071Sbill pchar++; 3481071Sbill } 3491071Sbill } 3501071Sbill } 3511071Sbill 3521071Sbill cmpword(cpp,pend,hpp) 3531071Sbill char *cpp, *pend, *hpp; 3541071Sbill { 3551071Sbill char c; 3561071Sbill 3571071Sbill while(*hpp != '\0'){ 3581071Sbill c = *cpp++; 3591071Sbill if((isupper(c)?tolower(c):c) != *hpp++) 3601071Sbill return(0); 3611071Sbill } 3621071Sbill if(--cpp == pend) return(1); 3631071Sbill return(0); 3641071Sbill } 3651071Sbill 3661071Sbill putline(strt, end) 3671071Sbill char *strt, *end; 3681071Sbill { 3691071Sbill char *cp; 3701071Sbill 3711071Sbill for(cp=strt; cp<end; cp++) 3721071Sbill putc(*cp, sortptr); 3731071Sbill /* Add extra blank before TILDE to sort correctly 3741071Sbill with -fd option */ 3751071Sbill putc(' ',sortptr); 3761071Sbill putc(TILDE,sortptr); 3771071Sbill for (cp=line; cp<strt; cp++) 3781071Sbill putc(*cp,sortptr); 3791071Sbill putc('\n',sortptr); 3801071Sbill } 3811071Sbill 3821071Sbill getsort() 3831071Sbill { 3841071Sbill register c; 3851071Sbill register char *tilde, *linep, *ref; 3861071Sbill char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b; 3871071Sbill int w; 3881071Sbill char *rtrim(), *ltrim(); 3891071Sbill 3901071Sbill if((sortptr = fopen(sortfile,"r")) == NULL) 3911071Sbill diag("Cannot open sorted data:",sortfile); 3921071Sbill 3931071Sbill halflen = (llen-gutter)/2; 3941071Sbill linep = line; 3951071Sbill while((c = getc(sortptr)) != EOF) { 3961071Sbill switch(c) { 3971071Sbill 3981071Sbill case TILDE: 3991071Sbill tilde = linep; 4001071Sbill break; 4011071Sbill 4021071Sbill case '\n': 4031071Sbill while(isspace(linep[-1])) 4041071Sbill linep--; 4051071Sbill ref = tilde; 4061071Sbill if(rflag) { 4071071Sbill while(ref<linep&&!isspace(*ref)) 4081071Sbill ref++; 4091071Sbill *ref++ = 0; 4101071Sbill } 4111071Sbill /* the -1 is an overly conservative test to leave 4121071Sbill space for the / that signifies truncation*/ 4131071Sbill p3b = rtrim(p3a=line,tilde,halflen-1); 4141071Sbill if(p3b-p3a>halflen-1) 4151071Sbill p3b = p3a+halflen-1; 4161071Sbill p2a = ltrim(ref,p2b=linep,halflen-1); 4171071Sbill if(p2b-p2a>halflen-1) 4181071Sbill p2a = p2b-halflen-1; 4191071Sbill p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde, 4201071Sbill w=halflen-(p2b-p2a)-gap); 4211071Sbill if(p1b-p1a>w) 4221071Sbill p1b = p1a; 4231071Sbill p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0), 4241071Sbill w=halflen-(p3b-p3a)-gap); 4251071Sbill if(p4b-p4a>w) 4261071Sbill p4a = p4b; 4271071Sbill fprintf(outptr,".xx \""); 4281071Sbill putout(p1a,p1b); 4291071Sbill /* tilde-1 to account for extra space before TILDE */ 4301071Sbill if(p1b!=(tilde-1) && p1a!=p1b) 4311071Sbill fprintf(outptr,"/"); 4321071Sbill fprintf(outptr,"\" \""); 4331071Sbill if(p4a==p4b && p2a!=ref && p2a!=p2b) 4341071Sbill fprintf(outptr,"/"); 4351071Sbill putout(p2a,p2b); 4361071Sbill fprintf(outptr,"\" \""); 4371071Sbill putout(p3a,p3b); 4381071Sbill /* ++p3b to account for extra blank after TILDE */ 4391071Sbill /* ++p3b to account for extra space before TILDE */ 4401071Sbill if(p1a==p1b && ++p3b!=tilde) 4411071Sbill fprintf(outptr,"/"); 4421071Sbill fprintf(outptr,"\" \""); 4431071Sbill if(p1a==p1b && p4a!=ref && p4a!=p4b) 4441071Sbill fprintf(outptr,"/"); 4451071Sbill putout(p4a,p4b); 4461071Sbill if(rflag) 4471071Sbill fprintf(outptr,"\" %s\n",tilde); 4481071Sbill else 4491071Sbill fprintf(outptr,"\"\n"); 4501071Sbill linep = line; 4511071Sbill break; 4521071Sbill 4531071Sbill case '"': 4541071Sbill /* put double " for " */ 4551071Sbill *linep++ = c; 4561071Sbill default: 4571071Sbill *linep++ = c; 4581071Sbill } 4591071Sbill } 4601071Sbill } 4611071Sbill 4621071Sbill char *rtrim(a,c,d) 4631071Sbill char *a,*c; 4641071Sbill { 4651071Sbill char *b,*x; 4661071Sbill b = c; 4671071Sbill for(x=a+1; x<=c&&x-a<=d; x++) 4681071Sbill if((x==c||isspace(x[0]))&&!isspace(x[-1])) 4691071Sbill b = x; 4701071Sbill if(b<c&&!isspace(b[0])) 4711071Sbill b++; 4721071Sbill return(b); 4731071Sbill } 4741071Sbill 4751071Sbill char *ltrim(c,b,d) 4761071Sbill char *c,*b; 4771071Sbill { 4781071Sbill char *a,*x; 4791071Sbill a = c; 4801071Sbill for(x=b-1; x>=c&&b-x<=d; x--) 4811071Sbill if(!isspace(x[0])&&(x==c||isspace(x[-1]))) 4821071Sbill a = x; 4831071Sbill if(a>c&&!isspace(a[-1])) 4841071Sbill a--; 4851071Sbill return(a); 4861071Sbill } 4871071Sbill 4881071Sbill putout(strt,end) 4891071Sbill char *strt, *end; 4901071Sbill { 4911071Sbill char *cp; 4921071Sbill 4931071Sbill cp = strt; 4941071Sbill 4951071Sbill for(cp=strt; cp<end; cp++) { 4961071Sbill putc(*cp,outptr); 4971071Sbill } 4981071Sbill } 4991071Sbill 5001071Sbill onintr() 5011071Sbill { 5021071Sbill 503*32744Sbostic if(sortfile) 5041071Sbill unlink(sortfile); 5051071Sbill exit(1); 5061071Sbill } 5071071Sbill 5081071Sbill hash(strtp,endp) 5091071Sbill char *strtp, *endp; 5101071Sbill { 5111071Sbill char *cp, c; 5121071Sbill int i, j, k; 5131071Sbill 5141071Sbill /* Return zero hash number for single letter words */ 5151071Sbill if((endp - strtp) == 1) 5161071Sbill return(0); 5171071Sbill 5181071Sbill cp = strtp; 5191071Sbill c = *cp++; 5201071Sbill i = (isupper(c)?tolower(c):c); 5211071Sbill c = *cp; 5221071Sbill j = (isupper(c)?tolower(c):c); 5231071Sbill i = i*j; 5241071Sbill cp = --endp; 5251071Sbill c = *cp--; 5261071Sbill k = (isupper(c)?tolower(c):c); 5271071Sbill c = *cp; 5281071Sbill j = (isupper(c)?tolower(c):c); 5291071Sbill j = k*j; 5301071Sbill 5311071Sbill k = (i ^ (j>>2)) & MASK; 5321071Sbill return(k); 5331071Sbill } 5341071Sbill 5351071Sbill storeh(num,strtp) 5361071Sbill int num; 5371071Sbill char *strtp; 5381071Sbill { 5391071Sbill int i; 5401071Sbill 5411071Sbill for(i=num; i<MAXT; i++) { 5421071Sbill if(hasht[i] == 0) { 5431071Sbill hasht[i] = strtp; 5441071Sbill return(0); 5451071Sbill } 5461071Sbill } 5471071Sbill for(i=0; i<num; i++) { 5481071Sbill if(hasht[i] == 0) { 5491071Sbill hasht[i] = strtp; 5501071Sbill return(0); 5511071Sbill } 5521071Sbill } 5531071Sbill return(1); 5541071Sbill } 555