132744Sbostic #ifndef lint 2*46846Sbostic static char *sccsid = "@(#)ptx.c 4.6 (Berkeley) 03/01/91"; 332744Sbostic #endif /* not lint */ 41071Sbill 51071Sbill /* permuted title index 61071Sbill ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output] 71071Sbill Ptx reads the input file and permutes on words in it. 81071Sbill It excludes all words in the ignore file. 91071Sbill Alternately it includes words in the only file. 1037876Sbostic if neither is given it excludes the words in _PATH_EIGN. 111071Sbill 121071Sbill The width of the output line can be changed to num 131071Sbill characters. If omitted 72 is default unless troff than 100. 141071Sbill the -f flag tells the program to fold the output 151071Sbill the -t flag says the output is for troff and the 161071Sbill output is then wider. 171071Sbill 181071Sbill */ 191071Sbill 201071Sbill #include <stdio.h> 211071Sbill #include <ctype.h> 221071Sbill #include <signal.h> 2337876Sbostic #include "pathnames.h" 2437876Sbostic 251071Sbill #define TILDE 0177 261071Sbill #define N 30 271071Sbill #define MAX N*BUFSIZ 281071Sbill #define LMAX 200 291071Sbill #define MAXT 2048 301071Sbill #define MASK 03777 311071Sbill #define SET 1 321071Sbill 331071Sbill #define isabreak(c) (btable[c]) 341071Sbill 351071Sbill extern char *calloc(), *mktemp(); 361071Sbill extern char *getline(); 371071Sbill int status; 381071Sbill 391071Sbill 401071Sbill char *hasht[MAXT]; 411071Sbill char line[LMAX]; 421071Sbill char btable[128]; 431071Sbill int ignore; 441071Sbill int only; 451071Sbill int llen = 72; 461071Sbill int gap = 3; 471071Sbill int gutter = 3; 481071Sbill int mlen = LMAX; 491071Sbill int wlen; 501071Sbill int rflag; 511071Sbill int halflen; 521071Sbill char *strtbufp, *endbufp; 531071Sbill char *empty = ""; 541071Sbill 551071Sbill char *infile; 561071Sbill FILE *inptr = stdin; 571071Sbill 581071Sbill char *outfile; 591071Sbill FILE *outptr = stdout; 601071Sbill 6137876Sbostic char sortfile[] = _PATH_TMP; /* output of sort program */ 621071Sbill char nofold[] = {'-', 'd', 't', TILDE, 0}; 631071Sbill char fold[] = {'-', 'd', 'f', 't', TILDE, 0}; 641071Sbill char *sortopt = nofold; 651071Sbill FILE *sortptr; 661071Sbill 671071Sbill char *bfile; /*contains user supplied break chars */ 681071Sbill FILE *bptr; 691071Sbill 701071Sbill main(argc,argv) 711071Sbill int argc; 721071Sbill char **argv; 731071Sbill { 741071Sbill register int c; 751071Sbill register char *bufp; 761071Sbill int pid; 771071Sbill char *pend; 78*46846Sbostic extern void onintr(); 791071Sbill 801071Sbill char *xfile; 811071Sbill FILE *xptr; 821071Sbill 831071Sbill if(signal(SIGHUP,onintr)==SIG_IGN) 841071Sbill signal(SIGHUP,SIG_IGN); 851071Sbill if(signal(SIGINT,onintr)==SIG_IGN) 861071Sbill signal(SIGINT,SIG_IGN); 871071Sbill signal(SIGPIPE,onintr); 881071Sbill signal(SIGTERM,onintr); 891071Sbill 901071Sbill /* argument decoding */ 911071Sbill 9237876Sbostic xfile = _PATH_EIGN; 931071Sbill argv++; 941071Sbill while(argc>1 && **argv == '-') { 951071Sbill switch (*++*argv){ 961071Sbill 971071Sbill case 'r': 981071Sbill rflag++; 991071Sbill break; 1001071Sbill case 'f': 1011071Sbill sortopt = fold; 1021071Sbill break; 1031071Sbill 1041071Sbill case 'w': 1051071Sbill if(argc >= 2) { 1061071Sbill argc--; 1071071Sbill wlen++; 1081071Sbill llen = atoi(*++argv); 1091071Sbill if(llen == 0) 1101071Sbill diag("Wrong width:",*argv); 1111071Sbill if(llen > LMAX) { 1121071Sbill llen = LMAX; 1131071Sbill msg("Lines truncated to 200 chars.",empty); 1141071Sbill } 1151071Sbill break; 1161071Sbill } 1171071Sbill 1181071Sbill case 't': 1191071Sbill if(wlen == 0) 1201071Sbill llen = 100; 1211071Sbill break; 1221071Sbill case 'g': 1231071Sbill if(argc >=2) { 1241071Sbill argc--; 1251071Sbill gap = gutter = atoi(*++argv); 1261071Sbill } 1271071Sbill break; 1281071Sbill 1291071Sbill case 'i': 1301071Sbill if(only) 1311071Sbill diag("Only file already given.",empty); 1321071Sbill if (argc>=2){ 1331071Sbill argc--; 1341071Sbill ignore++; 1351071Sbill xfile = *++argv; 1361071Sbill } 1371071Sbill break; 1381071Sbill 1391071Sbill case 'o': 1401071Sbill if(ignore) 1411071Sbill diag("Ignore file already given",empty); 1421071Sbill if (argc>=2){ 1431071Sbill only++; 1441071Sbill argc--; 1451071Sbill xfile = *++argv; 1461071Sbill } 1471071Sbill break; 1481071Sbill 1491071Sbill case 'b': 1501071Sbill if(argc>=2) { 1511071Sbill argc--; 1521071Sbill bfile = *++argv; 1531071Sbill } 1541071Sbill break; 1551071Sbill 1561071Sbill default: 1571071Sbill msg("Illegal argument:",*argv); 1581071Sbill } 1591071Sbill argc--; 1601071Sbill argv++; 1611071Sbill } 1621071Sbill 1631071Sbill if(argc>3) 1641071Sbill diag("Too many filenames",empty); 1651071Sbill else if(argc==3){ 1661071Sbill infile = *argv++; 1671071Sbill outfile = *argv; 1681071Sbill if((outptr = fopen(outfile,"w")) == NULL) 1691071Sbill diag("Cannot open output file:",outfile); 1701071Sbill } else if(argc==2) { 1711071Sbill infile = *argv; 1721071Sbill outfile = 0; 1731071Sbill } 1741071Sbill 1751071Sbill 1761071Sbill /* Default breaks of blank, tab and newline */ 1771071Sbill btable[' '] = SET; 1781071Sbill btable['\t'] = SET; 1791071Sbill btable['\n'] = SET; 1801071Sbill if(bfile) { 1811071Sbill if((bptr = fopen(bfile,"r")) == NULL) 1821071Sbill diag("Cannot open break char file",bfile); 1831071Sbill 1841071Sbill while((c = getc(bptr)) != EOF) 1851071Sbill btable[c] = SET; 1861071Sbill } 1871071Sbill 1881071Sbill /* Allocate space for a buffer. If only or ignore file present 1891071Sbill read it into buffer. Else read in default ignore file 1901071Sbill and put resulting words in buffer. 1911071Sbill */ 1921071Sbill 1931071Sbill 1941071Sbill if((strtbufp = calloc(N,BUFSIZ)) == NULL) 1951071Sbill diag("Out of memory space",empty); 1961071Sbill bufp = strtbufp; 1971071Sbill endbufp = strtbufp+MAX; 1981071Sbill 1991071Sbill if((xptr = fopen(xfile,"r")) == NULL) 2001071Sbill diag("Cannot open file",xfile); 2011071Sbill 2021071Sbill while(bufp < endbufp && (c = getc(xptr)) != EOF) { 2031071Sbill if(isabreak(c)) { 2041071Sbill if(storeh(hash(strtbufp,bufp),strtbufp)) 2051071Sbill diag("Too many words",xfile); 2061071Sbill *bufp++ = '\0'; 2071071Sbill strtbufp = bufp; 2081071Sbill } 2091071Sbill else { 2101071Sbill *bufp++ = (isupper(c)?tolower(c):c); 2111071Sbill } 2121071Sbill } 2131071Sbill if (bufp >= endbufp) 2141071Sbill diag("Too many words in file",xfile); 2151071Sbill endbufp = --bufp; 2161071Sbill 2171071Sbill /* open output file for sorting */ 2181071Sbill 21935258Sbostic mktemp(sortfile); 2201071Sbill if((sortptr = fopen(sortfile, "w")) == NULL) 2211071Sbill diag("Cannot open output for sorting:",sortfile); 2221071Sbill 2231071Sbill /* get a line of data and compare each word for 2241071Sbill inclusion or exclusion in the sort phase 2251071Sbill */ 2261071Sbill 2271071Sbill if (infile!=0 && (inptr = fopen(infile,"r")) == NULL) 2281071Sbill diag("Cannot open data: ",infile); 2291071Sbill while(pend=getline()) 2301071Sbill cmpline(pend); 2311071Sbill fclose(sortptr); 2321071Sbill 2331071Sbill switch (pid = fork()){ 2341071Sbill 2351071Sbill case -1: /* cannot fork */ 2361071Sbill diag("Cannot fork",empty); 2371071Sbill 2381071Sbill case 0: /* child */ 23937876Sbostic execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1", 2401071Sbill sortfile, "-o", sortfile, 0); 2411071Sbill 2421071Sbill default: /* parent */ 2431071Sbill while(wait(&status) != pid); 2441071Sbill } 2451071Sbill 2461071Sbill 2471071Sbill getsort(); 24835258Sbostic unlink(sortfile); 24925024Sbloom exit(0); 2501071Sbill } 2511071Sbill 2521071Sbill msg(s,arg) 2531071Sbill char *s; 2541071Sbill char *arg; 2551071Sbill { 2561071Sbill fprintf(stderr,"%s %s\n",s,arg); 2571071Sbill return; 2581071Sbill } 2591071Sbill diag(s,arg) 2601071Sbill char *s, *arg; 2611071Sbill { 2621071Sbill 2631071Sbill msg(s,arg); 2641071Sbill exit(1); 2651071Sbill } 2661071Sbill 2671071Sbill 2681071Sbill char *getline() 2691071Sbill { 2701071Sbill 2711071Sbill register c; 2721071Sbill register char *linep; 2731071Sbill char *endlinep; 2741071Sbill 2751071Sbill 2761071Sbill endlinep= line + mlen; 2771071Sbill linep = line; 2781071Sbill /* Throw away leading white space */ 2791071Sbill 2801071Sbill while(isspace(c=getc(inptr))) 2811071Sbill ; 2821071Sbill if(c==EOF) 2831071Sbill return(0); 2841071Sbill ungetc(c,inptr); 2851071Sbill while(( c=getc(inptr)) != EOF) { 2861071Sbill switch (c) { 2871071Sbill 2881071Sbill case '\t': 2891071Sbill if(linep<endlinep) 2901071Sbill *linep++ = ' '; 2911071Sbill break; 2921071Sbill case '\n': 2931071Sbill while(isspace(*--linep)); 2941071Sbill *++linep = '\n'; 2951071Sbill return(linep); 2961071Sbill default: 2971071Sbill if(linep < endlinep) 2981071Sbill *linep++ = c; 2991071Sbill } 3001071Sbill } 3011071Sbill return(0); 3021071Sbill } 3031071Sbill 3041071Sbill cmpline(pend) 3051071Sbill char *pend; 3061071Sbill { 3071071Sbill 3081071Sbill char *pstrt, *pchar, *cp; 3091071Sbill char **hp; 3101071Sbill int flag; 3111071Sbill 3121071Sbill pchar = line; 3131071Sbill if(rflag) 3141071Sbill while(pchar<pend&&!isspace(*pchar)) 3151071Sbill pchar++; 3161071Sbill while(pchar<pend){ 3171071Sbill /* eliminate white space */ 3181071Sbill if(isabreak(*pchar++)) 3191071Sbill continue; 3201071Sbill pstrt = --pchar; 3211071Sbill 3221071Sbill flag = 1; 3231071Sbill while(flag){ 3241071Sbill if(isabreak(*pchar)) { 3251071Sbill hp = &hasht[hash(pstrt,pchar)]; 3261071Sbill pchar--; 3271071Sbill while(cp = *hp++){ 3281071Sbill if(hp == &hasht[MAXT]) 3291071Sbill hp = hasht; 3301071Sbill /* possible match */ 3311071Sbill if(cmpword(pstrt,pchar,cp)){ 3321071Sbill /* exact match */ 3331071Sbill if(!ignore && only) 3341071Sbill putline(pstrt,pend); 3351071Sbill flag = 0; 3361071Sbill break; 3371071Sbill } 3381071Sbill } 3391071Sbill /* no match */ 3401071Sbill if(flag){ 3411071Sbill if(ignore || !only) 3421071Sbill putline(pstrt,pend); 3431071Sbill flag = 0; 3441071Sbill } 3451071Sbill } 3461071Sbill pchar++; 3471071Sbill } 3481071Sbill } 3491071Sbill } 3501071Sbill 3511071Sbill cmpword(cpp,pend,hpp) 3521071Sbill char *cpp, *pend, *hpp; 3531071Sbill { 3541071Sbill char c; 3551071Sbill 3561071Sbill while(*hpp != '\0'){ 3571071Sbill c = *cpp++; 3581071Sbill if((isupper(c)?tolower(c):c) != *hpp++) 3591071Sbill return(0); 3601071Sbill } 3611071Sbill if(--cpp == pend) return(1); 3621071Sbill return(0); 3631071Sbill } 3641071Sbill 3651071Sbill putline(strt, end) 3661071Sbill char *strt, *end; 3671071Sbill { 3681071Sbill char *cp; 3691071Sbill 3701071Sbill for(cp=strt; cp<end; cp++) 3711071Sbill putc(*cp, sortptr); 3721071Sbill /* Add extra blank before TILDE to sort correctly 3731071Sbill with -fd option */ 3741071Sbill putc(' ',sortptr); 3751071Sbill putc(TILDE,sortptr); 3761071Sbill for (cp=line; cp<strt; cp++) 3771071Sbill putc(*cp,sortptr); 3781071Sbill putc('\n',sortptr); 3791071Sbill } 3801071Sbill 3811071Sbill getsort() 3821071Sbill { 3831071Sbill register c; 3841071Sbill register char *tilde, *linep, *ref; 3851071Sbill char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b; 3861071Sbill int w; 3871071Sbill char *rtrim(), *ltrim(); 3881071Sbill 3891071Sbill if((sortptr = fopen(sortfile,"r")) == NULL) 3901071Sbill diag("Cannot open sorted data:",sortfile); 3911071Sbill 3921071Sbill halflen = (llen-gutter)/2; 3931071Sbill linep = line; 3941071Sbill while((c = getc(sortptr)) != EOF) { 3951071Sbill switch(c) { 3961071Sbill 3971071Sbill case TILDE: 3981071Sbill tilde = linep; 3991071Sbill break; 4001071Sbill 4011071Sbill case '\n': 4021071Sbill while(isspace(linep[-1])) 4031071Sbill linep--; 4041071Sbill ref = tilde; 4051071Sbill if(rflag) { 4061071Sbill while(ref<linep&&!isspace(*ref)) 4071071Sbill ref++; 4081071Sbill *ref++ = 0; 4091071Sbill } 4101071Sbill /* the -1 is an overly conservative test to leave 4111071Sbill space for the / that signifies truncation*/ 4121071Sbill p3b = rtrim(p3a=line,tilde,halflen-1); 4131071Sbill if(p3b-p3a>halflen-1) 4141071Sbill p3b = p3a+halflen-1; 4151071Sbill p2a = ltrim(ref,p2b=linep,halflen-1); 4161071Sbill if(p2b-p2a>halflen-1) 4171071Sbill p2a = p2b-halflen-1; 4181071Sbill p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde, 4191071Sbill w=halflen-(p2b-p2a)-gap); 4201071Sbill if(p1b-p1a>w) 4211071Sbill p1b = p1a; 4221071Sbill p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0), 4231071Sbill w=halflen-(p3b-p3a)-gap); 4241071Sbill if(p4b-p4a>w) 4251071Sbill p4a = p4b; 4261071Sbill fprintf(outptr,".xx \""); 4271071Sbill putout(p1a,p1b); 4281071Sbill /* tilde-1 to account for extra space before TILDE */ 4291071Sbill if(p1b!=(tilde-1) && p1a!=p1b) 4301071Sbill fprintf(outptr,"/"); 4311071Sbill fprintf(outptr,"\" \""); 4321071Sbill if(p4a==p4b && p2a!=ref && p2a!=p2b) 4331071Sbill fprintf(outptr,"/"); 4341071Sbill putout(p2a,p2b); 4351071Sbill fprintf(outptr,"\" \""); 4361071Sbill putout(p3a,p3b); 4371071Sbill /* ++p3b to account for extra blank after TILDE */ 4381071Sbill /* ++p3b to account for extra space before TILDE */ 4391071Sbill if(p1a==p1b && ++p3b!=tilde) 4401071Sbill fprintf(outptr,"/"); 4411071Sbill fprintf(outptr,"\" \""); 4421071Sbill if(p1a==p1b && p4a!=ref && p4a!=p4b) 4431071Sbill fprintf(outptr,"/"); 4441071Sbill putout(p4a,p4b); 4451071Sbill if(rflag) 4461071Sbill fprintf(outptr,"\" %s\n",tilde); 4471071Sbill else 4481071Sbill fprintf(outptr,"\"\n"); 4491071Sbill linep = line; 4501071Sbill break; 4511071Sbill 4521071Sbill case '"': 4531071Sbill /* put double " for " */ 4541071Sbill *linep++ = c; 4551071Sbill default: 4561071Sbill *linep++ = c; 4571071Sbill } 4581071Sbill } 4591071Sbill } 4601071Sbill 4611071Sbill char *rtrim(a,c,d) 4621071Sbill char *a,*c; 4631071Sbill { 4641071Sbill char *b,*x; 4651071Sbill b = c; 4661071Sbill for(x=a+1; x<=c&&x-a<=d; x++) 4671071Sbill if((x==c||isspace(x[0]))&&!isspace(x[-1])) 4681071Sbill b = x; 4691071Sbill if(b<c&&!isspace(b[0])) 4701071Sbill b++; 4711071Sbill return(b); 4721071Sbill } 4731071Sbill 4741071Sbill char *ltrim(c,b,d) 4751071Sbill char *c,*b; 4761071Sbill { 4771071Sbill char *a,*x; 4781071Sbill a = c; 4791071Sbill for(x=b-1; x>=c&&b-x<=d; x--) 4801071Sbill if(!isspace(x[0])&&(x==c||isspace(x[-1]))) 4811071Sbill a = x; 4821071Sbill if(a>c&&!isspace(a[-1])) 4831071Sbill a--; 4841071Sbill return(a); 4851071Sbill } 4861071Sbill 4871071Sbill putout(strt,end) 4881071Sbill char *strt, *end; 4891071Sbill { 4901071Sbill char *cp; 4911071Sbill 4921071Sbill cp = strt; 4931071Sbill 4941071Sbill for(cp=strt; cp<end; cp++) { 4951071Sbill putc(*cp,outptr); 4961071Sbill } 4971071Sbill } 4981071Sbill 499*46846Sbostic void 5001071Sbill onintr() 5011071Sbill { 5021071Sbill 50335258Sbostic unlink(sortfile); 5041071Sbill exit(1); 5051071Sbill } 5061071Sbill 5071071Sbill hash(strtp,endp) 5081071Sbill char *strtp, *endp; 5091071Sbill { 5101071Sbill char *cp, c; 5111071Sbill int i, j, k; 5121071Sbill 5131071Sbill /* Return zero hash number for single letter words */ 5141071Sbill if((endp - strtp) == 1) 5151071Sbill return(0); 5161071Sbill 5171071Sbill cp = strtp; 5181071Sbill c = *cp++; 5191071Sbill i = (isupper(c)?tolower(c):c); 5201071Sbill c = *cp; 5211071Sbill j = (isupper(c)?tolower(c):c); 5221071Sbill i = i*j; 5231071Sbill cp = --endp; 5241071Sbill c = *cp--; 5251071Sbill k = (isupper(c)?tolower(c):c); 5261071Sbill c = *cp; 5271071Sbill j = (isupper(c)?tolower(c):c); 5281071Sbill j = k*j; 5291071Sbill 5301071Sbill k = (i ^ (j>>2)) & MASK; 5311071Sbill return(k); 5321071Sbill } 5331071Sbill 5341071Sbill storeh(num,strtp) 5351071Sbill int num; 5361071Sbill char *strtp; 5371071Sbill { 5381071Sbill int i; 5391071Sbill 5401071Sbill for(i=num; i<MAXT; i++) { 5411071Sbill if(hasht[i] == 0) { 5421071Sbill hasht[i] = strtp; 5431071Sbill return(0); 5441071Sbill } 5451071Sbill } 5461071Sbill for(i=0; i<num; i++) { 5471071Sbill if(hasht[i] == 0) { 5481071Sbill hasht[i] = strtp; 5491071Sbill return(0); 5501071Sbill } 5511071Sbill } 5521071Sbill return(1); 5531071Sbill } 554