xref: /csrg-svn/usr.bin/ptx/ptx.c (revision 46846)
132744Sbostic #ifndef lint
2*46846Sbostic static char *sccsid = "@(#)ptx.c	4.6 (Berkeley) 03/01/91";
332744Sbostic #endif /* not lint */
41071Sbill 
51071Sbill /*	permuted title index
61071Sbill 	ptx [-t] [-i ignore] [-o only] [-w num] [-f] [input] [output]
71071Sbill 	Ptx reads the input file and permutes on words in it.
81071Sbill 	It excludes all words in the ignore file.
91071Sbill 	Alternately it includes words in the only file.
1037876Sbostic 	if neither is given it excludes the words in _PATH_EIGN.
111071Sbill 
	The width of the output line can be changed to num
	characters.  If omitted, the default is 72, or 100 for troff.
	The -f flag tells the program to fold the output.
	The -t flag says the output is for troff and the
	output is then wider.
171071Sbill 
181071Sbill 	*/
191071Sbill 
201071Sbill #include <stdio.h>
211071Sbill #include <ctype.h>
221071Sbill #include <signal.h>
2337876Sbostic #include "pathnames.h"
2437876Sbostic 
251071Sbill #define TILDE 0177
261071Sbill #define	N 30
271071Sbill #define	MAX	N*BUFSIZ
281071Sbill #define LMAX	200
291071Sbill #define MAXT	2048
301071Sbill #define MASK	03777
311071Sbill #define SET	1
321071Sbill 
331071Sbill #define isabreak(c) (btable[c])
341071Sbill 
351071Sbill extern char *calloc(), *mktemp();
361071Sbill extern char *getline();
371071Sbill int status;
381071Sbill 
391071Sbill 
401071Sbill char *hasht[MAXT];
411071Sbill char line[LMAX];
421071Sbill char btable[128];
431071Sbill int ignore;
441071Sbill int only;
451071Sbill int llen = 72;
461071Sbill int gap = 3;
471071Sbill int gutter = 3;
481071Sbill int mlen = LMAX;
491071Sbill int wlen;
501071Sbill int rflag;
511071Sbill int halflen;
521071Sbill char *strtbufp, *endbufp;
531071Sbill char *empty = "";
541071Sbill 
551071Sbill char *infile;
561071Sbill FILE *inptr = stdin;
571071Sbill 
581071Sbill char *outfile;
591071Sbill FILE *outptr = stdout;
601071Sbill 
6137876Sbostic char sortfile[] = _PATH_TMP;		/* output of sort program */
621071Sbill char nofold[] = {'-', 'd', 't', TILDE, 0};
631071Sbill char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
641071Sbill char *sortopt = nofold;
651071Sbill FILE *sortptr;
661071Sbill 
671071Sbill char *bfile;	/*contains user supplied break chars */
681071Sbill FILE *bptr;
691071Sbill 
701071Sbill main(argc,argv)
711071Sbill int argc;
721071Sbill char **argv;
731071Sbill {
741071Sbill 	register int c;
751071Sbill 	register char *bufp;
761071Sbill 	int pid;
771071Sbill 	char *pend;
78*46846Sbostic 	extern void onintr();
791071Sbill 
801071Sbill 	char *xfile;
811071Sbill 	FILE *xptr;
821071Sbill 
831071Sbill 	if(signal(SIGHUP,onintr)==SIG_IGN)
841071Sbill 		signal(SIGHUP,SIG_IGN);
851071Sbill 	if(signal(SIGINT,onintr)==SIG_IGN)
861071Sbill 		signal(SIGINT,SIG_IGN);
871071Sbill 	signal(SIGPIPE,onintr);
881071Sbill 	signal(SIGTERM,onintr);
891071Sbill 
901071Sbill /*	argument decoding	*/
911071Sbill 
9237876Sbostic 	xfile = _PATH_EIGN;
931071Sbill 	argv++;
941071Sbill 	while(argc>1 && **argv == '-') {
951071Sbill 		switch (*++*argv){
961071Sbill 
971071Sbill 		case 'r':
981071Sbill 			rflag++;
991071Sbill 			break;
1001071Sbill 		case 'f':
1011071Sbill 			sortopt = fold;
1021071Sbill 			break;
1031071Sbill 
1041071Sbill 		case 'w':
1051071Sbill 			if(argc >= 2) {
1061071Sbill 				argc--;
1071071Sbill 				wlen++;
1081071Sbill 				llen = atoi(*++argv);
1091071Sbill 				if(llen == 0)
1101071Sbill 					diag("Wrong width:",*argv);
1111071Sbill 				if(llen > LMAX) {
1121071Sbill 					llen = LMAX;
1131071Sbill 					msg("Lines truncated to 200 chars.",empty);
1141071Sbill 				}
1151071Sbill 				break;
1161071Sbill 			}
1171071Sbill 
1181071Sbill 		case 't':
1191071Sbill 			if(wlen == 0)
1201071Sbill 				llen = 100;
1211071Sbill 			break;
1221071Sbill 		case 'g':
1231071Sbill 			if(argc >=2) {
1241071Sbill 				argc--;
1251071Sbill 				gap = gutter = atoi(*++argv);
1261071Sbill 			}
1271071Sbill 			break;
1281071Sbill 
1291071Sbill 		case 'i':
1301071Sbill 			if(only)
1311071Sbill 				diag("Only file already given.",empty);
1321071Sbill 			if (argc>=2){
1331071Sbill 				argc--;
1341071Sbill 				ignore++;
1351071Sbill 				xfile = *++argv;
1361071Sbill 			}
1371071Sbill 			break;
1381071Sbill 
1391071Sbill 		case 'o':
1401071Sbill 			if(ignore)
1411071Sbill 				diag("Ignore file already given",empty);
1421071Sbill 			if (argc>=2){
1431071Sbill 				only++;
1441071Sbill 				argc--;
1451071Sbill 				xfile = *++argv;
1461071Sbill 			}
1471071Sbill 			break;
1481071Sbill 
1491071Sbill 		case 'b':
1501071Sbill 			if(argc>=2) {
1511071Sbill 				argc--;
1521071Sbill 				bfile = *++argv;
1531071Sbill 			}
1541071Sbill 			break;
1551071Sbill 
1561071Sbill 		default:
1571071Sbill 			msg("Illegal argument:",*argv);
1581071Sbill 		}
1591071Sbill 		argc--;
1601071Sbill 		argv++;
1611071Sbill 	}
1621071Sbill 
1631071Sbill 	if(argc>3)
1641071Sbill 		diag("Too many filenames",empty);
1651071Sbill 	else if(argc==3){
1661071Sbill 		infile = *argv++;
1671071Sbill 		outfile = *argv;
1681071Sbill 		if((outptr = fopen(outfile,"w")) == NULL)
1691071Sbill 			diag("Cannot open output file:",outfile);
1701071Sbill 	} else if(argc==2) {
1711071Sbill 		infile = *argv;
1721071Sbill 		outfile = 0;
1731071Sbill 	}
1741071Sbill 
1751071Sbill 
1761071Sbill 	/* Default breaks of blank, tab and newline */
1771071Sbill 	btable[' '] = SET;
1781071Sbill 	btable['\t'] = SET;
1791071Sbill 	btable['\n'] = SET;
1801071Sbill 	if(bfile) {
1811071Sbill 		if((bptr = fopen(bfile,"r")) == NULL)
1821071Sbill 			diag("Cannot open break char file",bfile);
1831071Sbill 
1841071Sbill 		while((c = getc(bptr)) != EOF)
1851071Sbill 			btable[c] = SET;
1861071Sbill 	}
1871071Sbill 
1881071Sbill /*	Allocate space for a buffer.  If only or ignore file present
1891071Sbill 	read it into buffer. Else read in default ignore file
1901071Sbill 	and put resulting words in buffer.
1911071Sbill 	*/
1921071Sbill 
1931071Sbill 
1941071Sbill 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
1951071Sbill 		diag("Out of memory space",empty);
1961071Sbill 	bufp = strtbufp;
1971071Sbill 	endbufp = strtbufp+MAX;
1981071Sbill 
1991071Sbill 	if((xptr = fopen(xfile,"r")) == NULL)
2001071Sbill 		diag("Cannot open  file",xfile);
2011071Sbill 
2021071Sbill 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
2031071Sbill 		if(isabreak(c)) {
2041071Sbill 			if(storeh(hash(strtbufp,bufp),strtbufp))
2051071Sbill 				diag("Too many words",xfile);
2061071Sbill 			*bufp++ = '\0';
2071071Sbill 			strtbufp = bufp;
2081071Sbill 		}
2091071Sbill 		else {
2101071Sbill 			*bufp++ = (isupper(c)?tolower(c):c);
2111071Sbill 		}
2121071Sbill 	}
2131071Sbill 	if (bufp >= endbufp)
2141071Sbill 		diag("Too many words in file",xfile);
2151071Sbill 	endbufp = --bufp;
2161071Sbill 
2171071Sbill 	/* open output file for sorting */
2181071Sbill 
21935258Sbostic 	mktemp(sortfile);
2201071Sbill 	if((sortptr = fopen(sortfile, "w")) == NULL)
2211071Sbill 		diag("Cannot open output for sorting:",sortfile);
2221071Sbill 
2231071Sbill /*	get a line of data and compare each word for
2241071Sbill 	inclusion or exclusion in the sort phase
2251071Sbill */
2261071Sbill 
2271071Sbill 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
2281071Sbill 		diag("Cannot open data: ",infile);
2291071Sbill 	while(pend=getline())
2301071Sbill 		cmpline(pend);
2311071Sbill 	fclose(sortptr);
2321071Sbill 
2331071Sbill 	switch (pid = fork()){
2341071Sbill 
2351071Sbill 	case -1:	/* cannot fork */
2361071Sbill 		diag("Cannot fork",empty);
2371071Sbill 
2381071Sbill 	case 0:		/* child */
23937876Sbostic 		execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
2401071Sbill 			sortfile, "-o", sortfile, 0);
2411071Sbill 
2421071Sbill 	default:	/* parent */
2431071Sbill 		while(wait(&status) != pid);
2441071Sbill 	}
2451071Sbill 
2461071Sbill 
2471071Sbill 	getsort();
24835258Sbostic 	unlink(sortfile);
24925024Sbloom 	exit(0);
2501071Sbill }
2511071Sbill 
/*
 * msg: print the message s followed by a blank, arg and a newline on
 * the standard error output.  Always returns 0; the return type stays
 * int because callers earlier in the file use the function without a
 * declaration.
 */
int
msg(s,arg)
char *s;
char *arg;
{
	fprintf(stderr,"%s %s\n",s,arg);
	return(0);
}
/*
 * diag: report a fatal error with msg(s,arg) and terminate the program
 * with exit status 1.  Does not return; the return type stays int
 * because callers earlier in the file use the function without a
 * declaration.
 */
int
diag(s,arg)
char *s, *arg;
{

	msg(s,arg);
	exit(1);
	/* NOTREACHED */
	return(1);
}
2661071Sbill 
2671071Sbill 
2681071Sbill char *getline()
2691071Sbill {
2701071Sbill 
2711071Sbill 	register c;
2721071Sbill 	register char *linep;
2731071Sbill 	char *endlinep;
2741071Sbill 
2751071Sbill 
2761071Sbill 	endlinep= line + mlen;
2771071Sbill 	linep = line;
2781071Sbill 	/* Throw away leading white space */
2791071Sbill 
2801071Sbill 	while(isspace(c=getc(inptr)))
2811071Sbill 		;
2821071Sbill 	if(c==EOF)
2831071Sbill 		return(0);
2841071Sbill 	ungetc(c,inptr);
2851071Sbill 	while(( c=getc(inptr)) != EOF) {
2861071Sbill 		switch (c) {
2871071Sbill 
2881071Sbill 			case '\t':
2891071Sbill 				if(linep<endlinep)
2901071Sbill 					*linep++ = ' ';
2911071Sbill 				break;
2921071Sbill 			case '\n':
2931071Sbill 				while(isspace(*--linep));
2941071Sbill 				*++linep = '\n';
2951071Sbill 				return(linep);
2961071Sbill 			default:
2971071Sbill 				if(linep < endlinep)
2981071Sbill 					*linep++ = c;
2991071Sbill 		}
3001071Sbill 	}
3011071Sbill 	return(0);
3021071Sbill }
3031071Sbill 
3041071Sbill cmpline(pend)
3051071Sbill char *pend;
3061071Sbill {
3071071Sbill 
3081071Sbill 	char *pstrt, *pchar, *cp;
3091071Sbill 	char **hp;
3101071Sbill 	int flag;
3111071Sbill 
3121071Sbill 	pchar = line;
3131071Sbill 	if(rflag)
3141071Sbill 		while(pchar<pend&&!isspace(*pchar))
3151071Sbill 			pchar++;
3161071Sbill 	while(pchar<pend){
3171071Sbill 	/* eliminate white space */
3181071Sbill 		if(isabreak(*pchar++))
3191071Sbill 			continue;
3201071Sbill 		pstrt = --pchar;
3211071Sbill 
3221071Sbill 		flag = 1;
3231071Sbill 		while(flag){
3241071Sbill 			if(isabreak(*pchar)) {
3251071Sbill 				hp = &hasht[hash(pstrt,pchar)];
3261071Sbill 				pchar--;
3271071Sbill 				while(cp = *hp++){
3281071Sbill 					if(hp == &hasht[MAXT])
3291071Sbill 						hp = hasht;
3301071Sbill 	/* possible match */
3311071Sbill 					if(cmpword(pstrt,pchar,cp)){
3321071Sbill 	/* exact match */
3331071Sbill 						if(!ignore && only)
3341071Sbill 							putline(pstrt,pend);
3351071Sbill 						flag = 0;
3361071Sbill 						break;
3371071Sbill 					}
3381071Sbill 				}
3391071Sbill 	/* no match */
3401071Sbill 				if(flag){
3411071Sbill 					if(ignore || !only)
3421071Sbill 						putline(pstrt,pend);
3431071Sbill 					flag = 0;
3441071Sbill 				}
3451071Sbill 			}
3461071Sbill 		pchar++;
3471071Sbill 		}
3481071Sbill 	}
3491071Sbill }
3501071Sbill 
/*
 * cmpword: compare the word that starts at cpp and whose LAST character
 * is at pend with the NUL-terminated, lower-case word hpp.  Upper-case
 * letters of the text are folded to lower case before comparing.
 *
 * Returns 1 when the two words are identical, 0 otherwise.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int c;

	while(*hpp != '\0'){
		/* hpp is longer than the word: do not read past the word */
		if(cpp > pend)
			return(0);
		/* ctype functions need an unsigned char value */
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* equal only if the whole word has been consumed as well */
	if(cpp == pend+1) return(1);
	return(0);
}
3641071Sbill 
3651071Sbill putline(strt, end)
3661071Sbill char *strt, *end;
3671071Sbill {
3681071Sbill 	char *cp;
3691071Sbill 
3701071Sbill 	for(cp=strt; cp<end; cp++)
3711071Sbill 		putc(*cp, sortptr);
3721071Sbill 	/* Add extra blank before TILDE to sort correctly
3731071Sbill 	   with -fd option */
3741071Sbill 	putc(' ',sortptr);
3751071Sbill 	putc(TILDE,sortptr);
3761071Sbill 	for (cp=line; cp<strt; cp++)
3771071Sbill 		putc(*cp,sortptr);
3781071Sbill 	putc('\n',sortptr);
3791071Sbill }
3801071Sbill 
3811071Sbill getsort()
3821071Sbill {
3831071Sbill 	register c;
3841071Sbill 	register char *tilde, *linep, *ref;
3851071Sbill 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
3861071Sbill 	int w;
3871071Sbill 	char *rtrim(), *ltrim();
3881071Sbill 
3891071Sbill 	if((sortptr = fopen(sortfile,"r")) == NULL)
3901071Sbill 		diag("Cannot open sorted data:",sortfile);
3911071Sbill 
3921071Sbill 	halflen = (llen-gutter)/2;
3931071Sbill 	linep = line;
3941071Sbill 	while((c = getc(sortptr)) != EOF) {
3951071Sbill 		switch(c) {
3961071Sbill 
3971071Sbill 		case TILDE:
3981071Sbill 			tilde = linep;
3991071Sbill 			break;
4001071Sbill 
4011071Sbill 		case '\n':
4021071Sbill 			while(isspace(linep[-1]))
4031071Sbill 				linep--;
4041071Sbill 			ref = tilde;
4051071Sbill 			if(rflag) {
4061071Sbill 				while(ref<linep&&!isspace(*ref))
4071071Sbill 					ref++;
4081071Sbill 				*ref++ = 0;
4091071Sbill 			}
4101071Sbill 		/* the -1 is an overly conservative test to leave
4111071Sbill 		   space for the / that signifies truncation*/
4121071Sbill 			p3b = rtrim(p3a=line,tilde,halflen-1);
4131071Sbill 			if(p3b-p3a>halflen-1)
4141071Sbill 				p3b = p3a+halflen-1;
4151071Sbill 			p2a = ltrim(ref,p2b=linep,halflen-1);
4161071Sbill 			if(p2b-p2a>halflen-1)
4171071Sbill 				p2a = p2b-halflen-1;
4181071Sbill 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
4191071Sbill 				w=halflen-(p2b-p2a)-gap);
4201071Sbill 			if(p1b-p1a>w)
4211071Sbill 				p1b = p1a;
4221071Sbill 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
4231071Sbill 				w=halflen-(p3b-p3a)-gap);
4241071Sbill 			if(p4b-p4a>w)
4251071Sbill 				p4a = p4b;
4261071Sbill 			fprintf(outptr,".xx \"");
4271071Sbill 			putout(p1a,p1b);
4281071Sbill 	/* tilde-1 to account for extra space before TILDE */
4291071Sbill 			if(p1b!=(tilde-1) && p1a!=p1b)
4301071Sbill 				fprintf(outptr,"/");
4311071Sbill 			fprintf(outptr,"\" \"");
4321071Sbill 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
4331071Sbill 				fprintf(outptr,"/");
4341071Sbill 			putout(p2a,p2b);
4351071Sbill 			fprintf(outptr,"\" \"");
4361071Sbill 			putout(p3a,p3b);
4371071Sbill 	/* ++p3b to account for extra blank after TILDE */
4381071Sbill 	/* ++p3b to account for extra space before TILDE */
4391071Sbill 			if(p1a==p1b && ++p3b!=tilde)
4401071Sbill 				fprintf(outptr,"/");
4411071Sbill 			fprintf(outptr,"\" \"");
4421071Sbill 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
4431071Sbill 				fprintf(outptr,"/");
4441071Sbill 			putout(p4a,p4b);
4451071Sbill 			if(rflag)
4461071Sbill 				fprintf(outptr,"\" %s\n",tilde);
4471071Sbill 			else
4481071Sbill 				fprintf(outptr,"\"\n");
4491071Sbill 			linep = line;
4501071Sbill 			break;
4511071Sbill 
4521071Sbill 		case '"':
4531071Sbill 	/* put double " for "  */
4541071Sbill 			*linep++ = c;
4551071Sbill 		default:
4561071Sbill 			*linep++ = c;
4571071Sbill 		}
4581071Sbill 	}
4591071Sbill }
4601071Sbill 
/*
 * rtrim: find where the text starting at a can be cut so that at most
 * d characters are kept and the cut falls directly after a word.
 *
 * a is the start of the text, c its end (one past the last character).
 * Returns the last position x with a < x <= c and x-a <= d that is
 * either c itself or a white-space character and is preceded by a
 * non-space character.  If there is no such position c is returned,
 * so the caller has to check the resulting length itself.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	for(x=a+1; x<=c&&x-a<=d; x++)
		if((x==c||isspace((unsigned char)x[0]))&&!isspace((unsigned char)x[-1]))
			b = x;
	/* b is either c or points at white space, so no further
	   adjustment is ever needed */
	return(b);
}
4731071Sbill 
/*
 * ltrim: find where the text ending at b can be cut so that at most d
 * characters are kept and the cut falls directly in front of a word.
 *
 * c is the start of the text, b its end (one past the last character).
 * Returns the first position p with c <= p < b and b-p <= d that holds
 * a non-space character and is either c itself or preceded by white
 * space.  If there is no such position c is returned, so the caller
 * has to check the resulting length itself.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x,*p;

	a = c;
	/* x runs one ahead of the examined position p so that no pointer
	   below c is ever formed */
	for(x=b; x>c&&b-x<d; x--) {
		p = x-1;
		if(!isspace((unsigned char)p[0])&&(p==c||isspace((unsigned char)p[-1])))
			a = p;
	}
	/* a is either c or preceded by white space, so no further
	   adjustment is ever needed */
	return(a);
}
4861071Sbill 
4871071Sbill putout(strt,end)
4881071Sbill char *strt, *end;
4891071Sbill {
4901071Sbill 	char *cp;
4911071Sbill 
4921071Sbill 	cp = strt;
4931071Sbill 
4941071Sbill 	for(cp=strt; cp<end; cp++) {
4951071Sbill 		putc(*cp,outptr);
4961071Sbill 	}
4971071Sbill }
4981071Sbill 
499*46846Sbostic void
5001071Sbill onintr()
5011071Sbill {
5021071Sbill 
50335258Sbostic 	unlink(sortfile);
5041071Sbill 	exit(1);
5051071Sbill }
5061071Sbill 
5071071Sbill hash(strtp,endp)
5081071Sbill char *strtp, *endp;
5091071Sbill {
5101071Sbill 	char *cp, c;
5111071Sbill 	int i, j, k;
5121071Sbill 
5131071Sbill 	/* Return zero hash number for single letter words */
5141071Sbill 	if((endp - strtp) == 1)
5151071Sbill 		return(0);
5161071Sbill 
5171071Sbill 	cp = strtp;
5181071Sbill 	c = *cp++;
5191071Sbill 	i = (isupper(c)?tolower(c):c);
5201071Sbill 	c = *cp;
5211071Sbill 	j = (isupper(c)?tolower(c):c);
5221071Sbill 	i = i*j;
5231071Sbill 	cp = --endp;
5241071Sbill 	c = *cp--;
5251071Sbill 	k = (isupper(c)?tolower(c):c);
5261071Sbill 	c = *cp;
5271071Sbill 	j = (isupper(c)?tolower(c):c);
5281071Sbill 	j = k*j;
5291071Sbill 
5301071Sbill 	k = (i ^ (j>>2)) & MASK;
5311071Sbill 	return(k);
5321071Sbill }
5331071Sbill 
5341071Sbill storeh(num,strtp)
5351071Sbill int num;
5361071Sbill char *strtp;
5371071Sbill {
5381071Sbill 	int i;
5391071Sbill 
5401071Sbill 	for(i=num; i<MAXT; i++) {
5411071Sbill 		if(hasht[i] == 0) {
5421071Sbill 			hasht[i] = strtp;
5431071Sbill 			return(0);
5441071Sbill 		}
5451071Sbill 	}
5461071Sbill 	for(i=0; i<num; i++) {
5471071Sbill 		if(hasht[i] == 0) {
5481071Sbill 			hasht[i] = strtp;
5491071Sbill 			return(0);
5501071Sbill 		}
5511071Sbill 	}
5521071Sbill 	return(1);
5531071Sbill }
554