xref: /csrg-svn/usr.bin/ptx/ptx.c (revision 62191)
148290Sbostic /*-
2*62191Sbostic  * Copyright (c) 1989, 1993
3*62191Sbostic  *	The Regents of the University of California.  All rights reserved.
448290Sbostic  *
548290Sbostic  * %sccs.include.proprietary.c%
648290Sbostic  */
748290Sbostic 
832744Sbostic #ifndef lint
9*62191Sbostic static char copyright[] =
10*62191Sbostic "@(#) Copyright (c) 1989, 1993\n\
11*62191Sbostic 	The Regents of the University of California.  All rights reserved.\n";
1232744Sbostic #endif /* not lint */
131071Sbill 
1448290Sbostic #ifndef lint
15*62191Sbostic static char sccsid[] = "@(#)ptx.c	8.1 (Berkeley) 06/06/93";
1648290Sbostic #endif /* not lint */
1748290Sbostic 
/*	permuted title index
	ptx [-t] [-r] [-f] [-i ignore] [-o only] [-w num] [-g gap]
	    [-b breakfile] [input] [output]
	Ptx reads the input file and permutes on the words in it.
	It excludes all words in the ignore file.
	Alternatively, it includes only the words in the only file.
	If neither is given, it excludes the words in _PATH_EIGN.

	The width of the output line can be changed to num
	characters.  If omitted, the default is 72, or 100 when the
	output is for troff.
	The -f flag tells the program to fold the output.
	The -t flag says the output is for troff, and the
	output is then wider.
	The -r flag treats the first word of each input line as a
	reference that is not permuted and is printed with each entry.
	The -g flag sets the gap between output fields.
	The -b flag names a file of additional word-break characters.

	*/
321071Sbill 
#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

#include "pathnames.h"
3737876Sbostic 
381071Sbill #define TILDE 0177
391071Sbill #define	N 30
401071Sbill #define	MAX	N*BUFSIZ
411071Sbill #define LMAX	200
421071Sbill #define MAXT	2048
431071Sbill #define MASK	03777
441071Sbill #define SET	1
451071Sbill 
461071Sbill #define isabreak(c) (btable[c])
471071Sbill 
481071Sbill extern char *calloc(), *mktemp();
491071Sbill extern char *getline();
501071Sbill int status;
511071Sbill 
521071Sbill 
531071Sbill char *hasht[MAXT];
541071Sbill char line[LMAX];
551071Sbill char btable[128];
561071Sbill int ignore;
571071Sbill int only;
581071Sbill int llen = 72;
591071Sbill int gap = 3;
601071Sbill int gutter = 3;
611071Sbill int mlen = LMAX;
621071Sbill int wlen;
631071Sbill int rflag;
641071Sbill int halflen;
651071Sbill char *strtbufp, *endbufp;
661071Sbill char *empty = "";
671071Sbill 
681071Sbill char *infile;
691071Sbill FILE *inptr = stdin;
701071Sbill 
711071Sbill char *outfile;
721071Sbill FILE *outptr = stdout;
731071Sbill 
7437876Sbostic char sortfile[] = _PATH_TMP;		/* output of sort program */
751071Sbill char nofold[] = {'-', 'd', 't', TILDE, 0};
761071Sbill char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
771071Sbill char *sortopt = nofold;
781071Sbill FILE *sortptr;
791071Sbill 
801071Sbill char *bfile;	/*contains user supplied break chars */
811071Sbill FILE *bptr;
821071Sbill 
main(argc,argv)831071Sbill main(argc,argv)
841071Sbill int argc;
851071Sbill char **argv;
861071Sbill {
871071Sbill 	register int c;
881071Sbill 	register char *bufp;
891071Sbill 	int pid;
901071Sbill 	char *pend;
9146846Sbostic 	extern void onintr();
921071Sbill 
931071Sbill 	char *xfile;
941071Sbill 	FILE *xptr;
951071Sbill 
961071Sbill 	if(signal(SIGHUP,onintr)==SIG_IGN)
971071Sbill 		signal(SIGHUP,SIG_IGN);
981071Sbill 	if(signal(SIGINT,onintr)==SIG_IGN)
991071Sbill 		signal(SIGINT,SIG_IGN);
1001071Sbill 	signal(SIGPIPE,onintr);
1011071Sbill 	signal(SIGTERM,onintr);
1021071Sbill 
1031071Sbill /*	argument decoding	*/
1041071Sbill 
10537876Sbostic 	xfile = _PATH_EIGN;
1061071Sbill 	argv++;
1071071Sbill 	while(argc>1 && **argv == '-') {
1081071Sbill 		switch (*++*argv){
1091071Sbill 
1101071Sbill 		case 'r':
1111071Sbill 			rflag++;
1121071Sbill 			break;
1131071Sbill 		case 'f':
1141071Sbill 			sortopt = fold;
1151071Sbill 			break;
1161071Sbill 
1171071Sbill 		case 'w':
1181071Sbill 			if(argc >= 2) {
1191071Sbill 				argc--;
1201071Sbill 				wlen++;
1211071Sbill 				llen = atoi(*++argv);
1221071Sbill 				if(llen == 0)
1231071Sbill 					diag("Wrong width:",*argv);
1241071Sbill 				if(llen > LMAX) {
1251071Sbill 					llen = LMAX;
1261071Sbill 					msg("Lines truncated to 200 chars.",empty);
1271071Sbill 				}
1281071Sbill 				break;
1291071Sbill 			}
1301071Sbill 
1311071Sbill 		case 't':
1321071Sbill 			if(wlen == 0)
1331071Sbill 				llen = 100;
1341071Sbill 			break;
1351071Sbill 		case 'g':
1361071Sbill 			if(argc >=2) {
1371071Sbill 				argc--;
1381071Sbill 				gap = gutter = atoi(*++argv);
1391071Sbill 			}
1401071Sbill 			break;
1411071Sbill 
1421071Sbill 		case 'i':
1431071Sbill 			if(only)
1441071Sbill 				diag("Only file already given.",empty);
1451071Sbill 			if (argc>=2){
1461071Sbill 				argc--;
1471071Sbill 				ignore++;
1481071Sbill 				xfile = *++argv;
1491071Sbill 			}
1501071Sbill 			break;
1511071Sbill 
1521071Sbill 		case 'o':
1531071Sbill 			if(ignore)
1541071Sbill 				diag("Ignore file already given",empty);
1551071Sbill 			if (argc>=2){
1561071Sbill 				only++;
1571071Sbill 				argc--;
1581071Sbill 				xfile = *++argv;
1591071Sbill 			}
1601071Sbill 			break;
1611071Sbill 
1621071Sbill 		case 'b':
1631071Sbill 			if(argc>=2) {
1641071Sbill 				argc--;
1651071Sbill 				bfile = *++argv;
1661071Sbill 			}
1671071Sbill 			break;
1681071Sbill 
1691071Sbill 		default:
1701071Sbill 			msg("Illegal argument:",*argv);
1711071Sbill 		}
1721071Sbill 		argc--;
1731071Sbill 		argv++;
1741071Sbill 	}
1751071Sbill 
1761071Sbill 	if(argc>3)
1771071Sbill 		diag("Too many filenames",empty);
1781071Sbill 	else if(argc==3){
1791071Sbill 		infile = *argv++;
1801071Sbill 		outfile = *argv;
1811071Sbill 		if((outptr = fopen(outfile,"w")) == NULL)
1821071Sbill 			diag("Cannot open output file:",outfile);
1831071Sbill 	} else if(argc==2) {
1841071Sbill 		infile = *argv;
1851071Sbill 		outfile = 0;
1861071Sbill 	}
1871071Sbill 
1881071Sbill 
1891071Sbill 	/* Default breaks of blank, tab and newline */
1901071Sbill 	btable[' '] = SET;
1911071Sbill 	btable['\t'] = SET;
1921071Sbill 	btable['\n'] = SET;
1931071Sbill 	if(bfile) {
1941071Sbill 		if((bptr = fopen(bfile,"r")) == NULL)
1951071Sbill 			diag("Cannot open break char file",bfile);
1961071Sbill 
1971071Sbill 		while((c = getc(bptr)) != EOF)
1981071Sbill 			btable[c] = SET;
1991071Sbill 	}
2001071Sbill 
2011071Sbill /*	Allocate space for a buffer.  If only or ignore file present
2021071Sbill 	read it into buffer. Else read in default ignore file
2031071Sbill 	and put resulting words in buffer.
2041071Sbill 	*/
2051071Sbill 
2061071Sbill 
2071071Sbill 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
2081071Sbill 		diag("Out of memory space",empty);
2091071Sbill 	bufp = strtbufp;
2101071Sbill 	endbufp = strtbufp+MAX;
2111071Sbill 
2121071Sbill 	if((xptr = fopen(xfile,"r")) == NULL)
2131071Sbill 		diag("Cannot open  file",xfile);
2141071Sbill 
2151071Sbill 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
2161071Sbill 		if(isabreak(c)) {
2171071Sbill 			if(storeh(hash(strtbufp,bufp),strtbufp))
2181071Sbill 				diag("Too many words",xfile);
2191071Sbill 			*bufp++ = '\0';
2201071Sbill 			strtbufp = bufp;
2211071Sbill 		}
2221071Sbill 		else {
2231071Sbill 			*bufp++ = (isupper(c)?tolower(c):c);
2241071Sbill 		}
2251071Sbill 	}
2261071Sbill 	if (bufp >= endbufp)
2271071Sbill 		diag("Too many words in file",xfile);
2281071Sbill 	endbufp = --bufp;
2291071Sbill 
2301071Sbill 	/* open output file for sorting */
2311071Sbill 
23235258Sbostic 	mktemp(sortfile);
2331071Sbill 	if((sortptr = fopen(sortfile, "w")) == NULL)
2341071Sbill 		diag("Cannot open output for sorting:",sortfile);
2351071Sbill 
2361071Sbill /*	get a line of data and compare each word for
2371071Sbill 	inclusion or exclusion in the sort phase
2381071Sbill */
2391071Sbill 
2401071Sbill 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
2411071Sbill 		diag("Cannot open data: ",infile);
2421071Sbill 	while(pend=getline())
2431071Sbill 		cmpline(pend);
2441071Sbill 	fclose(sortptr);
2451071Sbill 
2461071Sbill 	switch (pid = fork()){
2471071Sbill 
2481071Sbill 	case -1:	/* cannot fork */
2491071Sbill 		diag("Cannot fork",empty);
2501071Sbill 
2511071Sbill 	case 0:		/* child */
25237876Sbostic 		execl(_PATH_SORT, "sort", sortopt, "+0", "-1", "+1",
2531071Sbill 			sortfile, "-o", sortfile, 0);
2541071Sbill 
2551071Sbill 	default:	/* parent */
2561071Sbill 		while(wait(&status) != pid);
2571071Sbill 	}
2581071Sbill 
2591071Sbill 
2601071Sbill 	getsort();
26135258Sbostic 	unlink(sortfile);
26225024Sbloom 	exit(0);
2631071Sbill }
2641071Sbill 
/*
 * Write the diagnostic "s arg" followed by a newline to stderr.
 * A null pointer for either part is printed as an empty string
 * instead of being handed to %s.  Always returns 0.
 */
int
msg(s,arg)
char *s;
char *arg;
{
	fprintf(stderr,"%s %s\n", s ? s : "", arg ? arg : "");
	return(0);
}
/*
 * Fatal diagnostic: report "s arg" through msg() and terminate the
 * program with exit status 1.  Does not return to the caller.
 */
int
diag(s,arg)
char *s, *arg;
{
	msg(s, arg);
	exit(1);
}
2791071Sbill 
2801071Sbill 
getline()2811071Sbill char *getline()
2821071Sbill {
2831071Sbill 
2841071Sbill 	register c;
2851071Sbill 	register char *linep;
2861071Sbill 	char *endlinep;
2871071Sbill 
2881071Sbill 
2891071Sbill 	endlinep= line + mlen;
2901071Sbill 	linep = line;
2911071Sbill 	/* Throw away leading white space */
2921071Sbill 
2931071Sbill 	while(isspace(c=getc(inptr)))
2941071Sbill 		;
2951071Sbill 	if(c==EOF)
2961071Sbill 		return(0);
2971071Sbill 	ungetc(c,inptr);
2981071Sbill 	while(( c=getc(inptr)) != EOF) {
2991071Sbill 		switch (c) {
3001071Sbill 
3011071Sbill 			case '\t':
3021071Sbill 				if(linep<endlinep)
3031071Sbill 					*linep++ = ' ';
3041071Sbill 				break;
3051071Sbill 			case '\n':
3061071Sbill 				while(isspace(*--linep));
3071071Sbill 				*++linep = '\n';
3081071Sbill 				return(linep);
3091071Sbill 			default:
3101071Sbill 				if(linep < endlinep)
3111071Sbill 					*linep++ = c;
3121071Sbill 		}
3131071Sbill 	}
3141071Sbill 	return(0);
3151071Sbill }
3161071Sbill 
cmpline(pend)3171071Sbill cmpline(pend)
3181071Sbill char *pend;
3191071Sbill {
3201071Sbill 
3211071Sbill 	char *pstrt, *pchar, *cp;
3221071Sbill 	char **hp;
3231071Sbill 	int flag;
3241071Sbill 
3251071Sbill 	pchar = line;
3261071Sbill 	if(rflag)
3271071Sbill 		while(pchar<pend&&!isspace(*pchar))
3281071Sbill 			pchar++;
3291071Sbill 	while(pchar<pend){
3301071Sbill 	/* eliminate white space */
3311071Sbill 		if(isabreak(*pchar++))
3321071Sbill 			continue;
3331071Sbill 		pstrt = --pchar;
3341071Sbill 
3351071Sbill 		flag = 1;
3361071Sbill 		while(flag){
3371071Sbill 			if(isabreak(*pchar)) {
3381071Sbill 				hp = &hasht[hash(pstrt,pchar)];
3391071Sbill 				pchar--;
3401071Sbill 				while(cp = *hp++){
3411071Sbill 					if(hp == &hasht[MAXT])
3421071Sbill 						hp = hasht;
3431071Sbill 	/* possible match */
3441071Sbill 					if(cmpword(pstrt,pchar,cp)){
3451071Sbill 	/* exact match */
3461071Sbill 						if(!ignore && only)
3471071Sbill 							putline(pstrt,pend);
3481071Sbill 						flag = 0;
3491071Sbill 						break;
3501071Sbill 					}
3511071Sbill 				}
3521071Sbill 	/* no match */
3531071Sbill 				if(flag){
3541071Sbill 					if(ignore || !only)
3551071Sbill 						putline(pstrt,pend);
3561071Sbill 					flag = 0;
3571071Sbill 				}
3581071Sbill 			}
3591071Sbill 		pchar++;
3601071Sbill 		}
3611071Sbill 	}
3621071Sbill }
3631071Sbill 
/*
 * Compare the word starting at cpp, whose last character is at pend,
 * with the NUL-terminated table word hpp.  Upper-case letters in the
 * input word are folded to lower case before comparing.  Returns 1
 * when the two words are identical in content and length, else 0.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int c;

	while(*hpp != '\0'){
		/* ctype functions need an unsigned char value */
		c = (unsigned char)*cpp++;
		if(isupper(c))
			c = tolower(c);
		if(c != (unsigned char)*hpp++)
			return(0);
	}
	/* every table character matched; the lengths agree only if
	   we stopped just past the word's last character */
	return(cpp == pend + 1);
}
3771071Sbill 
putline(strt,end)3781071Sbill putline(strt, end)
3791071Sbill char *strt, *end;
3801071Sbill {
3811071Sbill 	char *cp;
3821071Sbill 
3831071Sbill 	for(cp=strt; cp<end; cp++)
3841071Sbill 		putc(*cp, sortptr);
3851071Sbill 	/* Add extra blank before TILDE to sort correctly
3861071Sbill 	   with -fd option */
3871071Sbill 	putc(' ',sortptr);
3881071Sbill 	putc(TILDE,sortptr);
3891071Sbill 	for (cp=line; cp<strt; cp++)
3901071Sbill 		putc(*cp,sortptr);
3911071Sbill 	putc('\n',sortptr);
3921071Sbill }
3931071Sbill 
getsort()3941071Sbill getsort()
3951071Sbill {
3961071Sbill 	register c;
3971071Sbill 	register char *tilde, *linep, *ref;
3981071Sbill 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
3991071Sbill 	int w;
4001071Sbill 	char *rtrim(), *ltrim();
4011071Sbill 
4021071Sbill 	if((sortptr = fopen(sortfile,"r")) == NULL)
4031071Sbill 		diag("Cannot open sorted data:",sortfile);
4041071Sbill 
4051071Sbill 	halflen = (llen-gutter)/2;
4061071Sbill 	linep = line;
4071071Sbill 	while((c = getc(sortptr)) != EOF) {
4081071Sbill 		switch(c) {
4091071Sbill 
4101071Sbill 		case TILDE:
4111071Sbill 			tilde = linep;
4121071Sbill 			break;
4131071Sbill 
4141071Sbill 		case '\n':
4151071Sbill 			while(isspace(linep[-1]))
4161071Sbill 				linep--;
4171071Sbill 			ref = tilde;
4181071Sbill 			if(rflag) {
4191071Sbill 				while(ref<linep&&!isspace(*ref))
4201071Sbill 					ref++;
4211071Sbill 				*ref++ = 0;
4221071Sbill 			}
4231071Sbill 		/* the -1 is an overly conservative test to leave
4241071Sbill 		   space for the / that signifies truncation*/
4251071Sbill 			p3b = rtrim(p3a=line,tilde,halflen-1);
4261071Sbill 			if(p3b-p3a>halflen-1)
4271071Sbill 				p3b = p3a+halflen-1;
4281071Sbill 			p2a = ltrim(ref,p2b=linep,halflen-1);
4291071Sbill 			if(p2b-p2a>halflen-1)
4301071Sbill 				p2a = p2b-halflen-1;
4311071Sbill 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
4321071Sbill 				w=halflen-(p2b-p2a)-gap);
4331071Sbill 			if(p1b-p1a>w)
4341071Sbill 				p1b = p1a;
4351071Sbill 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
4361071Sbill 				w=halflen-(p3b-p3a)-gap);
4371071Sbill 			if(p4b-p4a>w)
4381071Sbill 				p4a = p4b;
4391071Sbill 			fprintf(outptr,".xx \"");
4401071Sbill 			putout(p1a,p1b);
4411071Sbill 	/* tilde-1 to account for extra space before TILDE */
4421071Sbill 			if(p1b!=(tilde-1) && p1a!=p1b)
4431071Sbill 				fprintf(outptr,"/");
4441071Sbill 			fprintf(outptr,"\" \"");
4451071Sbill 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
4461071Sbill 				fprintf(outptr,"/");
4471071Sbill 			putout(p2a,p2b);
4481071Sbill 			fprintf(outptr,"\" \"");
4491071Sbill 			putout(p3a,p3b);
4501071Sbill 	/* ++p3b to account for extra blank after TILDE */
4511071Sbill 	/* ++p3b to account for extra space before TILDE */
4521071Sbill 			if(p1a==p1b && ++p3b!=tilde)
4531071Sbill 				fprintf(outptr,"/");
4541071Sbill 			fprintf(outptr,"\" \"");
4551071Sbill 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
4561071Sbill 				fprintf(outptr,"/");
4571071Sbill 			putout(p4a,p4b);
4581071Sbill 			if(rflag)
4591071Sbill 				fprintf(outptr,"\" %s\n",tilde);
4601071Sbill 			else
4611071Sbill 				fprintf(outptr,"\"\n");
4621071Sbill 			linep = line;
4631071Sbill 			break;
4641071Sbill 
4651071Sbill 		case '"':
4661071Sbill 	/* put double " for "  */
4671071Sbill 			*linep++ = c;
4681071Sbill 		default:
4691071Sbill 			*linep++ = c;
4701071Sbill 		}
4711071Sbill 	}
4721071Sbill }
4731071Sbill 
/*
 * Find where to cut the text [a,c) so that at most d characters,
 * ending at the end of a word, are kept from its left side.
 * Returns the last position within d characters of a that directly
 * follows a non-blank and is either c itself or a blank.  When no
 * such position exists, c is returned and the caller must shorten
 * the field itself.
 */
char *
rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	for(x=a+1; x<=c && x-a<=d; x++)
		if((x==c || isspace((unsigned char)x[0])) &&
		    !isspace((unsigned char)x[-1]))
			b = x;
	if(b<c && !isspace((unsigned char)b[0]))
		b++;
	return(b);
}
4861071Sbill 
/*
 * Find where to cut the text [c,b) so that at most d characters,
 * starting at the beginning of a word, are kept from its right side.
 * Returns the leftmost word start within d characters of b.  When no
 * word starts in that range (or b does not lie after c), c is
 * returned and the caller must shorten the field itself.
 */
char *
ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x,*p;

	a = c;
	/* p walks from b-1 down to c; x stays one ahead of it so no
	   pointer below c is ever formed */
	for(x=b; x>c && b-x<d; x--) {
		p = x-1;
		if(!isspace((unsigned char)p[0]) &&
		    (p==c || isspace((unsigned char)p[-1])))
			a = p;
	}
	if(a>c && !isspace((unsigned char)a[-1]))
		a--;
	return(a);
}
4991071Sbill 
putout(strt,end)5001071Sbill putout(strt,end)
5011071Sbill char *strt, *end;
5021071Sbill {
5031071Sbill 	char *cp;
5041071Sbill 
5051071Sbill 	cp = strt;
5061071Sbill 
5071071Sbill 	for(cp=strt; cp<end; cp++) {
5081071Sbill 		putc(*cp,outptr);
5091071Sbill 	}
5101071Sbill }
5111071Sbill 
51246846Sbostic void
onintr()5131071Sbill onintr()
5141071Sbill {
5151071Sbill 
51635258Sbostic 	unlink(sortfile);
5171071Sbill 	exit(1);
5181071Sbill }
5191071Sbill 
hash(strtp,endp)5201071Sbill hash(strtp,endp)
5211071Sbill char *strtp, *endp;
5221071Sbill {
5231071Sbill 	char *cp, c;
5241071Sbill 	int i, j, k;
5251071Sbill 
5261071Sbill 	/* Return zero hash number for single letter words */
5271071Sbill 	if((endp - strtp) == 1)
5281071Sbill 		return(0);
5291071Sbill 
5301071Sbill 	cp = strtp;
5311071Sbill 	c = *cp++;
5321071Sbill 	i = (isupper(c)?tolower(c):c);
5331071Sbill 	c = *cp;
5341071Sbill 	j = (isupper(c)?tolower(c):c);
5351071Sbill 	i = i*j;
5361071Sbill 	cp = --endp;
5371071Sbill 	c = *cp--;
5381071Sbill 	k = (isupper(c)?tolower(c):c);
5391071Sbill 	c = *cp;
5401071Sbill 	j = (isupper(c)?tolower(c):c);
5411071Sbill 	j = k*j;
5421071Sbill 
5431071Sbill 	k = (i ^ (j>>2)) & MASK;
5441071Sbill 	return(k);
5451071Sbill }
5461071Sbill 
storeh(num,strtp)5471071Sbill storeh(num,strtp)
5481071Sbill int num;
5491071Sbill char *strtp;
5501071Sbill {
5511071Sbill 	int i;
5521071Sbill 
5531071Sbill 	for(i=num; i<MAXT; i++) {
5541071Sbill 		if(hasht[i] == 0) {
5551071Sbill 			hasht[i] = strtp;
5561071Sbill 			return(0);
5571071Sbill 		}
5581071Sbill 	}
5591071Sbill 	for(i=0; i<num; i++) {
5601071Sbill 		if(hasht[i] == 0) {
5611071Sbill 			hasht[i] = strtp;
5621071Sbill 			return(0);
5631071Sbill 		}
5641071Sbill 	}
5651071Sbill 	return(1);
5661071Sbill }
567