xref: /csrg-svn/usr.bin/ptx/ptx.c (revision 32744)
1*32744Sbostic #ifndef lint
2*32744Sbostic static char *sccsid = "@(#)ptx.c	4.3 (Berkeley) 12/02/87";
3*32744Sbostic #endif /* not lint */
41071Sbill 
/*	permuted title index
	ptx [-t] [-r] [-f] [-i ignore] [-o only] [-w num] [-g num] [-b break] [input] [output]
	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternatively it includes only the words in the only file.
	If neither is given it excludes the words in /usr/lib/eign.

	The width of the output line can be changed to num
	characters.  If omitted, 72 is the default, unless -t is
	given, in which case it is 100.
	The -f flag tells the program to fold the output
	(it is passed on to sort as -f).
	The -t flag says the output is for troff and the
	output is then wider.

	*/
191071Sbill 
201071Sbill #include <stdio.h>
211071Sbill #include <ctype.h>
221071Sbill #include <signal.h>
231071Sbill #define DEFLTX "/usr/lib/eign"
241071Sbill #define TILDE 0177
251071Sbill #define SORT "/usr/bin/sort"
261071Sbill #define	N 30
271071Sbill #define	MAX	N*BUFSIZ
281071Sbill #define LMAX	200
291071Sbill #define MAXT	2048
301071Sbill #define MASK	03777
311071Sbill #define SET	1
321071Sbill 
331071Sbill #define isabreak(c) (btable[c])
341071Sbill 
351071Sbill extern char *calloc(), *mktemp();
361071Sbill extern char *getline();
371071Sbill int status;
381071Sbill 
391071Sbill 
401071Sbill char *hasht[MAXT];
411071Sbill char line[LMAX];
421071Sbill char btable[128];
431071Sbill int ignore;
441071Sbill int only;
451071Sbill int llen = 72;
461071Sbill int gap = 3;
471071Sbill int gutter = 3;
481071Sbill int mlen = LMAX;
491071Sbill int wlen;
501071Sbill int rflag;
511071Sbill int halflen;
521071Sbill char *strtbufp, *endbufp;
531071Sbill char *empty = "";
541071Sbill 
551071Sbill char *infile;
561071Sbill FILE *inptr = stdin;
571071Sbill 
581071Sbill char *outfile;
591071Sbill FILE *outptr = stdout;
601071Sbill 
611071Sbill char *sortfile;	/* output of sort program */
621071Sbill char nofold[] = {'-', 'd', 't', TILDE, 0};
631071Sbill char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
641071Sbill char *sortopt = nofold;
651071Sbill FILE *sortptr;
661071Sbill 
671071Sbill char *bfile;	/*contains user supplied break chars */
681071Sbill FILE *bptr;
691071Sbill 
701071Sbill main(argc,argv)
711071Sbill int argc;
721071Sbill char **argv;
731071Sbill {
741071Sbill 	register int c;
751071Sbill 	register char *bufp;
761071Sbill 	int pid;
771071Sbill 	char *pend;
781071Sbill 	extern onintr();
791071Sbill 
801071Sbill 	char *xfile;
811071Sbill 	FILE *xptr;
821071Sbill 
831071Sbill 	if(signal(SIGHUP,onintr)==SIG_IGN)
841071Sbill 		signal(SIGHUP,SIG_IGN);
851071Sbill 	if(signal(SIGINT,onintr)==SIG_IGN)
861071Sbill 		signal(SIGINT,SIG_IGN);
871071Sbill 	signal(SIGPIPE,onintr);
881071Sbill 	signal(SIGTERM,onintr);
891071Sbill 
901071Sbill /*	argument decoding	*/
911071Sbill 
921071Sbill 	xfile = DEFLTX;
931071Sbill 	argv++;
941071Sbill 	while(argc>1 && **argv == '-') {
951071Sbill 		switch (*++*argv){
961071Sbill 
971071Sbill 		case 'r':
981071Sbill 			rflag++;
991071Sbill 			break;
1001071Sbill 		case 'f':
1011071Sbill 			sortopt = fold;
1021071Sbill 			break;
1031071Sbill 
1041071Sbill 		case 'w':
1051071Sbill 			if(argc >= 2) {
1061071Sbill 				argc--;
1071071Sbill 				wlen++;
1081071Sbill 				llen = atoi(*++argv);
1091071Sbill 				if(llen == 0)
1101071Sbill 					diag("Wrong width:",*argv);
1111071Sbill 				if(llen > LMAX) {
1121071Sbill 					llen = LMAX;
1131071Sbill 					msg("Lines truncated to 200 chars.",empty);
1141071Sbill 				}
1151071Sbill 				break;
1161071Sbill 			}
1171071Sbill 
1181071Sbill 		case 't':
1191071Sbill 			if(wlen == 0)
1201071Sbill 				llen = 100;
1211071Sbill 			break;
1221071Sbill 		case 'g':
1231071Sbill 			if(argc >=2) {
1241071Sbill 				argc--;
1251071Sbill 				gap = gutter = atoi(*++argv);
1261071Sbill 			}
1271071Sbill 			break;
1281071Sbill 
1291071Sbill 		case 'i':
1301071Sbill 			if(only)
1311071Sbill 				diag("Only file already given.",empty);
1321071Sbill 			if (argc>=2){
1331071Sbill 				argc--;
1341071Sbill 				ignore++;
1351071Sbill 				xfile = *++argv;
1361071Sbill 			}
1371071Sbill 			break;
1381071Sbill 
1391071Sbill 		case 'o':
1401071Sbill 			if(ignore)
1411071Sbill 				diag("Ignore file already given",empty);
1421071Sbill 			if (argc>=2){
1431071Sbill 				only++;
1441071Sbill 				argc--;
1451071Sbill 				xfile = *++argv;
1461071Sbill 			}
1471071Sbill 			break;
1481071Sbill 
1491071Sbill 		case 'b':
1501071Sbill 			if(argc>=2) {
1511071Sbill 				argc--;
1521071Sbill 				bfile = *++argv;
1531071Sbill 			}
1541071Sbill 			break;
1551071Sbill 
1561071Sbill 		default:
1571071Sbill 			msg("Illegal argument:",*argv);
1581071Sbill 		}
1591071Sbill 		argc--;
1601071Sbill 		argv++;
1611071Sbill 	}
1621071Sbill 
1631071Sbill 	if(argc>3)
1641071Sbill 		diag("Too many filenames",empty);
1651071Sbill 	else if(argc==3){
1661071Sbill 		infile = *argv++;
1671071Sbill 		outfile = *argv;
1681071Sbill 		if((outptr = fopen(outfile,"w")) == NULL)
1691071Sbill 			diag("Cannot open output file:",outfile);
1701071Sbill 	} else if(argc==2) {
1711071Sbill 		infile = *argv;
1721071Sbill 		outfile = 0;
1731071Sbill 	}
1741071Sbill 
1751071Sbill 
1761071Sbill 	/* Default breaks of blank, tab and newline */
1771071Sbill 	btable[' '] = SET;
1781071Sbill 	btable['\t'] = SET;
1791071Sbill 	btable['\n'] = SET;
1801071Sbill 	if(bfile) {
1811071Sbill 		if((bptr = fopen(bfile,"r")) == NULL)
1821071Sbill 			diag("Cannot open break char file",bfile);
1831071Sbill 
1841071Sbill 		while((c = getc(bptr)) != EOF)
1851071Sbill 			btable[c] = SET;
1861071Sbill 	}
1871071Sbill 
1881071Sbill /*	Allocate space for a buffer.  If only or ignore file present
1891071Sbill 	read it into buffer. Else read in default ignore file
1901071Sbill 	and put resulting words in buffer.
1911071Sbill 	*/
1921071Sbill 
1931071Sbill 
1941071Sbill 	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
1951071Sbill 		diag("Out of memory space",empty);
1961071Sbill 	bufp = strtbufp;
1971071Sbill 	endbufp = strtbufp+MAX;
1981071Sbill 
1991071Sbill 	if((xptr = fopen(xfile,"r")) == NULL)
2001071Sbill 		diag("Cannot open  file",xfile);
2011071Sbill 
2021071Sbill 	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
2031071Sbill 		if(isabreak(c)) {
2041071Sbill 			if(storeh(hash(strtbufp,bufp),strtbufp))
2051071Sbill 				diag("Too many words",xfile);
2061071Sbill 			*bufp++ = '\0';
2071071Sbill 			strtbufp = bufp;
2081071Sbill 		}
2091071Sbill 		else {
2101071Sbill 			*bufp++ = (isupper(c)?tolower(c):c);
2111071Sbill 		}
2121071Sbill 	}
2131071Sbill 	if (bufp >= endbufp)
2141071Sbill 		diag("Too many words in file",xfile);
2151071Sbill 	endbufp = --bufp;
2161071Sbill 
2171071Sbill 	/* open output file for sorting */
2181071Sbill 
2191071Sbill 	sortfile = mktemp("/tmp/ptxsXXXXX");
2201071Sbill 	if((sortptr = fopen(sortfile, "w")) == NULL)
2211071Sbill 		diag("Cannot open output for sorting:",sortfile);
2221071Sbill 
2231071Sbill /*	get a line of data and compare each word for
2241071Sbill 	inclusion or exclusion in the sort phase
2251071Sbill */
2261071Sbill 
2271071Sbill 	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
2281071Sbill 		diag("Cannot open data: ",infile);
2291071Sbill 	while(pend=getline())
2301071Sbill 		cmpline(pend);
2311071Sbill 	fclose(sortptr);
2321071Sbill 
2331071Sbill 	switch (pid = fork()){
2341071Sbill 
2351071Sbill 	case -1:	/* cannot fork */
2361071Sbill 		diag("Cannot fork",empty);
2371071Sbill 
2381071Sbill 	case 0:		/* child */
2391071Sbill 		execl(SORT, SORT, sortopt, "+0", "-1", "+1",
2401071Sbill 			sortfile, "-o", sortfile, 0);
2411071Sbill 
2421071Sbill 	default:	/* parent */
2431071Sbill 		while(wait(&status) != pid);
2441071Sbill 	}
2451071Sbill 
2461071Sbill 
2471071Sbill 	getsort();
248*32744Sbostic 	if(sortfile)
24925024Sbloom 		unlink(sortfile);
25025024Sbloom 	exit(0);
2511071Sbill }
2521071Sbill 
/*
 * Print s and arg on the standard error output, separated by a
 * single blank and followed by a newline.
 */
int
msg(s,arg)
char *s, *arg;
{
	fprintf(stderr, "%s %s\n", s, arg);
}
/*
 * Fatal error: print s and arg through msg() and leave the
 * program with exit status 1.  Does not return.
 */
int
diag(s,arg)
char *s;
char *arg;
{
	msg(s, arg);
	exit(1);
}
2671071Sbill 
2681071Sbill 
2691071Sbill char *getline()
2701071Sbill {
2711071Sbill 
2721071Sbill 	register c;
2731071Sbill 	register char *linep;
2741071Sbill 	char *endlinep;
2751071Sbill 
2761071Sbill 
2771071Sbill 	endlinep= line + mlen;
2781071Sbill 	linep = line;
2791071Sbill 	/* Throw away leading white space */
2801071Sbill 
2811071Sbill 	while(isspace(c=getc(inptr)))
2821071Sbill 		;
2831071Sbill 	if(c==EOF)
2841071Sbill 		return(0);
2851071Sbill 	ungetc(c,inptr);
2861071Sbill 	while(( c=getc(inptr)) != EOF) {
2871071Sbill 		switch (c) {
2881071Sbill 
2891071Sbill 			case '\t':
2901071Sbill 				if(linep<endlinep)
2911071Sbill 					*linep++ = ' ';
2921071Sbill 				break;
2931071Sbill 			case '\n':
2941071Sbill 				while(isspace(*--linep));
2951071Sbill 				*++linep = '\n';
2961071Sbill 				return(linep);
2971071Sbill 			default:
2981071Sbill 				if(linep < endlinep)
2991071Sbill 					*linep++ = c;
3001071Sbill 		}
3011071Sbill 	}
3021071Sbill 	return(0);
3031071Sbill }
3041071Sbill 
3051071Sbill cmpline(pend)
3061071Sbill char *pend;
3071071Sbill {
3081071Sbill 
3091071Sbill 	char *pstrt, *pchar, *cp;
3101071Sbill 	char **hp;
3111071Sbill 	int flag;
3121071Sbill 
3131071Sbill 	pchar = line;
3141071Sbill 	if(rflag)
3151071Sbill 		while(pchar<pend&&!isspace(*pchar))
3161071Sbill 			pchar++;
3171071Sbill 	while(pchar<pend){
3181071Sbill 	/* eliminate white space */
3191071Sbill 		if(isabreak(*pchar++))
3201071Sbill 			continue;
3211071Sbill 		pstrt = --pchar;
3221071Sbill 
3231071Sbill 		flag = 1;
3241071Sbill 		while(flag){
3251071Sbill 			if(isabreak(*pchar)) {
3261071Sbill 				hp = &hasht[hash(pstrt,pchar)];
3271071Sbill 				pchar--;
3281071Sbill 				while(cp = *hp++){
3291071Sbill 					if(hp == &hasht[MAXT])
3301071Sbill 						hp = hasht;
3311071Sbill 	/* possible match */
3321071Sbill 					if(cmpword(pstrt,pchar,cp)){
3331071Sbill 	/* exact match */
3341071Sbill 						if(!ignore && only)
3351071Sbill 							putline(pstrt,pend);
3361071Sbill 						flag = 0;
3371071Sbill 						break;
3381071Sbill 					}
3391071Sbill 				}
3401071Sbill 	/* no match */
3411071Sbill 				if(flag){
3421071Sbill 					if(ignore || !only)
3431071Sbill 						putline(pstrt,pend);
3441071Sbill 					flag = 0;
3451071Sbill 				}
3461071Sbill 			}
3471071Sbill 		pchar++;
3481071Sbill 		}
3491071Sbill 	}
3501071Sbill }
3511071Sbill 
/*
 * Compare the text word cpp..pend (pend is its LAST character)
 * with the NUL terminated table word hpp.  Upper case letters of
 * the text word are folded to lower case first; hpp is compared
 * as it is.  Returns 1 when both words are the same and have the
 * same length, 0 otherwise.  Nothing beyond pend is read.
 */
int
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	int c;

	for(; *hpp != '\0'; hpp++, cpp++){
		/* table word is longer than the text word */
		if(cpp > pend)
			return(0);
		/* 0..255, as the ctype macros require */
		c = *cpp & 0377;
		if(isupper(c))
			c = tolower(c);
		if(c != (*hpp & 0377))
			return(0);
	}
	/* equal only if the text word has been used up as well */
	return(cpp == pend + 1);
}
3651071Sbill 
3661071Sbill putline(strt, end)
3671071Sbill char *strt, *end;
3681071Sbill {
3691071Sbill 	char *cp;
3701071Sbill 
3711071Sbill 	for(cp=strt; cp<end; cp++)
3721071Sbill 		putc(*cp, sortptr);
3731071Sbill 	/* Add extra blank before TILDE to sort correctly
3741071Sbill 	   with -fd option */
3751071Sbill 	putc(' ',sortptr);
3761071Sbill 	putc(TILDE,sortptr);
3771071Sbill 	for (cp=line; cp<strt; cp++)
3781071Sbill 		putc(*cp,sortptr);
3791071Sbill 	putc('\n',sortptr);
3801071Sbill }
3811071Sbill 
3821071Sbill getsort()
3831071Sbill {
3841071Sbill 	register c;
3851071Sbill 	register char *tilde, *linep, *ref;
3861071Sbill 	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
3871071Sbill 	int w;
3881071Sbill 	char *rtrim(), *ltrim();
3891071Sbill 
3901071Sbill 	if((sortptr = fopen(sortfile,"r")) == NULL)
3911071Sbill 		diag("Cannot open sorted data:",sortfile);
3921071Sbill 
3931071Sbill 	halflen = (llen-gutter)/2;
3941071Sbill 	linep = line;
3951071Sbill 	while((c = getc(sortptr)) != EOF) {
3961071Sbill 		switch(c) {
3971071Sbill 
3981071Sbill 		case TILDE:
3991071Sbill 			tilde = linep;
4001071Sbill 			break;
4011071Sbill 
4021071Sbill 		case '\n':
4031071Sbill 			while(isspace(linep[-1]))
4041071Sbill 				linep--;
4051071Sbill 			ref = tilde;
4061071Sbill 			if(rflag) {
4071071Sbill 				while(ref<linep&&!isspace(*ref))
4081071Sbill 					ref++;
4091071Sbill 				*ref++ = 0;
4101071Sbill 			}
4111071Sbill 		/* the -1 is an overly conservative test to leave
4121071Sbill 		   space for the / that signifies truncation*/
4131071Sbill 			p3b = rtrim(p3a=line,tilde,halflen-1);
4141071Sbill 			if(p3b-p3a>halflen-1)
4151071Sbill 				p3b = p3a+halflen-1;
4161071Sbill 			p2a = ltrim(ref,p2b=linep,halflen-1);
4171071Sbill 			if(p2b-p2a>halflen-1)
4181071Sbill 				p2a = p2b-halflen-1;
4191071Sbill 			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
4201071Sbill 				w=halflen-(p2b-p2a)-gap);
4211071Sbill 			if(p1b-p1a>w)
4221071Sbill 				p1b = p1a;
4231071Sbill 			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
4241071Sbill 				w=halflen-(p3b-p3a)-gap);
4251071Sbill 			if(p4b-p4a>w)
4261071Sbill 				p4a = p4b;
4271071Sbill 			fprintf(outptr,".xx \"");
4281071Sbill 			putout(p1a,p1b);
4291071Sbill 	/* tilde-1 to account for extra space before TILDE */
4301071Sbill 			if(p1b!=(tilde-1) && p1a!=p1b)
4311071Sbill 				fprintf(outptr,"/");
4321071Sbill 			fprintf(outptr,"\" \"");
4331071Sbill 			if(p4a==p4b && p2a!=ref && p2a!=p2b)
4341071Sbill 				fprintf(outptr,"/");
4351071Sbill 			putout(p2a,p2b);
4361071Sbill 			fprintf(outptr,"\" \"");
4371071Sbill 			putout(p3a,p3b);
4381071Sbill 	/* ++p3b to account for extra blank after TILDE */
4391071Sbill 	/* ++p3b to account for extra space before TILDE */
4401071Sbill 			if(p1a==p1b && ++p3b!=tilde)
4411071Sbill 				fprintf(outptr,"/");
4421071Sbill 			fprintf(outptr,"\" \"");
4431071Sbill 			if(p1a==p1b && p4a!=ref && p4a!=p4b)
4441071Sbill 				fprintf(outptr,"/");
4451071Sbill 			putout(p4a,p4b);
4461071Sbill 			if(rflag)
4471071Sbill 				fprintf(outptr,"\" %s\n",tilde);
4481071Sbill 			else
4491071Sbill 				fprintf(outptr,"\"\n");
4501071Sbill 			linep = line;
4511071Sbill 			break;
4521071Sbill 
4531071Sbill 		case '"':
4541071Sbill 	/* put double " for "  */
4551071Sbill 			*linep++ = c;
4561071Sbill 		default:
4571071Sbill 			*linep++ = c;
4581071Sbill 		}
4591071Sbill 	}
4601071Sbill }
4611071Sbill 
/*
 * Find where to cut the text a..c on the right so that at most d
 * characters remain and no word is split.  Returns the last
 * position x with a < x <= c and x-a <= d that directly follows
 * a non-blank and is either c itself or a blank; returns c when
 * there is no such position.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;

	b = c;
	/* "& 0377" keeps the ctype argument in 0..255 for any byte */
	for(x=a+1; x<=c&&x-a<=d; x++)
		if((x==c||isspace(x[0] & 0377))&&!isspace(x[-1] & 0377))
			b = x;
	if(b<c&&!isspace(b[0] & 0377))
		b++;
	return(b);
}
4741071Sbill 
/*
 * Find where to cut the text c..b on the left so that at most d
 * characters remain and no word is split.  Returns the leftmost
 * position x with c <= x < b and b-x <= d that holds a non-blank
 * and is either c itself or preceded by a blank; returns c when
 * there is no such position.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x;

	a = c;
	/*
	 * x is stepped down inside the loop, so it never drops below c.
	 * "& 0377" keeps the ctype argument in 0..255 for any byte.
	 */
	x = b;
	while(x>c && b-(x-1)<=d) {
		x--;
		if(!isspace(x[0] & 0377)&&(x==c||isspace(x[-1] & 0377)))
			a = x;
	}
	if(a>c&&!isspace(a[-1] & 0377))
		a--;
	return(a);
}
4871071Sbill 
4881071Sbill putout(strt,end)
4891071Sbill char *strt, *end;
4901071Sbill {
4911071Sbill 	char *cp;
4921071Sbill 
4931071Sbill 	cp = strt;
4941071Sbill 
4951071Sbill 	for(cp=strt; cp<end; cp++) {
4961071Sbill 		putc(*cp,outptr);
4971071Sbill 	}
4981071Sbill }
4991071Sbill 
5001071Sbill onintr()
5011071Sbill {
5021071Sbill 
503*32744Sbostic 	if(sortfile)
5041071Sbill 		unlink(sortfile);
5051071Sbill 	exit(1);
5061071Sbill }
5071071Sbill 
5081071Sbill hash(strtp,endp)
5091071Sbill char *strtp, *endp;
5101071Sbill {
5111071Sbill 	char *cp, c;
5121071Sbill 	int i, j, k;
5131071Sbill 
5141071Sbill 	/* Return zero hash number for single letter words */
5151071Sbill 	if((endp - strtp) == 1)
5161071Sbill 		return(0);
5171071Sbill 
5181071Sbill 	cp = strtp;
5191071Sbill 	c = *cp++;
5201071Sbill 	i = (isupper(c)?tolower(c):c);
5211071Sbill 	c = *cp;
5221071Sbill 	j = (isupper(c)?tolower(c):c);
5231071Sbill 	i = i*j;
5241071Sbill 	cp = --endp;
5251071Sbill 	c = *cp--;
5261071Sbill 	k = (isupper(c)?tolower(c):c);
5271071Sbill 	c = *cp;
5281071Sbill 	j = (isupper(c)?tolower(c):c);
5291071Sbill 	j = k*j;
5301071Sbill 
5311071Sbill 	k = (i ^ (j>>2)) & MASK;
5321071Sbill 	return(k);
5331071Sbill }
5341071Sbill 
5351071Sbill storeh(num,strtp)
5361071Sbill int num;
5371071Sbill char *strtp;
5381071Sbill {
5391071Sbill 	int i;
5401071Sbill 
5411071Sbill 	for(i=num; i<MAXT; i++) {
5421071Sbill 		if(hasht[i] == 0) {
5431071Sbill 			hasht[i] = strtp;
5441071Sbill 			return(0);
5451071Sbill 		}
5461071Sbill 	}
5471071Sbill 	for(i=0; i<num; i++) {
5481071Sbill 		if(hasht[i] == 0) {
5491071Sbill 			hasht[i] = strtp;
5501071Sbill 			return(0);
5511071Sbill 		}
5521071Sbill 	}
5531071Sbill 	return(1);
5541071Sbill }
555