xref: /csrg-svn/usr.bin/fmt/fmt.c (revision 40367)
122453Sdist /*
222453Sdist  * Copyright (c) 1980 Regents of the University of California.
333499Sbostic  * All rights reserved.
433499Sbostic  *
533499Sbostic  * Redistribution and use in source and binary forms are permitted
634905Sbostic  * provided that the above copyright notice and this paragraph are
734905Sbostic  * duplicated in all such forms and that any documentation,
834905Sbostic  * advertising materials, and other materials related to such
934905Sbostic  * distribution and use acknowledge that the software was developed
1034905Sbostic  * by the University of California, Berkeley.  The name of the
1134905Sbostic  * University may not be used to endorse or promote products derived
1234905Sbostic  * from this software without specific prior written permission.
1334905Sbostic  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1434905Sbostic  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1534905Sbostic  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1622453Sdist  */
1722453Sdist 
1814531Ssam #ifndef lint
1933499Sbostic char copyright[] =
2022453Sdist "@(#) Copyright (c) 1980 Regents of the University of California.\n\
2122453Sdist  All rights reserved.\n";
2233499Sbostic #endif /* not lint */
231232Skas 
2422453Sdist #ifndef lint
25*40367Smckusick static char sccsid[] = "@(#)fmt.c	5.9 (Berkeley) 03/08/90";
2633499Sbostic #endif /* not lint */
2722453Sdist 
281232Skas #include <stdio.h>
291232Skas #include <ctype.h>
301232Skas 
311232Skas /*
321232Skas  * fmt -- format the concatenation of input files or standard input
331232Skas  * onto standard output.  Designed for use with Mail ~|
341232Skas  *
3529545Smckusick  * Syntax : fmt [ goal [ max ] ] [ name ... ]
3629545Smckusick  * Authors: Kurt Shoens (UCB) 12/7/78;
3729545Smckusick  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
381232Skas  */
391232Skas 
/* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
 * #define	LENGTH	72		Max line length in output
 * (superseded by goal_length / max_length below)
 */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/*
 * LIZ@UOM 6/18/85 --New variables goal_length and max_length.
 * GOAL_LENGTH and MAX_LENGTH are the defaults main() installs before it
 * looks for numeric goal/max arguments on the command line.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line (blank lines are not counted) */
int	mark;			/* Value of lineno at the last head line seen */

char	*malloc();		/* for lint . . . */
/*
 * Field names that prefix() treats as header lines when they show up
 * within two lines after a head line.  The list is null-terminated.
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};
561232Skas 
571232Skas /*
581232Skas  * Drive the whole formatter by managing input files.  Also,
591232Skas  * cause initialization of the output stuff and flush it out
601232Skas  * at the end.
611232Skas  */
621232Skas 
631232Skas main(argc, argv)
6429545Smckusick 	int argc;
651232Skas 	char **argv;
661232Skas {
671232Skas 	register FILE *fi;
681232Skas 	register int errs = 0;
6929545Smckusick 	int number;		/* LIZ@UOM 6/18/85 */
701232Skas 
7134965Sedward 	goal_length = GOAL_LENGTH;
7234965Sedward 	max_length = MAX_LENGTH;
731232Skas 	setout();
741232Skas 	lineno = 1;
751232Skas 	mark = -10;
7629545Smckusick 	/*
7729545Smckusick 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
7829545Smckusick 	 */
7929545Smckusick 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
8029545Smckusick 		argv++;
8129545Smckusick 		argc--;
8229545Smckusick 		goal_length = number;
8329545Smckusick 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
8429545Smckusick 			argv++;
8529545Smckusick 			argc--;
8629545Smckusick 			max_length = number;
8729545Smckusick 		}
8829545Smckusick 	}
8929545Smckusick 	if (max_length <= goal_length) {
9029545Smckusick 		fprintf(stderr, "Max length must be greater than %s\n",
9129545Smckusick 			"goal length");
9229545Smckusick 		exit(1);
9329545Smckusick 	}
941232Skas 	if (argc < 2) {
951232Skas 		fmt(stdin);
961232Skas 		oflush();
971232Skas 		exit(0);
981232Skas 	}
991232Skas 	while (--argc) {
10029545Smckusick 		if ((fi = fopen(*++argv, "r")) == NULL) {
10129545Smckusick 			perror(*argv);
1021232Skas 			errs++;
1031232Skas 			continue;
1041232Skas 		}
1051232Skas 		fmt(fi);
1061232Skas 		fclose(fi);
1071232Skas 	}
1081232Skas 	oflush();
1091232Skas 	exit(errs);
1101232Skas }
1111232Skas 
/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * Control characters other than tab and backspace, and characters
 * >= 0177, are dropped.  Anything beyond BUFSIZ-1 characters on an
 * input line (before or after tab expansion) is discarded.
 */
fmt(fi)
	FILE *fi;
{
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	register char *cp, *cp2;
	register int c, ch, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.
		 */
		cp = linebuf;
		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
			if (c == '\b') {
				/* Backspace erases the previous character. */
				if (cp > linebuf)
					cp--;
			} else if ((c >= ' ' && c < 0177) || c == '\t')
				*cp++ = c;
			c = getc(fi);
		}
		*cp = '\0';

		/*
		 * Toss anything remaining on the input line.
		 */
		while (c != '\n' && c != EOF)
			c = getc(fi);

		/*
		 * Expand tabs on the way to canonb.  A separate variable
		 * (ch) is used so that c still holds the line terminator
		 * ('\n' or EOF) when we get to the bottom of the loop.
		 */
		col = 0;
		cp2 = canonb;
		for (cp = linebuf; (ch = *cp) != '\0'; cp++) {
			if (ch != '\t') {
				col++;
				if (cp2-canonb < BUFSIZ-1)
					*cp2++ = ch;
				continue;
			}
			/* Pad with blanks to the next multiple of 8. */
			do {
				if (cp2-canonb < BUFSIZ-1)
					*cp2++ = ' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line, without ever
		 * stepping back past the start of canonb.
		 */
		while (cp2 > canonb && cp2[-1] == ' ')
			cp2--;
		*cp2 = '\0';
		prefix(canonb);

		/*
		 * Step over the newline.  If the line ended at EOF, do not
		 * read again; the outer loop terminates on c == EOF.
		 */
		if (c != EOF)
			c = getc(fi);
	}
}
1841232Skas 
1851232Skas /*
1861232Skas  * Take a line devoid of tabs and other garbage and determine its
1871232Skas  * blank prefix.  If the indent changes, call for a linebreak.
1881232Skas  * If the input line is blank, echo the blank line on the output.
1891232Skas  * Finally, if the line minus the prefix is a mail header, try to keep
1901232Skas  * it on a line by itself.
1911232Skas  */
1921232Skas prefix(line)
1931232Skas 	char line[];
1941232Skas {
1951232Skas 	register char *cp, **hp;
1961232Skas 	register int np, h;
1971232Skas 
1981232Skas 	if (strlen(line) == 0) {
1991232Skas 		oflush();
2001232Skas 		putchar('\n');
2011232Skas 		return;
2021232Skas 	}
2031232Skas 	for (cp = line; *cp == ' '; cp++)
2041232Skas 		;
2051232Skas 	np = cp - line;
2061232Skas 
2071232Skas 	/*
2081232Skas 	 * The following horrible expression attempts to avoid linebreaks
2091232Skas 	 * when the indent changes due to a paragraph.
2101232Skas 	 */
2111232Skas 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
2121232Skas 		oflush();
2131232Skas 	if (h = ishead(cp))
2141232Skas 		oflush(), mark = lineno;
2151232Skas 	if (lineno - mark < 3 && lineno - mark > 0)
2161232Skas 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
2171232Skas 			if (ispref(*hp, cp)) {
2181232Skas 				h = 1;
2191232Skas 				oflush();
2201232Skas 				break;
2211232Skas 			}
2221232Skas 	if (!h && (h = (*cp == '.')))
2231232Skas 		oflush();
2241232Skas 	pfx = np;
2251232Skas 	if (h)
226*40367Smckusick 		pack(cp);
227*40367Smckusick 	else	split(cp);
228*40367Smckusick 	if (h)
2291232Skas 		oflush();
2301232Skas 	lineno++;
2311232Skas }
2321232Skas 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, together with the length of the word proper
 * (trailing blanks not counted).
 *
 * The line is assumed to hold at most BUFSIZ-1 characters, which is
 * what fmt() produces.
 */
split(line)
	char line[];
{
	register char *cp, *cp2;
	/*
	 * Room for a whole line that is one word, plus the two blanks
	 * that may be appended after it, plus the terminating NUL.
	 */
	char word[BUFSIZ + 2];
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.
		 * NOTE(review): a backslash-blank pair is copied as two
		 * characters but counted once in wordl -- confirm that
		 * this is intended.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char) cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is safe here: reaching the
		 * NUL means the loop above consumed at least one character.
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (cp[-1] == '.' || cp[-1] == ':' || cp[-1] == '!')
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
2801232Skas 
/*
 * Output section.
 * Build up line images from the words passed in.  Prefix
 * each line with correct number of blanks.  The buffer "outbuf"
 * contains the current partial line image, including prefixed blanks.
 * "outp" points to the next available space therein.  When outp is NOSTR,
 * there is no line in progress.  When a line is finished, oflush() hands
 * it to tabulate(), which reinserts leading tabs and writes it out.
 */
char	outbuf[BUFSIZ];			/* Sandbagged output line image */
char	*outp;				/* Pointer in above */

/*
 * Initialize the output section: mark the output line as not yet
 * started.
 */
setout()
{
	outp = NOSTR;
}
3001232Skas 
3011232Skas /*
3021232Skas  * Pack a word onto the output line.  If this is the beginning of
3031232Skas  * the line, push on the appropriately-sized string of blanks first.
3041232Skas  * If the word won't fit on the current line, flush and begin a new
3051232Skas  * line.  If the word is too long to fit all by itself on a line,
3061232Skas  * just give it its own and hope for the best.
30729545Smckusick  *
30829545Smckusick  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
30929545Smckusick  *	goal length, take it.  If not, then check to see if the line
31029545Smckusick  *	will be over the max length; if so put the word on the next
31129545Smckusick  *	line.  If not, check to see if the line will be closer to the
31229545Smckusick  *	goal length with or without the word and take it or put it on
31329545Smckusick  *	the next line accordingly.
3141232Skas  */
3151232Skas 
31629545Smckusick /*
31729545Smckusick  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
31829545Smckusick  * pack(word)
31929545Smckusick  *	char word[];
32029545Smckusick  */
32129545Smckusick pack(word,wl)
3221232Skas 	char word[];
32329545Smckusick 	int wl;
3241232Skas {
3251232Skas 	register char *cp;
3261232Skas 	register int s, t;
3271232Skas 
3281232Skas 	if (outp == NOSTR)
3291232Skas 		leadin();
33029545Smckusick 	/*
33129545Smckusick 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
33229545Smckusick 	 * length of the line before the word is added; t is now the length
33329545Smckusick 	 * of the line after the word is added
33429545Smckusick 	 *	t = strlen(word);
33529545Smckusick 	 *	if (t+s <= LENGTH)
33629545Smckusick 	 */
33729545Smckusick 	s = outp - outbuf;
33829545Smckusick 	t = wl + s;
33929545Smckusick 	if ((t <= goal_length) ||
34029545Smckusick 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
3411232Skas 		/*
34229545Smckusick 		 * In like flint!
3431232Skas 		 */
34429545Smckusick 		for (cp = word; *cp; *outp++ = *cp++);
3451232Skas 		return;
3461232Skas 	}
3471232Skas 	if (s > pfx) {
3481232Skas 		oflush();
3491232Skas 		leadin();
3501232Skas 	}
35129545Smckusick 	for (cp = word; *cp; *outp++ = *cp++);
3521232Skas }
3531232Skas 
3541232Skas /*
3551232Skas  * If there is anything on the current output line, send it on
3561232Skas  * its way.  Set outp to NOSTR to indicate the absence of the current
3571232Skas  * line prefix.
3581232Skas  */
3591232Skas oflush()
3601232Skas {
3611232Skas 	if (outp == NOSTR)
3621232Skas 		return;
3631232Skas 	*outp = '\0';
3641232Skas 	tabulate(outbuf);
3651232Skas 	outp = NOSTR;
3661232Skas }
3671232Skas 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from "line" in place.  Each full group of
 * eight leading blanks is written as one tab; the remaining leading
 * blanks are written as-is, followed by the rest of the line and a
 * newline.
 */
int
tabulate(line)
	char line[];
{
	register char *cp;
	register int b, t;

	/*
	 * Toss trailing blanks in the output line.  Work from the
	 * terminator backwards so that an empty or all-blank line never
	 * moves the pointer in front of the buffer.
	 */
	cp = line + strlen(line);
	while (cp > line && cp[-1] == ' ')
		cp--;
	*cp = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
4061232Skas 
4071232Skas /*
4081232Skas  * Initialize the output line with the appropriate number of
4091232Skas  * leading blanks.
4101232Skas  */
4111232Skas leadin()
4121232Skas {
4131232Skas 	register int b;
4141232Skas 	register char *cp;
4151232Skas 
4161232Skas 	for (b = 0, cp = outbuf; b < pfx; b++)
4171232Skas 		*cp++ = ' ';
4181232Skas 	outp = cp;
4191232Skas }
4201232Skas 
4211232Skas /*
4221232Skas  * Save a string in dynamic space.
4231232Skas  * This little goodie is needed for
4241232Skas  * a headline detector in head.c
4251232Skas  */
4261232Skas char *
4271232Skas savestr(str)
4281232Skas 	char str[];
4291232Skas {
4301232Skas 	register char *top;
4311232Skas 
43231142Sedward 	top = malloc(strlen(str) + 1);
4331232Skas 	if (top == NOSTR) {
4341232Skas 		fprintf(stderr, "fmt:  Ran out of memory\n");
4351232Skas 		exit(1);
4361232Skas 	}
43731142Sedward 	strcpy(top, str);
43829545Smckusick 	return (top);
4391232Skas }
4401232Skas 
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so an empty s1 is a prefix of anything), and 0
 * otherwise.  Neither string is read past its terminating NUL: a short
 * s2 fails the comparison against the next character of s1.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	while (*s1 != '\0')
		if (*s1++ != *s2++)
			return (0);
	return (1);
}
452