xref: /netbsd-src/usr.bin/fmt/fmt.c (revision 481fca6e59249d8ffcf24fef7cfbe7b131bfb080)
1 /*	$NetBSD: fmt.c,v 1.11 1999/11/02 21:17:16 jwise Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
45 #endif
46 __RCSID("$NetBSD: fmt.c,v 1.11 1999/11/02 21:17:16 jwise Exp $");
47 #endif /* not lint */
48 
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
54 
55 /*
56  * fmt -- format the concatenation of input files or standard input
57  * onto standard output.  Designed for use with Mail ~|
58  *
59  * Syntax : fmt [ goal [ max ] ] [ name ... ]
60  * Authors: Kurt Shoens (UCB) 12/7/78;
61  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
62  */
63 
64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
65  * #define	LENGTH	72		Max line length in output
66  */
#define	NOSTR	((char *) 0)	/* Typed null character pointer (quiets lint) */

/* LIZ@UOM 6/18/85 -- defaults for the goal and maximum line lengths */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

/* Formatter state shared by the input and output routines. */
int	goal_length;		/* Preferred (goal) length of an output line */
int	max_length;		/* Upper limit on the length of an output line */
int	pfx;			/* Leading blank count currently in effect */
int	lineno;			/* Number of the input line being processed */
int	mark;			/* Value of lineno when a head line was last seen */

/* Header field names looked for shortly after a head line; null-terminated. */
char	*headnames[] = { "To", "Subject", "Cc", NOSTR };
79 
80 static void	fmt __P((FILE *));
81 static int	ispref __P((const char *, const char *));
82 static void	leadin __P((void));
83 static void	oflush __P((void));
84 static void	pack __P((const char *, int));
85 static void	prefix __P((const char *, int));
86 static void	setout __P((void));
87 static void	split __P((const char *, int));
88 static void	tabulate __P((char *));
89 
90 int	ishead __P((const char *));
91 int	main __P((int, char **));
92 
93 /*
94  * Drive the whole formatter by managing input files.  Also,
95  * cause initialization of the output stuff and flush it out
96  * at the end.
97  */
98 
99 int
100 main(argc, argv)
101 	int argc;
102 	char **argv;
103 {
104 	FILE *fi;
105 	int errs = 0;
106 	int number;		/* LIZ@UOM 6/18/85 */
107 
108 	goal_length = GOAL_LENGTH;
109 	max_length = MAX_LENGTH;
110 	setout();
111 	lineno = 1;
112 	mark = -10;
113 
114 	setlocale(LC_ALL, "");
115 
116 	/*
117 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
118 	 */
119 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
120 		argv++;
121 		argc--;
122 		goal_length = abs(number);
123 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
124 			argv++;
125 			argc--;
126 			max_length = abs(number);
127 		}
128 	}
129 	if (max_length <= goal_length) {
130 		fprintf(stderr, "Max length must be greater than %s\n",
131 			"goal length");
132 		exit(1);
133 	}
134 	if (argc < 2) {
135 		fmt(stdin);
136 		oflush();
137 		exit(0);
138 	}
139 	while (--argc) {
140 		if ((fi = fopen(*++argv, "r")) == NULL) {
141 			perror(*argv);
142 			errs++;
143 			continue;
144 		}
145 		fmt(fi);
146 		fclose(fi);
147 	}
148 	oflush();
149 	exit(errs);
150 }
151 
/*
 * Read up characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * Characters that are neither tabs nor printable according to isprint()
 * are dropped.  An input line that does not fit in the line buffer is
 * passed on in pieces: each piece ends at the last whitespace character
 * collected so far, and the partial word after it is held over at the
 * front of the buffer for the next piece.  If a piece contains no
 * whitespace at all it is passed on whole, with add_space cleared so
 * that split() does not put a blank in the middle of the word.
 */
static void
fmt(fi)
	FILE *fi;
{
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	char *cp, *cp2, *tail;
	const char *src;
	size_t carry;		/* Held-over chars at the front of linebuf */
	int c, ch, col, add_space, pending;

	carry = 0;
	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.  Text held over from the previous
		 * piece of an over-long line is already in place.
		 */
		cp = linebuf + carry;
		carry = 0;
		while (c != '\n' && c != EOF && cp - linebuf < BUFSIZ - 1) {
			if (c == '\b') {
				if (cp > linebuf)
					cp--;
			} else if (isprint(c) || c == '\t')
				*cp++ = c;
			c = getc(fi);
		}
		*cp = '\0';

		/*
		 * By default, add space after the end of current input
		 * (normally end of line)
		 */
		add_space = 1;

		/*
		 * pending is set when c has been read but still belongs to
		 * the text that follows this piece, so it must not be
		 * replaced by a fresh getc() below.
		 */
		pending = 0;
		tail = cp;

		/*
		 * The collection loop stops on a character other than
		 * newline or EOF only when linebuf is full.  If that
		 * character is not whitespace we stopped inside a word.
		 */
		if (c != '\n' && c != EOF && !isspace(c)) {
			pending = 1;

			/*
			 * Back up to just after the last whitespace
			 * character collected, if there is one.
			 */
			while (tail > linebuf &&
			    !isspace((unsigned char)tail[-1]))
				tail--;

			if (tail == linebuf) {
				/*
				 * No whitespace at all: process everything
				 * read so far and don't append space on the
				 * end in split().
				 */
				add_space = 0;
			} else {
				/*
				 * To avoid splitting a word in the middle,
				 * end this piece at the whitespace character
				 * and hold the partial word over.
				 */
				carry = (size_t)(cp - tail);
				tail[-1] = '\0';
			}
		}

		/*
		 * Expand tabs on the way to canonb.  A separate variable
		 * is used for the character being copied so that the
		 * lookahead in c survives for the end-of-input test below.
		 */
		col = 0;
		cp2 = canonb;
		for (src = linebuf; (ch = *src) != '\0'; src++) {
			if (ch != '\t') {
				col++;
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ch;
				continue;
			}
			do {
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line.
		 */
		while (cp2 > canonb && cp2[-1] == ' ')
			cp2--;
		*cp2 = '\0';
		prefix(canonb, add_space);

		if (pending) {
			/*
			 * Slide the held-over partial word to the front of
			 * linebuf; c still holds the character after it.
			 */
			if (carry > 0)
				memmove(linebuf, tail, carry);
		} else if (c != EOF)
			c = getc(fi);
	}
}
262 
263 /*
264  * Take a line devoid of tabs and other garbage and determine its
265  * blank prefix.  If the indent changes, call for a linebreak.
266  * If the input line is blank, echo the blank line on the output.
267  * Finally, if the line minus the prefix is a mail header, try to keep
268  * it on a line by itself.
269  */
270 static void
271 prefix(line, add_space)
272 	const char line[];
273 	int add_space;
274 {
275 	const char *cp;
276 	char **hp;
277 	int np, h;
278 
279 	if (strlen(line) == 0) {
280 		oflush();
281 		putchar('\n');
282 		return;
283 	}
284 	for (cp = line; *cp == ' '; cp++)
285 		;
286 	np = cp - line;
287 
288 	/*
289 	 * The following horrible expression attempts to avoid linebreaks
290 	 * when the indent changes due to a paragraph.
291 	 */
292 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
293 		oflush();
294 	if ((h = ishead(cp)) != 0)
295 		oflush(), mark = lineno;
296 	if (lineno - mark < 3 && lineno - mark > 0)
297 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
298 			if (ispref(*hp, cp)) {
299 				h = 1;
300 				oflush();
301 				break;
302 			}
303 	if (!h && (h = (*cp == '.')))
304 		oflush();
305 	pfx = np;
306 	if (h) {
307 		pack(cp, strlen(cp));
308 		oflush();
309 	} else
310 		split(cp, add_space);
311 	lineno++;
312 }
313 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line		the text to split; when it is not empty it must start
 *		with a non-blank character
 * add_space	if non-zero, the last word gets a trailing blank, or
 *		two when it ends in '.', ':' or '!'
 *
 * The word buffer has room for a word of BUFSIZ-1 characters plus the
 * two blanks that may be appended to it; stores are bounds-checked so
 * that longer input is truncated rather than written past the buffer.
 */
static void
split(line, add_space)
	const char line[];
	int add_space;
{
	const char *cp;
	char *cp2, *limit;
	char word[BUFSIZ + 2];
	int wordl;		/* LIZ@UOM 6/18/85 */

	/* Last usable slot; the byte at limit is kept for the NUL. */
	limit = word + sizeof(word) - 1;

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the whitespace character it
		 * escapes are both copied but counted as one in wordl.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1])) {
				if (cp2 < limit)
					*cp2++ = *cp;
				cp++;
			}
			if (cp2 < limit)
				*cp2++ = *cp;
			cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  (At least one character was
		 * collected above when *cp is NUL here, so cp[-1] is valid.)
		 */
		if (*cp == '\0' && add_space) {
			if (cp2 < limit)
				*cp2++ = ' ';
			if (strchr(".:!", cp[-1]) != NULL && cp2 < limit)
				*cp2++ = ' ';
		}
		while (*cp == ' ') {
			if (cp2 < limit)
				*cp2++ = ' ';
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 -- the word length, without its blank
		 * separation, goes along to pack().
		 */
		pack(word, wordl);
	}
}
364 
365 /*
366  * Output section.
367  * Build up line images from the words passed in.  Prefix
368  * each line with correct number of blanks.  The buffer "outbuf"
369  * contains the current partial line image, including prefixed blanks.
370  * "outp" points to the next available space therein.  When outp is NOSTR,
371  * there ain't nothing in there yet.  At the bottom of this whole mess,
372  * leading tabs are reinserted.
373  */
374 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
375 char	*outp;				/* Pointer in above */
376 
377 /*
378  * Initialize the output section.
379  */
380 static void
381 setout()
382 {
383 	outp = NOSTR;
384 }
385 
386 /*
387  * Pack a word onto the output line.  If this is the beginning of
388  * the line, push on the appropriately-sized string of blanks first.
389  * If the word won't fit on the current line, flush and begin a new
390  * line.  If the word is too long to fit all by itself on a line,
391  * just give it its own and hope for the best.
392  *
393  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
394  *	goal length, take it.  If not, then check to see if the line
395  *	will be over the max length; if so put the word on the next
396  *	line.  If not, check to see if the line will be closer to the
397  *	goal length with or without the word and take it or put it on
398  *	the next line accordingly.
399  */
400 
401 /*
402  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
403  * pack(word)
404  *	char word[];
405  */
406 static void
407 pack(word,wl)
408 	const char word[];
409 	int wl;
410 {
411 	const char *cp;
412 	int s, t;
413 
414 	if (outp == NOSTR)
415 		leadin();
416 	/*
417 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
418 	 * length of the line before the word is added; t is now the length
419 	 * of the line after the word is added
420 	 *	t = strlen(word);
421 	 *	if (t+s <= LENGTH)
422 	 */
423 	s = outp - outbuf;
424 	t = wl + s;
425 	if ((t <= goal_length) ||
426 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
427 		/*
428 		 * In like flint!
429 		 */
430 		for (cp = word; *cp; *outp++ = *cp++);
431 		return;
432 	}
433 	if (s > pfx) {
434 		oflush();
435 		leadin();
436 	}
437 	for (cp = word; *cp; *outp++ = *cp++);
438 }
439 
440 /*
441  * If there is anything on the current output line, send it on
442  * its way.  Set outp to NOSTR to indicate the absence of the current
443  * line prefix.
444  */
445 static void
446 oflush()
447 {
448 	if (outp == NOSTR)
449 		return;
450 	*outp = '\0';
451 	tabulate(outbuf);
452 	outp = NOSTR;
453 }
454 
/*
 * Write the passed line to standard output, followed by a newline.
 * Trailing blanks are removed first (the buffer itself is shortened),
 * and each full group of 8 leading blanks is written as a tab.
 */
static void
tabulate(line)
	char line[];
{
	char *text, *end;
	int blanks, tabs, i;

	/*
	 * Cut the line off after its last non-blank character.
	 */
	end = line + strlen(line);
	while (end > line && end[-1] == ' ')
		end--;
	*end = '\0';

	/*
	 * Measure the leading blanks and emit them as tabs plus
	 * whatever blanks are left over.
	 */
	text = line;
	while (*text == ' ')
		text++;
	blanks = text - line;
	tabs = blanks / 8;
	blanks %= 8;
	for (i = 0; i < tabs; i++)
		putc('\t', stdout);
	for (i = 0; i < blanks; i++)
		putc(' ', stdout);

	fputs(text, stdout);
	putc('\n', stdout);
}
494 
495 /*
496  * Initialize the output line with the appropriate number of
497  * leading blanks.
498  */
499 static void
500 leadin()
501 {
502 	int b;
503 	char *cp;
504 
505 	for (b = 0, cp = outbuf; b < pfx; b++)
506 		*cp++ = ' ';
507 	outp = cp;
508 }
509 
/*
 * Is s1 a prefix of s2?
 *
 * Returns non-zero when every character of s1 matches the character at
 * the same position in s2 (so an empty s1 is a prefix of anything),
 * and zero otherwise.  Neither string is read beyond its terminating
 * NUL.
 */
static int
ispref(s1, s2)
	const char *s1, *s2;
{

	return (strncmp(s1, s2, strlen(s1)) == 0);
}
522