xref: /netbsd-src/usr.bin/fmt/fmt.c (revision 56a34939419542e88b386b2229be7565f4f45461)
1 /*	$NetBSD: fmt.c,v 1.31 2008/07/21 14:19:22 lukem Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35  The Regents of the University of California.  All rights reserved.");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.31 2008/07/21 14:19:22 lukem Exp $");
43 #endif /* not lint */
44 
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "buffer.h"
55 
56 /*
57  * fmt -- format the concatenation of input files or standard input
58  * onto standard output.  Designed for use with Mail ~|
59  *
60  * Syntax : fmt [ goal [ max ] ] [ name ... ]
61  * Authors: Kurt Shoens (UCB) 12/7/78;
62  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
63  */
64 
65 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
66 #define GOAL_LENGTH 65
67 #define MAX_LENGTH 75
68 static size_t	goal_length;	/* Target or goal line length in output */
69 static size_t	max_length;	/* Max line length in output */
70 static size_t	pfx;		/* Current leading blank count */
71 static int	raw;		/* Don't treat mail specially */
72 static int	lineno;		/* Current input line */
73 static int	mark;		/* Last place we saw a head line */
74 static int	center;		/* Set by -C: center each line instead of filling */
75 static struct buffer outbuf;	/* Output line currently being assembled */
76 
77 static const char	*headnames[] = {"To", "Subject", "Cc", 0};	/* Field names looked for on the two lines after a head line */
78 
79 static void	usage(void) __dead;
80 static int 	getnum(const char *, const char *, size_t *, int);
81 static void	fmt(FILE *);
82 static int	ispref(const char *, const char *);
83 static void	leadin(void);
84 static void	oflush(void);
85 static void	pack(const char *, size_t);
86 static void	prefix(const struct buffer *, int);
87 static void	split(const char *, int);
88 static void	tabulate(struct buffer *);
89 
90 
91 int		ishead(const char *);
92 
93 /*
94  * Drive the whole formatter by managing input files.  Also,
95  * cause initialization of the output stuff and flush it out
96  * at the end.
97  */
98 
99 int
100 main(int argc, char **argv)
101 {
102 	FILE *fi;
103 	int errs = 0;
104 	int compat = 1;
105 	int c;
106 
107 	goal_length = GOAL_LENGTH;
108 	max_length = MAX_LENGTH;
109 	buf_init(&outbuf);
110 	lineno = 1;
111 	mark = -10;
112 
113 	setprogname(*argv);
114 	(void)setlocale(LC_ALL, "");
115 
116 	while ((c = getopt(argc, argv, "Cg:m:r")) != -1)
117 		switch (c) {
118 		case 'C':
119 			center++;
120 			break;
121 		case 'g':
122 			(void)getnum(optarg, "goal", &goal_length, 1);
123 			compat = 0;
124 			break;
125 		case 'm':
126 			(void)getnum(optarg, "max", &max_length, 1);
127 			compat = 0;
128 			break;
129 		case 'r':
130 			raw++;
131 			break;
132 		default:
133 			usage();
134 		}
135 
136 	argc -= optind;
137 	argv += optind;
138 
139 	/*
140 	 * compatibility with old usage.
141 	 */
142 	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
143 		argv++;
144 		argc--;
145 		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
146 			argv++;
147 			argc--;
148 		}
149 	}
150 
151 	if (max_length <= goal_length) {
152 		errx(1, "Max length (%zu) must be greater than goal "
153 		    "length (%zu)", max_length, goal_length);
154 	}
155 	if (argc == 0) {
156 		fmt(stdin);
157 		oflush();
158 		return 0;
159 	}
160 	for (;argc; argc--, argv++) {
161 		if ((fi = fopen(*argv, "r")) == NULL) {
162 			warn("Cannot open `%s'", *argv);
163 			errs++;
164 			continue;
165 		}
166 		fmt(fi);
167 		(void)fclose(fi);
168 	}
169 	oflush();
170 	buf_end(&outbuf);
171 	return errs;
172 }
173 
/*
 * Print the command synopsis (new and old style) on standard error
 * and exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
183 
/*
 * Parse `str' as a non-negative line length.
 *
 * str:    text to convert; any base accepted by strtoul(..., 0).
 * what:   name of the quantity ("goal", "max"), used in diagnostics.
 * res:    receives the value; left untouched unless 1 is returned.
 * badnum: if non-zero, a string that is not a number is a fatal error;
 *         if zero, 0 is returned so the caller can treat `str' as
 *         something else (e.g. a file name).
 *
 * Returns 1 if `str' was entirely a valid number, 0 otherwise.  A
 * number that is well-formed but does not fit is always fatal.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	const char *p;
	unsigned long ul;
	char *ep;

	/*
	 * strtoul() accepts "-N" and returns the negated value, which
	 * would turn e.g. "-1" into a gigantic length.  Find the first
	 * significant character so that a leading minus can be rejected.
	 */
	for (p = str; isspace((unsigned char)*p); p++)
		continue;

	errno = 0;
	ul = strtoul(str, &ep, 0);
	/* Something was converted, and nothing was left over. */
	if (*p != '-' && ep != str && *ep == '\0') {
		if ((errno == ERANGE && ul == ULONG_MAX) || ul > SIZE_MAX)
			errx(1, "%s number `%s' too big", what, str);
		*res = (size_t)ul;
		return 1;
	}
	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
202 
203 /*
204  * Read up characters from the passed input file, forming lines,
205  * doing ^H processing, expanding tabs, stripping trailing blanks,
206  * and sending each line down for analysis.
207  */
208 static void
209 fmt(FILE *fi)
210 {
211 	struct buffer lbuf, cbuf;
212 	char *cp, *cp2;
213 	int c, add_space;
214 	size_t len, col, i;
215 
216 	if (center) {
217 		for (;;) {
218 			cp = fgetln(fi, &len);
219 			if (!cp)
220 				return;
221 
222 			/* skip over leading space */
223 			while (len > 0) {
224 				if (!isspace((unsigned char)*cp))
225 					break;
226 				cp++;
227 				len--;
228 			}
229 
230 			/* clear trailing space */
231 			while (len > 0) {
232 				if (!isspace((unsigned char)cp[len-1]))
233 					break;
234 				len--;
235 			}
236 
237 			if (len == 0) {
238 				/* blank line */
239 				(void)putchar('\n');
240 				continue;
241 			}
242 
243 			if (goal_length > len) {
244 				for (i = 0; i < (goal_length - len) / 2; i++) {
245 					(void)putchar(' ');
246 				}
247 			}
248 			for (i = 0; i < len; i++) {
249 				(void)putchar(cp[i]);
250 			}
251 			(void)putchar('\n');
252 		}
253 	}
254 
255 	buf_init(&lbuf);
256 	buf_init(&cbuf);
257 	c = getc(fi);
258 
259 	while (c != EOF) {
260 		/*
261 		 * Collect a line, doing ^H processing.
262 		 * Leave tabs for now.
263 		 */
264 		buf_reset(&lbuf);
265 		while (c != '\n' && c != EOF) {
266 			if (c == '\b') {
267 				(void)buf_unputc(&lbuf);
268 				c = getc(fi);
269 				continue;
270 			}
271 			if(!(isprint(c) || c == '\t' || c >= 160)) {
272 				c = getc(fi);
273 				continue;
274 			}
275 			buf_putc(&lbuf, c);
276 			c = getc(fi);
277 		}
278 		buf_putc(&lbuf, '\0');
279 		(void)buf_unputc(&lbuf);
280 		add_space = c != EOF;
281 
282 		/*
283 		 * Expand tabs on the way.
284 		 */
285 		col = 0;
286 		cp = lbuf.bptr;
287 		buf_reset(&cbuf);
288 		while ((c = *cp++) != '\0') {
289 			if (c != '\t') {
290 				col++;
291 				buf_putc(&cbuf, c);
292 				continue;
293 			}
294 			do {
295 				buf_putc(&cbuf, ' ');
296 				col++;
297 			} while ((col & 07) != 0);
298 		}
299 
300 		/*
301 		 * Swipe trailing blanks from the line.
302 		 */
303 		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
304 			continue;
305 		cbuf.ptr = cp2 + 1;
306 		buf_putc(&cbuf, '\0');
307 		(void)buf_unputc(&cbuf);
308 		prefix(&cbuf, add_space);
309 		if (c != EOF)
310 			c = getc(fi);
311 	}
312 	buf_end(&cbuf);
313 	buf_end(&lbuf);
314 }
315 
316 /*
317  * Take a line devoid of tabs and other garbage and determine its
318  * blank prefix.  If the indent changes, call for a linebreak.
319  * If the input line is blank, echo the blank line on the output.
320  * Finally, if the line minus the prefix is a mail header, try to keep
321  * it on a line by itself.
322  */
323 static void
324 prefix(const struct buffer *buf, int add_space)
325 {
326 	const char *cp;
327 	const char **hp;
328 	size_t np;
329 	int h;
330 
331 	if (buf->ptr == buf->bptr) {
332 		oflush();
333 		(void)putchar('\n');
334 		return;
335 	}
336 	for (cp = buf->bptr; *cp == ' '; cp++)
337 		continue;
338 	np = cp - buf->bptr;
339 
340 	/*
341 	 * The following horrible expression attempts to avoid linebreaks
342 	 * when the indent changes due to a paragraph.
343 	 */
344 	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
345 		oflush();
346 	if (!raw) {
347 		if ((h = ishead(cp)) != 0) {
348 			oflush();
349 			mark = lineno;
350 		}
351 		if (lineno - mark < 3 && lineno - mark > 0)
352 			for (hp = &headnames[0]; *hp != NULL; hp++)
353 				if (ispref(*hp, cp)) {
354 					h = 1;
355 					oflush();
356 					break;
357 				}
358 		if (!h && (h = (*cp == '.')))
359 			oflush();
360 	} else
361 		h = 0;
362 	pfx = np;
363 	if (h) {
364 		pack(cp, (size_t)(buf->ptr - cp));
365 		oflush();
366 	} else
367 		split(cp, add_space);
368 	lineno++;
369 }
370 
371 /*
372  * Split up the passed line into output "words" which are
373  * maximal strings of non-blanks with the blank separation
374  * attached at the end.  Pass these words along to the output
375  * line packer.
376  */
377 static void
378 split(const char line[], int add_space)
379 {
380 	const char *cp;
381 	struct buffer word;
382 	size_t wlen;
383 
384 	buf_init(&word);
385 	cp = line;
386 	while (*cp) {
387 		buf_reset(&word);
388 		wlen = 0;
389 
390 		/*
391 		 * Collect a 'word,' allowing it to contain escaped white
392 		 * space.
393 		 */
394 		while (*cp && *cp != ' ') {
395 			if (*cp == '\\' && isspace((unsigned char)cp[1]))
396 				buf_putc(&word, *cp++);
397 			buf_putc(&word, *cp++);
398 			wlen++;
399 		}
400 
401 		/*
402 		 * Guarantee a space at end of line. Two spaces after end of
403 		 * sentence punctuation.
404 		 */
405 		if (*cp == '\0' && add_space) {
406 			buf_putc(&word, ' ');
407 			if (strchr(".:!", cp[-1]))
408 				buf_putc(&word, ' ');
409 		}
410 		while (*cp == ' ')
411 			buf_putc(&word, *cp++);
412 
413 		buf_putc(&word, '\0');
414 		(void)buf_unputc(&word);
415 
416 		pack(word.bptr, wlen);
417 	}
418 	buf_end(&word);
419 }
420 
421 /*
422  * Output section.
423  * Build up line images from the words passed in.  Prefix
424  * each line with correct number of blanks.
425  *
426  * At the bottom of this whole mess, leading tabs are reinserted.
427  */
428 
429 /*
430  * Pack a word onto the output line.  If this is the beginning of
431  * the line, push on the appropriately-sized string of blanks first.
432  * If the word won't fit on the current line, flush and begin a new
433  * line.  If the word is too long to fit all by itself on a line,
434  * just give it its own and hope for the best.
435  *
436  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
437  *	goal length, take it.  If not, then check to see if the line
438  *	will be over the max length; if so put the word on the next
439  *	line.  If not, check to see if the line will be closer to the
440  *	goal length with or without the word and take it or put it on
441  *	the next line accordingly.
442  */
443 
444 static void
445 pack(const char *word, size_t wlen)
446 {
447 	const char *cp;
448 	size_t s, t;
449 
450 	if (outbuf.bptr == outbuf.ptr)
451 		leadin();
452 	/*
453 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
454 	 * length of the line before the word is added; t is now the length
455 	 * of the line after the word is added
456 	 */
457 	s = outbuf.ptr - outbuf.bptr;
458 	t = wlen + s;
459 	if ((t <= goal_length) || ((t <= max_length) &&
460 	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
461 		/*
462 		 * In like flint!
463 		 */
464 		for (cp = word; *cp;)
465 			buf_putc(&outbuf, *cp++);
466 		return;
467 	}
468 	if (s > pfx) {
469 		oflush();
470 		leadin();
471 	}
472 	for (cp = word; *cp;)
473 		buf_putc(&outbuf, *cp++);
474 }
475 
476 /*
477  * If there is anything on the current output line, send it on
478  * its way.  Reset outbuf.
479  */
480 static void
481 oflush(void)
482 {
483 	if (outbuf.bptr == outbuf.ptr)
484 		return;
485 	buf_putc(&outbuf, '\0');
486 	(void)buf_unputc(&outbuf);
487 	tabulate(&outbuf);
488 	buf_reset(&outbuf);
489 }
490 
491 /*
492  * Take the passed line buffer, insert leading tabs where possible, and
493  * output on standard output (finally).
494  */
495 static void
496 tabulate(struct buffer *buf)
497 {
498 	char *cp;
499 	size_t b, t;
500 
501 	/*
502 	 * Toss trailing blanks in the output line.
503 	 */
504 	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
505 		continue;
506 	*++cp = '\0';
507 
508 	/*
509 	 * Count the leading blank space and tabulate.
510 	 */
511 	for (cp = buf->bptr; *cp == ' '; cp++)
512 		continue;
513 	b = cp - buf->bptr;
514 	t = b / 8;
515 	b = b % 8;
516 	if (t > 0)
517 		do
518 			(void)putchar('\t');
519 		while (--t);
520 	if (b > 0)
521 		do
522 			(void)putchar(' ');
523 		while (--b);
524 	while (*cp)
525 		(void)putchar(*cp++);
526 	(void)putchar('\n');
527 }
528 
529 /*
530  * Initialize the output line with the appropriate number of
531  * leading blanks.
532  */
533 static void
534 leadin(void)
535 {
536 	size_t b;
537 
538 	buf_reset(&outbuf);
539 
540 	for (b = 0; b < pfx; b++)
541 		buf_putc(&outbuf, ' ');
542 }
543 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 if every character of s1 matches the character at the
 * same position in s2 (so the empty string is a prefix of anything),
 * 0 otherwise.  Both pointers advance together, and the comparison
 * stops at the first mismatch, so neither string is read past its
 * terminating NUL.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
555