xref: /netbsd-src/usr.bin/fmt/fmt.c (revision d40754b094fab3343b249f59ade092ca3bddf330)
1 /*	$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35  The Regents of the University of California.  All rights reserved.");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $");
43 #endif /* not lint */
44 
45 #include <wctype.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <err.h>
52 #include <limits.h>
53 #include <string.h>
54 #include <locale.h>
55 #include "buffer.h"
56 
57 /*
58  * fmt -- format the concatenation of input files or standard input
59  * onto standard output.  Designed for use with Mail ~|
60  *
61  * Syntax : fmt [ goal [ max ] ] [ name ... ]
62  * Authors: Kurt Shoens (UCB) 12/7/78;
63  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
64  */
65 
66 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
67 #define GOAL_LENGTH 65	/* Default for goal_length, installed by main() */
68 #define MAX_LENGTH 75	/* Default for max_length, installed by main() */
69 static size_t	goal_length;	/* Target or goal line length in output */
70 static size_t	max_length;	/* Max line length in output */
71 static size_t	pfx;		/* Current leading blank count */
72 static int	raw;		/* Don't treat mail specially */
73 static int	lineno;		/* Current input line */
74 static int	mark;		/* Last place we saw a head line */
75 static int	center;		/* Non-zero (-C): fmt() centers lines instead of filling */
76 static struct buffer outbuf;	/* Output line being built by pack(), emitted by oflush() */
77 
78 static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL };
79 
80 static void	usage(void) __dead;
81 static int 	getnum(const char *, const char *, size_t *, int);
82 static void	fmt(FILE *);
83 static int	ispref(const wchar_t *, const wchar_t *);
84 static void	leadin(void);
85 static void	oflush(void);
86 static void	pack(const wchar_t *, size_t);
87 static void	prefix(const struct buffer *, int);
88 static void	split(const wchar_t *, int);
89 static void	tabulate(struct buffer *);
90 
91 
92 int		ishead(const wchar_t *);
93 
94 /*
95  * Drive the whole formatter by managing input files.  Also,
96  * cause initialization of the output stuff and flush it out
97  * at the end.
98  */
99 
100 int
main(int argc,char ** argv)101 main(int argc, char **argv)
102 {
103 	FILE *fi;
104 	int errs = 0;
105 	int compat = 1;
106 	int c;
107 
108 	goal_length = GOAL_LENGTH;
109 	max_length = MAX_LENGTH;
110 	buf_init(&outbuf);
111 	lineno = 1;
112 	mark = -10;
113 
114 	setprogname(*argv);
115 	(void)setlocale(LC_ALL, "");
116 
117 	while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1)
118 		switch (c) {
119 		case 'C':
120 			center++;
121 			break;
122 		case 'g':
123 			(void)getnum(optarg, "goal", &goal_length, 1);
124 			compat = 0;
125 			break;
126 		case 'm':
127 		case 'w':
128 			(void)getnum(optarg, "max", &max_length, 1);
129 			compat = 0;
130 			break;
131 		case 'r':
132 			raw++;
133 			break;
134 		default:
135 			usage();
136 		}
137 
138 	argc -= optind;
139 	argv += optind;
140 
141 	/*
142 	 * compatibility with old usage.
143 	 */
144 	if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
145 		argv++;
146 		argc--;
147 		if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
148 			argv++;
149 			argc--;
150 		}
151 	}
152 
153 	if (max_length <= goal_length) {
154 		errx(1, "Max length (%zu) must be greater than goal "
155 		    "length (%zu)", max_length, goal_length);
156 	}
157 	if (argc == 0) {
158 		fmt(stdin);
159 		oflush();
160 		return 0;
161 	}
162 	for (;argc; argc--, argv++) {
163 		if ((fi = fopen(*argv, "r")) == NULL) {
164 			warn("Cannot open `%s'", *argv);
165 			errs++;
166 			continue;
167 		}
168 		fmt(fi);
169 		(void)fclose(fi);
170 	}
171 	oflush();
172 	buf_end(&outbuf);
173 	return errs;
174 }
175 
/*
 * Print the two accepted command-line forms on stderr and exit with
 * status 1.  Does not return.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
185 
/*
 * Parse `str' as a non-negative length and store it in `*res'.
 *
 * The number is read with strtoul() in base 0, so decimal, octal
 * (leading 0) and hexadecimal (leading 0x) are all accepted.  The
 * whole string must be consumed.  `what' names the value in error
 * messages.
 *
 * Returns 1 and sets *res on success.  If the string is not a valid
 * number, returns 0 when `badnum' is zero and exits with an error
 * otherwise.  A number too large for size_t is always a fatal error.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	unsigned long ul;
	char *ep;

	errno = 0;
	ul = strtoul(str, &ep, 0);

	/*
	 * strtoul() accepts a leading '-' and returns the negated value
	 * wrapped to unsigned, which would turn "-1" into a huge length.
	 * In a fully consumed number a '-' can only be that sign, so its
	 * presence anywhere means the input is negative: reject it.
	 */
	if (*str == '\0' || *ep != '\0' || strchr(str, '-') != NULL) {
		if (badnum)
			errx(1, "Bad %s number `%s'", what, str);
		return 0;
	}

	/* Overflowed unsigned long, or does not survive narrowing to size_t. */
	if ((errno == ERANGE && ul == ULONG_MAX) || (unsigned long)(size_t)ul != ul)
		errx(1, "%s number `%s' too big", what, str);

	*res = (size_t)ul;
	return 1;
}
204 
205 /*
206  * Read up characters from the passed input file, forming lines,
207  * doing ^H processing, expanding tabs, stripping trailing blanks,
208  * and sending each line down for analysis.
209  */
210 static void
fmt(FILE * fi)211 fmt(FILE *fi)
212 {
213 	struct buffer lbuf, cbuf;
214 	wchar_t *cp, *cp2;
215 	wint_t c;
216 	int add_space;
217 	size_t len, col, i;
218 
219 	if (center) {
220 		for (;;) {
221 			cp = fgetwln(fi, &len);
222 			if (!cp)
223 				return;
224 
225 			/* skip over leading space */
226 			while (len > 0) {
227 				if (!iswspace(*cp))
228 					break;
229 				cp++;
230 				len--;
231 			}
232 
233 			/* clear trailing space */
234 			while (len > 0) {
235 				if (!iswspace((unsigned char)cp[len-1]))
236 					break;
237 				len--;
238 			}
239 
240 			if (len == 0) {
241 				/* blank line */
242 				(void)putwchar(L'\n');
243 				continue;
244 			}
245 
246 			if (goal_length > len) {
247 				for (i = 0; i < (goal_length - len) / 2; i++) {
248 					(void)putwchar(L' ');
249 				}
250 			}
251 			for (i = 0; i < len; i++) {
252 				(void)putwchar(cp[i]);
253 			}
254 			(void)putwchar(L'\n');
255 		}
256 	}
257 
258 	buf_init(&lbuf);
259 	buf_init(&cbuf);
260 	c = getwc(fi);
261 
262 	while (c != WEOF) {
263 		/*
264 		 * Collect a line, doing ^H processing.
265 		 * Leave tabs for now.
266 		 */
267 		buf_reset(&lbuf);
268 		while (c != '\n' && c != WEOF) {
269 			if (c == '\b') {
270 				(void)buf_unputc(&lbuf);
271 				c = getwc(fi);
272 				continue;
273 			}
274 			if(!(iswprint(c) || c == '\t' || c >= 160)) {
275 				c = getwc(fi);
276 				continue;
277 			}
278 			buf_putc(&lbuf, c);
279 			c = getwc(fi);
280 		}
281 		buf_putc(&lbuf, '\0');
282 		(void)buf_unputc(&lbuf);
283 		add_space = c != WEOF;
284 
285 		/*
286 		 * Expand tabs on the way.
287 		 */
288 		col = 0;
289 		cp = lbuf.bptr;
290 		buf_reset(&cbuf);
291 		while ((c = *cp++) != '\0') {
292 			if (c != '\t') {
293 				col++;
294 				buf_putc(&cbuf, c);
295 				continue;
296 			}
297 			do {
298 				buf_putc(&cbuf, ' ');
299 				col++;
300 			} while ((col & 07) != 0);
301 		}
302 
303 		/*
304 		 * Swipe trailing blanks from the line.
305 		 */
306 		for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
307 			continue;
308 		cbuf.ptr = cp2 + 1;
309 		buf_putc(&cbuf, '\0');
310 		(void)buf_unputc(&cbuf);
311 		prefix(&cbuf, add_space);
312 		if (c != WEOF)
313 			c = getwc(fi);
314 	}
315 	buf_end(&cbuf);
316 	buf_end(&lbuf);
317 }
318 
319 /*
320  * Take a line devoid of tabs and other garbage and determine its
321  * blank prefix.  If the indent changes, call for a linebreak.
322  * If the input line is blank, echo the blank line on the output.
323  * Finally, if the line minus the prefix is a mail header, try to keep
324  * it on a line by itself.
325  */
326 static void
prefix(const struct buffer * buf,int add_space)327 prefix(const struct buffer *buf, int add_space)
328 {
329 	const wchar_t *cp;
330 	const wchar_t **hp;
331 	size_t np;
332 	int h;
333 
334 	if (buf->ptr == buf->bptr) {
335 		oflush();
336 		(void)putwchar(L'\n');
337 		return;
338 	}
339 	for (cp = buf->bptr; *cp == ' '; cp++)
340 		continue;
341 	np = cp - buf->bptr;
342 
343 	/*
344 	 * The following horrible expression attempts to avoid linebreaks
345 	 * when the indent changes due to a paragraph.
346 	 */
347 	if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
348 		oflush();
349 	if (!raw) {
350 		if ((h = ishead(cp)) != 0) {
351 			oflush();
352 			mark = lineno;
353 		}
354 		if (lineno - mark < 3 && lineno - mark > 0)
355 			for (hp = &headnames[0]; *hp != NULL; hp++)
356 				if (ispref(*hp, cp)) {
357 					h = 1;
358 					oflush();
359 					break;
360 				}
361 		if (!h && (h = (*cp == '.')))
362 			oflush();
363 	} else
364 		h = 0;
365 	pfx = np;
366 	if (h) {
367 		pack(cp, (size_t)(buf->ptr - cp));
368 		oflush();
369 	} else
370 		split(cp, add_space);
371 	lineno++;
372 }
373 
374 /*
375  * Split up the passed line into output "words" which are
376  * maximal strings of non-blanks with the blank separation
377  * attached at the end.  Pass these words along to the output
378  * line packer.
379  */
380 static void
split(const wchar_t line[],int add_space)381 split(const wchar_t line[], int add_space)
382 {
383 	const wchar_t *cp;
384 	struct buffer word;
385 	size_t wlen;
386 
387 	buf_init(&word);
388 	cp = line;
389 	while (*cp) {
390 		buf_reset(&word);
391 		wlen = 0;
392 
393 		/*
394 		 * Collect a 'word,' allowing it to contain escaped white
395 		 * space.
396 		 */
397 		while (*cp && *cp != ' ') {
398 			if (*cp == '\\' && iswspace(cp[1]))
399 				buf_putc(&word, *cp++);
400 			buf_putc(&word, *cp++);
401 			wlen++;
402 		}
403 
404 		/*
405 		 * Guarantee a space at end of line. Two spaces after end of
406 		 * sentence punctuation.
407 		 */
408 		if (*cp == '\0' && add_space) {
409 			buf_putc(&word, ' ');
410 			if (strchr(".:!", cp[-1]))
411 				buf_putc(&word, ' ');
412 		}
413 		while (*cp == ' ')
414 			buf_putc(&word, *cp++);
415 
416 		buf_putc(&word, '\0');
417 		(void)buf_unputc(&word);
418 
419 		pack(word.bptr, wlen);
420 	}
421 	buf_end(&word);
422 }
423 
424 /*
425  * Output section.
426  * Build up line images from the words passed in.  Prefix
427  * each line with correct number of blanks.
428  *
429  * At the bottom of this whole mess, leading tabs are reinserted.
430  */
431 
432 /*
433  * Pack a word onto the output line.  If this is the beginning of
434  * the line, push on the appropriately-sized string of blanks first.
435  * If the word won't fit on the current line, flush and begin a new
436  * line.  If the word is too long to fit all by itself on a line,
437  * just give it its own and hope for the best.
438  *
439  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
440  *	goal length, take it.  If not, then check to see if the line
441  *	will be over the max length; if so put the word on the next
442  *	line.  If not, check to see if the line will be closer to the
443  *	goal length with or without the word and take it or put it on
444  *	the next line accordingly.
445  */
446 
447 static void
pack(const wchar_t * word,size_t wlen)448 pack(const wchar_t *word, size_t wlen)
449 {
450 	const wchar_t *cp;
451 	size_t s, t;
452 
453 	if (outbuf.bptr == outbuf.ptr)
454 		leadin();
455 	/*
456 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
457 	 * length of the line before the word is added; t is now the length
458 	 * of the line after the word is added
459 	 */
460 	s = outbuf.ptr - outbuf.bptr;
461 	t = wlen + s;
462 	if ((t <= goal_length) || ((t <= max_length) &&
463 	    (s <= goal_length) && (t - goal_length <= goal_length - s))) {
464 		/*
465 		 * In like flint!
466 		 */
467 		for (cp = word; *cp;)
468 			buf_putc(&outbuf, *cp++);
469 		return;
470 	}
471 	if (s > pfx) {
472 		oflush();
473 		leadin();
474 	}
475 	for (cp = word; *cp;)
476 		buf_putc(&outbuf, *cp++);
477 }
478 
479 /*
480  * If there is anything on the current output line, send it on
481  * its way.  Reset outbuf.
482  */
483 static void
oflush(void)484 oflush(void)
485 {
486 	if (outbuf.bptr == outbuf.ptr)
487 		return;
488 	buf_putc(&outbuf, '\0');
489 	(void)buf_unputc(&outbuf);
490 	tabulate(&outbuf);
491 	buf_reset(&outbuf);
492 }
493 
494 /*
495  * Take the passed line buffer, insert leading tabs where possible, and
496  * output on standard output (finally).
497  */
498 static void
tabulate(struct buffer * buf)499 tabulate(struct buffer *buf)
500 {
501 	wchar_t *cp;
502 	size_t b, t;
503 
504 	/*
505 	 * Toss trailing blanks in the output line.
506 	 */
507 	for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
508 		continue;
509 	*++cp = '\0';
510 
511 	/*
512 	 * Count the leading blank space and tabulate.
513 	 */
514 	for (cp = buf->bptr; *cp == ' '; cp++)
515 		continue;
516 	b = cp - buf->bptr;
517 	t = b / 8;
518 	b = b % 8;
519 	if (t > 0)
520 		do
521 			(void)putwchar(L'\t');
522 		while (--t);
523 	if (b > 0)
524 		do
525 			(void)putwchar(L' ');
526 		while (--b);
527 	while (*cp)
528 		(void)putwchar(*cp++);
529 	(void)putwchar(L'\n');
530 }
531 
532 /*
533  * Initialize the output line with the appropriate number of
534  * leading blanks.
535  */
536 static void
leadin(void)537 leadin(void)
538 {
539 	size_t b;
540 
541 	buf_reset(&outbuf);
542 
543 	for (b = 0; b < pfx; b++)
544 		buf_putc(&outbuf, ' ');
545 }
546 
/*
 * Is s1 a prefix of s2?
 *
 * Both arguments are NUL-terminated wide strings.  Returns 1 if every
 * character of s1 matches the character at the same position in s2
 * (an empty s1 is a prefix of anything), 0 otherwise.  Neither string
 * is read past its terminator: if s2 is shorter than s1, its NUL
 * mismatches and ends the comparison.
 */
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{

	while (*s1 != L'\0') {
		/* Both pointers must advance together. */
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
558