xref: /netbsd-src/usr.bin/fmt/fmt.c (revision 3b01aba77a7a698587faaae455bbfe740923c1f5)
1 /*	$NetBSD: fmt.c,v 1.15 2000/10/02 18:32:55 abs Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
45 #endif
46 __RCSID("$NetBSD: fmt.c,v 1.15 2000/10/02 18:32:55 abs Exp $");
47 #endif /* not lint */
48 
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <ctype.h>
53 #include <locale.h>
54 
55 /*
56  * fmt -- format the concatenation of input files or standard input
57  * onto standard output.  Designed for use with Mail ~|
58  *
 * Syntax : fmt [ -C ] [ goal [ max ] ] [ name ... ]
60  * Authors: Kurt Shoens (UCB) 12/7/78;
61  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
62  */
63 
64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
65  * #define	LENGTH	72		Max line length in output
66  */
#define	NOSTR	((char *) 0)	/* Null string pointer for lint */

/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
/* Defaults that main() installs before looking at the arguments. */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero after -C: fmt() centers lines */

/*
 * Zero-terminated list of names that prefix() tests with ispref() on the
 * lines that closely follow a line recognised by ishead().
 */
char	*headnames[] = {"To", "Subject", "Cc", 0};

/* Functions private to this file. */
static void	fmt __P((FILE *));
static int	ispref __P((const char *, const char *));
static void	leadin __P((void));
static void	oflush __P((void));
static void	pack __P((const char *, int));
static void	prefix __P((const char *, int));
static void	setout __P((void));
static void	split __P((const char *, int));
static void	tabulate __P((char *));

/*
 * External linkage.  ishead() is only declared here; its definition is
 * presumably supplied by another source file.
 */
int	ishead __P((const char *));
int	main __P((int, char **));
93 
94 /*
95  * Drive the whole formatter by managing input files.  Also,
96  * cause initialization of the output stuff and flush it out
97  * at the end.
98  */
99 
100 int
101 main(argc, argv)
102 	int argc;
103 	char **argv;
104 {
105 	FILE *fi;
106 	int errs = 0;
107 	int number;		/* LIZ@UOM 6/18/85 */
108 
109 	goal_length = GOAL_LENGTH;
110 	max_length = MAX_LENGTH;
111 	setout();
112 	lineno = 1;
113 	mark = -10;
114 
115 	setlocale(LC_ALL, "");
116 
117 	/*
118 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
119 	 */
120 	if (argc > 1 && !strcmp(argv[1], "-C")) {
121 		center++;
122 		argc--;
123 		argv++;
124 	}
125 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
126 		argv++;
127 		argc--;
128 		goal_length = abs(number);
129 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
130 			argv++;
131 			argc--;
132 			max_length = abs(number);
133 		}
134 	}
135 	if (max_length <= goal_length) {
136 		fprintf(stderr, "Max length must be greater than %s\n",
137 			"goal length");
138 		exit(1);
139 	}
140 	if (argc < 2) {
141 		fmt(stdin);
142 		oflush();
143 		exit(0);
144 	}
145 	while (--argc) {
146 		if ((fi = fopen(*++argv, "r")) == NULL) {
147 			perror(*argv);
148 			errs++;
149 			continue;
150 		}
151 		fmt(fi);
152 		fclose(fi);
153 	}
154 	oflush();
155 	exit(errs);
156 }
157 
158 /*
159  * Read up characters from the passed input file, forming lines,
160  * doing ^H processing, expanding tabs, stripping trailing blanks,
161  * and sending each line down for analysis.
162  */
163 static void
164 fmt(fi)
165 	FILE *fi;
166 {
167 	char linebuf[BUFSIZ], canonb[BUFSIZ];
168 	char *cp, *cp2;
169 	int c, col, add_space;
170 
171 	if (center) {
172 		while (1) {
173 			cp = fgets(linebuf, BUFSIZ, fi);
174 			if (!cp)
175 				return;
176 			while (*cp && isspace(*cp))
177 				cp++;
178 			cp2 = cp + strlen(cp) - 1;
179 			while (cp2 > cp && isspace(*cp2))
180 				cp2--;
181 			if (cp == cp2)
182 				putchar('\n');
183 			col = cp2 - cp;
184 			for (c = 0; c < (goal_length-col)/2; c++)
185 				putchar(' ');
186 			while (cp <= cp2)
187 				putchar(*cp++);
188 			putchar('\n');
189 		}
190 	}
191 	c = getc(fi);
192 	while (c != EOF) {
193 		/*
194 		 * Collect a line, doing ^H processing.
195 		 * Leave tabs for now.
196 		 */
197 		cp = linebuf;
198 		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
199 			if (c == '\b') {
200 				if (cp > linebuf)
201 					cp--;
202 				c = getc(fi);
203 				continue;
204 			}
205 			if(!(isprint(c) || c == '\t' || c >= 160)) {
206 				c = getc(fi);
207 				continue;
208 			}
209 			*cp++ = c;
210 			c = getc(fi);
211 		}
212 		*cp = '\0';
213 
214 		/*
215 		 * By default, add space after the end of current input
216 		 * (normally end of line)
217 		 */
218 		add_space = 1;
219 
220 		/*
221 		 * If the input line is longer than linebuf buffer can hold,
222 		 * process the data read so far as if it was a separate line -
223 		 * if there is any whitespace character in the read data,
224 		 * process all the data up to it, otherwise process all.
225 		 */
226 		if (c != '\n' && c != EOF && !isspace(c)) {
227 			/*
228 			 * Find out if any whitespace character has been read.
229 			 */
230 			for(cp2 = cp; cp2 >= linebuf
231 				&& !isspace((unsigned char)*cp2); cp2--);
232 
233 			if (cp2 < linebuf) {
234 				/*
235 				 * ungetc() last read character so that it
236 				 * won't get lost.
237 				 */
238 				ungetc(c, fi);
239 				/*
240 				 * Don't append space on the end in split().
241 				 */
242 				add_space = 0;
243 			} else {
244 				/*
245 				 * To avoid splitting a word in a middle,
246 				 * ungetc() all characters after last
247 				 * whitespace char.
248 				 */
249 				while (!isspace(c) && (cp >= linebuf)) {
250 					ungetc(c, fi);
251 					c = *--cp;
252 				}
253 				*cp = '\0';
254 			}
255 		}
256 
257 		/*
258 		 * Expand tabs on the way to canonb.
259 		 */
260 		col = 0;
261 		cp = linebuf;
262 		cp2 = canonb;
263 		while ((c = *cp++) != 0) {
264 			if (c != '\t') {
265 				col++;
266 				if (cp2-canonb < BUFSIZ-1)
267 					*cp2++ = c;
268 				continue;
269 			}
270 			do {
271 				if (cp2-canonb < BUFSIZ-1)
272 					*cp2++ = ' ';
273 				col++;
274 			} while ((col & 07) != 0);
275 		}
276 
277 		/*
278 		 * Swipe trailing blanks from the line.
279 		 */
280 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
281 			;
282 		*++cp2 = '\0';
283 		prefix(canonb, add_space);
284 		if (c != EOF)
285 			c = getc(fi);
286 	}
287 }
288 
289 /*
290  * Take a line devoid of tabs and other garbage and determine its
291  * blank prefix.  If the indent changes, call for a linebreak.
292  * If the input line is blank, echo the blank line on the output.
293  * Finally, if the line minus the prefix is a mail header, try to keep
294  * it on a line by itself.
295  */
296 static void
297 prefix(line, add_space)
298 	const char line[];
299 	int add_space;
300 {
301 	const char *cp;
302 	char **hp;
303 	int np, h;
304 
305 	if (strlen(line) == 0) {
306 		oflush();
307 		putchar('\n');
308 		return;
309 	}
310 	for (cp = line; *cp == ' '; cp++)
311 		;
312 	np = cp - line;
313 
314 	/*
315 	 * The following horrible expression attempts to avoid linebreaks
316 	 * when the indent changes due to a paragraph.
317 	 */
318 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
319 		oflush();
320 	if ((h = ishead(cp)) != 0)
321 		oflush(), mark = lineno;
322 	if (lineno - mark < 3 && lineno - mark > 0)
323 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
324 			if (ispref(*hp, cp)) {
325 				h = 1;
326 				oflush();
327 				break;
328 			}
329 	if (!h && (h = (*cp == '.')))
330 		oflush();
331 	pfx = np;
332 	if (h) {
333 		pack(cp, strlen(cp));
334 		oflush();
335 	} else
336 		split(cp, add_space);
337 	lineno++;
338 }
339 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer, pack(), together with the word length counted
 * without the attached blanks.
 *
 * line      - NUL-terminated text to split.
 * add_space - when nonzero, the last word of the line gets a trailing
 *             blank (two after '.', ':' or '!').
 *
 * Every store into the local word buffer is bounded.  A word that
 * would not fit is handed to pack() in pieces instead of overrunning
 * the buffer.
 */
static void
split(const char line[], int add_space)
{
	/*
	 * Sized for a BUFSIZ-1 character word, the second character of
	 * a final escape pair, two trailing blanks and the NUL.
	 */
	char word[BUFSIZ + 3];
	char *const word_limit = word + BUFSIZ - 1;
	char *const word_last = word + sizeof(word) - 1;
	const char *cp;
	char *cp2;
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the white space after it are
		 * copied together and count as one character.
		 */
		while (*cp && *cp != ' ' && cp2 < word_limit) {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  cp[-1] is valid here: reaching
		 * the terminator means the loop above consumed something.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		/* Attach the separating blanks, leaving room for the NUL. */
		while (*cp == ' ' && cp2 < word_last)
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
390 
391 /*
392  * Output section.
393  * Build up line images from the words passed in.  Prefix
394  * each line with correct number of blanks.  The buffer "outbuf"
395  * contains the current partial line image, including prefixed blanks.
396  * "outp" points to the next available space therein.  When outp is NOSTR,
397  * there ain't nothing in there yet.  At the bottom of this whole mess,
398  * leading tabs are reinserted.
399  */
400 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
401 char	*outp;				/* Pointer in above */
402 
403 /*
404  * Initialize the output section.
405  */
406 static void
407 setout()
408 {
409 	outp = NOSTR;
410 }
411 
412 /*
413  * Pack a word onto the output line.  If this is the beginning of
414  * the line, push on the appropriately-sized string of blanks first.
415  * If the word won't fit on the current line, flush and begin a new
416  * line.  If the word is too long to fit all by itself on a line,
417  * just give it its own and hope for the best.
418  *
419  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
420  *	goal length, take it.  If not, then check to see if the line
421  *	will be over the max length; if so put the word on the next
422  *	line.  If not, check to see if the line will be closer to the
423  *	goal length with or without the word and take it or put it on
424  *	the next line accordingly.
425  */
426 
427 /*
428  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
429  * pack(word)
430  *	char word[];
431  */
432 static void
433 pack(word,wl)
434 	const char word[];
435 	int wl;
436 {
437 	const char *cp;
438 	int s, t;
439 
440 	if (outp == NOSTR)
441 		leadin();
442 	/*
443 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
444 	 * length of the line before the word is added; t is now the length
445 	 * of the line after the word is added
446 	 *	t = strlen(word);
447 	 *	if (t+s <= LENGTH)
448 	 */
449 	s = outp - outbuf;
450 	t = wl + s;
451 	if ((t <= goal_length) ||
452 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
453 		/*
454 		 * In like flint!
455 		 */
456 		for (cp = word; *cp; *outp++ = *cp++);
457 		return;
458 	}
459 	if (s > pfx) {
460 		oflush();
461 		leadin();
462 	}
463 	for (cp = word; *cp; *outp++ = *cp++);
464 }
465 
466 /*
467  * If there is anything on the current output line, send it on
468  * its way.  Set outp to NOSTR to indicate the absence of the current
469  * line prefix.
470  */
471 static void
472 oflush()
473 {
474 	if (outp == NOSTR)
475 		return;
476 	*outp = '\0';
477 	tabulate(outbuf);
478 	outp = NOSTR;
479 }
480 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * line - NUL-terminated, writable line image.  Trailing blanks are
 *        removed from it in place before it is written.
 *
 * Every full group of eight leading blanks is written as one tab, the
 * rest as blanks; then the remaining text and a newline follow.  The
 * work is done with lengths rather than a roving pointer, so an empty
 * or all-blank line never produces a pointer in front of the buffer.
 */
static void
tabulate(char line[])
{
	size_t len, blanks, i;

	/*
	 * Toss trailing blanks in the output line.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	blanks = strspn(line, " ");
	for (i = blanks >> 3; i > 0; i--)
		putc('\t', stdout);
	for (i = blanks & 07; i > 0; i--)
		putc(' ', stdout);
	fputs(line + blanks, stdout);
	putc('\n', stdout);
}
520 
521 /*
522  * Initialize the output line with the appropriate number of
523  * leading blanks.
524  */
525 static void
526 leadin()
527 {
528 	int b;
529 	char *cp;
530 
531 	for (b = 0, cp = outbuf; b < pfx; b++)
532 		*cp++ = ' ';
533 	outp = cp;
534 }
535 
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, otherwise 0.  An empty s1 is a prefix of any
 * string.  Both pointers advance together, and the scan stops at the
 * first mismatch, so neither string is read past its terminator.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
548