xref: /netbsd-src/usr.bin/fmt/fmt.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: fmt.c,v 1.18 2004/10/30 17:23:40 dsl Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
35 	The Regents of the University of California.  All rights reserved.\n");
36 #endif /* not lint */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.18 2004/10/30 17:23:40 dsl Exp $");
43 #endif /* not lint */
44 
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
50 
51 /*
52  * fmt -- format the concatenation of input files or standard input
53  * onto standard output.  Designed for use with Mail ~|
54  *
55  * Syntax : fmt [ goal [ max ] ] [ name ... ]
56  * Authors: Kurt Shoens (UCB) 12/7/78;
57  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
58  */
59 
60 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
61  * #define	LENGTH	72		Max line length in output
62  */
63 #define	NOSTR	((char *) 0)	/* Null string pointer for lint */
64 
/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
#define GOAL_LENGTH 65		/* Default value for goal_length */
#define MAX_LENGTH 75		/* Default value for max_length */
int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */
int	pfx;			/* Current leading blank count */
int	lineno;			/* Current input line */
int	mark;			/* Last place we saw a head line */
int	center;			/* Nonzero when -C was given: center lines instead of filling */

/* Header names prefix() looks for on the lines just after a head line */
char	*headnames[] = {"To", "Subject", "Cc", 0};
76 
77 static void	fmt(FILE *);
78 static int	ispref(const char *, const char *);
79 static void	leadin(void);
80 static void	oflush(void);
81 static void	pack(const char *, int);
82 static void	prefix(const char *, int);
83 static void	setout(void);
84 static void	split(const char *, int);
85 static void	tabulate(char *);
86 
87 int	ishead(const char *);
88 int	main(int, char **);
89 
90 /*
91  * Drive the whole formatter by managing input files.  Also,
92  * cause initialization of the output stuff and flush it out
93  * at the end.
94  */
95 
96 int
97 main(int argc, char **argv)
98 {
99 	FILE *fi;
100 	int errs = 0;
101 	int number;		/* LIZ@UOM 6/18/85 */
102 
103 	goal_length = GOAL_LENGTH;
104 	max_length = MAX_LENGTH;
105 	setout();
106 	lineno = 1;
107 	mark = -10;
108 
109 	setlocale(LC_ALL, "");
110 
111 	/*
112 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
113 	 */
114 	if (argc > 1 && !strcmp(argv[1], "-C")) {
115 		center++;
116 		argc--;
117 		argv++;
118 	}
119 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
120 		argv++;
121 		argc--;
122 		goal_length = abs(number);
123 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
124 			argv++;
125 			argc--;
126 			max_length = abs(number);
127 		}
128 	}
129 	if (max_length <= goal_length) {
130 		fprintf(stderr, "Max length must be greater than %s\n",
131 			"goal length");
132 		exit(1);
133 	}
134 	if (argc < 2) {
135 		fmt(stdin);
136 		oflush();
137 		exit(0);
138 	}
139 	while (--argc) {
140 		if ((fi = fopen(*++argv, "r")) == NULL) {
141 			perror(*argv);
142 			errs++;
143 			continue;
144 		}
145 		fmt(fi);
146 		fclose(fi);
147 	}
148 	oflush();
149 	exit(errs);
150 }
151 
152 /*
153  * Read up characters from the passed input file, forming lines,
154  * doing ^H processing, expanding tabs, stripping trailing blanks,
155  * and sending each line down for analysis.
156  */
157 static void
158 fmt(FILE *fi)
159 {
160 	char linebuf[BUFSIZ], canonb[BUFSIZ];
161 	char *cp, *cp2;
162 	int c, col, add_space;
163 
164 	if (center) {
165 		while (1) {
166 			cp = fgets(linebuf, BUFSIZ, fi);
167 			if (!cp)
168 				return;
169 			while (*cp && isspace((unsigned char)*cp))
170 				cp++;
171 			cp2 = cp + strlen(cp) - 1;
172 			while (cp2 > cp && isspace((unsigned char)*cp2))
173 				cp2--;
174 			if (cp == cp2)
175 				putchar('\n');
176 			col = cp2 - cp;
177 			for (c = 0; c < (goal_length-col)/2; c++)
178 				putchar(' ');
179 			while (cp <= cp2)
180 				putchar(*cp++);
181 			putchar('\n');
182 		}
183 	}
184 	c = getc(fi);
185 	while (c != EOF) {
186 		/*
187 		 * Collect a line, doing ^H processing.
188 		 * Leave tabs for now.
189 		 */
190 		cp = linebuf;
191 		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
192 			if (c == '\b') {
193 				if (cp > linebuf)
194 					cp--;
195 				c = getc(fi);
196 				continue;
197 			}
198 			if(!(isprint(c) || c == '\t' || c >= 160)) {
199 				c = getc(fi);
200 				continue;
201 			}
202 			*cp++ = c;
203 			c = getc(fi);
204 		}
205 		*cp = '\0';
206 
207 		/*
208 		 * By default, add space after the end of current input
209 		 * (normally end of line)
210 		 */
211 		add_space = 1;
212 
213 		/*
214 		 * If the input line is longer than linebuf buffer can hold,
215 		 * process the data read so far as if it was a separate line -
216 		 * if there is any whitespace character in the read data,
217 		 * process all the data up to it, otherwise process all.
218 		 */
219 		if (c != '\n' && c != EOF && !isspace(c)) {
220 			/*
221 			 * Find out if any whitespace character has been read.
222 			 */
223 			for(cp2 = cp; cp2 >= linebuf
224 				&& !isspace((unsigned char)*cp2); cp2--);
225 
226 			if (cp2 < linebuf) {
227 				/*
228 				 * ungetc() last read character so that it
229 				 * won't get lost.
230 				 */
231 				ungetc(c, fi);
232 				/*
233 				 * Don't append space on the end in split().
234 				 */
235 				add_space = 0;
236 			} else {
237 				/*
238 				 * To avoid splitting a word in a middle,
239 				 * ungetc() all characters after last
240 				 * whitespace char.
241 				 */
242 				while (!isspace(c) && (cp >= linebuf)) {
243 					ungetc(c, fi);
244 					c = *--cp;
245 				}
246 				*cp = '\0';
247 			}
248 		}
249 
250 		/*
251 		 * Expand tabs on the way to canonb.
252 		 */
253 		col = 0;
254 		cp = linebuf;
255 		cp2 = canonb;
256 		while ((c = *cp++) != 0) {
257 			if (c != '\t') {
258 				col++;
259 				if (cp2-canonb < BUFSIZ-1)
260 					*cp2++ = c;
261 				continue;
262 			}
263 			do {
264 				if (cp2-canonb < BUFSIZ-1)
265 					*cp2++ = ' ';
266 				col++;
267 			} while ((col & 07) != 0);
268 		}
269 
270 		/*
271 		 * Swipe trailing blanks from the line.
272 		 */
273 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
274 			;
275 		*++cp2 = '\0';
276 		prefix(canonb, add_space);
277 		if (c != EOF)
278 			c = getc(fi);
279 	}
280 }
281 
282 /*
283  * Take a line devoid of tabs and other garbage and determine its
284  * blank prefix.  If the indent changes, call for a linebreak.
285  * If the input line is blank, echo the blank line on the output.
286  * Finally, if the line minus the prefix is a mail header, try to keep
287  * it on a line by itself.
288  */
289 static void
290 prefix(const char line[], int add_space)
291 {
292 	const char *cp;
293 	char **hp;
294 	int np, h;
295 
296 	if (strlen(line) == 0) {
297 		oflush();
298 		putchar('\n');
299 		return;
300 	}
301 	for (cp = line; *cp == ' '; cp++)
302 		;
303 	np = cp - line;
304 
305 	/*
306 	 * The following horrible expression attempts to avoid linebreaks
307 	 * when the indent changes due to a paragraph.
308 	 */
309 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
310 		oflush();
311 	if ((h = ishead(cp)) != 0)
312 		oflush(), mark = lineno;
313 	if (lineno - mark < 3 && lineno - mark > 0)
314 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
315 			if (ispref(*hp, cp)) {
316 				h = 1;
317 				oflush();
318 				break;
319 			}
320 	if (!h && (h = (*cp == '.')))
321 		oflush();
322 	pfx = np;
323 	if (h) {
324 		pack(cp, strlen(cp));
325 		oflush();
326 	} else
327 		split(cp, add_space);
328 	lineno++;
329 }
330 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line      - text to split; must not start with a blank.
 * add_space - nonzero to append a blank after the last word (two after
 *             '.', ':' or '!').
 *
 * The word buffer is sized from the line itself: a word can never be
 * longer than the whole line plus the two appended blanks and the
 * terminating NUL, so no input can overrun it.
 */
static void
split(const char line[], int add_space)
{
	const char *cp;
	char *cp2, *word;
	int wordl;		/* LIZ@UOM 6/18/85 */

	word = malloc(strlen(line) + 3);
	if (word == NULL) {
		perror("fmt");
		exit(1);
	}

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash and the white space it escapes are
		 * both copied but count as one column in wordl.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 -- the length passed on excludes the
		 * trailing blanks.
		 */
		pack(word, wordl);
	}
	free(word);
}
379 
380 /*
381  * Output section.
382  * Build up line images from the words passed in.  Prefix
383  * each line with correct number of blanks.  The buffer "outbuf"
384  * contains the current partial line image, including prefixed blanks.
385  * "outp" points to the next available space therein.  When outp is NOSTR,
386  * there ain't nothing in there yet.  At the bottom of this whole mess,
387  * leading tabs are reinserted.
388  */
char	outbuf[BUFSIZ];			/* Current partial output line, leading blanks included */
char	*outp;				/* Next free position in outbuf; NOSTR when no line is started */
391 
/*
 * Initialize the output section: mark the output line as not yet
 * started by setting outp to NOSTR.
 */
static void
setout(void)
{
	outp = NOSTR;
}
400 
401 /*
402  * Pack a word onto the output line.  If this is the beginning of
403  * the line, push on the appropriately-sized string of blanks first.
404  * If the word won't fit on the current line, flush and begin a new
405  * line.  If the word is too long to fit all by itself on a line,
406  * just give it its own and hope for the best.
407  *
408  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
409  *	goal length, take it.  If not, then check to see if the line
410  *	will be over the max length; if so put the word on the next
411  *	line.  If not, check to see if the line will be closer to the
412  *	goal length with or without the word and take it or put it on
413  *	the next line accordingly.
414  */
415 
416 /*
417  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
418  * pack(word)
419  *	char word[];
420  */
421 static void
422 pack(const char word[], int wl)
423 {
424 	const char *cp;
425 	int s, t;
426 
427 	if (outp == NOSTR)
428 		leadin();
429 	/*
430 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
431 	 * length of the line before the word is added; t is now the length
432 	 * of the line after the word is added
433 	 *	t = strlen(word);
434 	 *	if (t+s <= LENGTH)
435 	 */
436 	s = outp - outbuf;
437 	t = wl + s;
438 	if ((t <= goal_length) ||
439 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
440 		/*
441 		 * In like flint!
442 		 */
443 		for (cp = word; *cp; *outp++ = *cp++);
444 		return;
445 	}
446 	if (s > pfx) {
447 		oflush();
448 		leadin();
449 	}
450 	for (cp = word; *cp; *outp++ = *cp++);
451 }
452 
453 /*
454  * If there is anything on the current output line, send it on
455  * its way.  Set outp to NOSTR to indicate the absence of the current
456  * line prefix.
457  */
458 static void
459 oflush(void)
460 {
461 	if (outp == NOSTR)
462 		return;
463 	*outp = '\0';
464 	tabulate(outbuf);
465 	outp = NOSTR;
466 }
467 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from the buffer in place.  Each full group
 * of 8 leading blanks is written as one tab, the remainder as blanks,
 * followed by the rest of the line and a newline.
 */
static void
tabulate(char line[])
{
	char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Scanning by index
	 * keeps this well defined for empty and all-blank lines.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		continue;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
506 
507 /*
508  * Initialize the output line with the appropriate number of
509  * leading blanks.
510  */
511 static void
512 leadin(void)
513 {
514 	int b;
515 	char *cp;
516 
517 	for (b = 0, cp = outbuf; b < pfx; b++)
518 		*cp++ = ' ';
519 	outp = cp;
520 }
521 
/*
 * Is s1 a prefix of s2??
 *
 * Returns nonzero when every character of s1 matches the corresponding
 * leading character of s2 (so the empty string is a prefix of anything),
 * and 0 otherwise.
 */
static int
ispref(const char *s1, const char *s2)
{

	/*
	 * The comparison stops at the first difference, which includes
	 * the end of s2 when s2 is the shorter string.
	 */
	return (strncmp(s2, s1, strlen(s1)) == 0);
}
533