xref: /netbsd-src/usr.bin/fmt/fmt.c (revision ce0bb6e8d2e560ecacbe865a848624f94498063b)
1 /*
2  * Copyright (c) 1980 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
#ifndef lint
/*
 * Identification strings, compiled in unless "lint" is defined:
 * the copyright notice and the revision id of this file.
 */
char copyright[] =
	"@(#) Copyright (c) 1980 Regents of the University of California.\n"
	" All rights reserved.\n";

/*static char sccsid[] = "from: @(#)fmt.c	5.10 (Berkeley) 6/1/90";*/
static char rcsid[] = "$Id: fmt.c,v 1.3 1994/12/24 16:35:17 cgd Exp $";
#endif /* not lint */
44 
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <ctype.h>
49 
50 /*
51  * fmt -- format the concatenation of input files or standard input
52  * onto standard output.  Designed for use with Mail ~|
53  *
54  * Syntax : fmt [ goal [ max ] ] [ name ... ]
55  * Authors: Kurt Shoens (UCB) 12/7/78;
56  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
57  */
58 
59 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
60  * #define	LENGTH	72		Max line length in output
61  */
/* Typed null character pointer; the cast is there for lint's benefit. */
#define	NOSTR	((char *) 0)

/*
 * LIZ@UOM 6/18/85 -- goal_length and max_length replace the old
 * fixed LENGTH.  main() loads them with the defaults below before
 * looking at the command line.
 */
#define GOAL_LENGTH 65		/* default goal_length */
#define MAX_LENGTH 75		/* default max_length */

int	goal_length;		/* Target or goal line length in output */
int	max_length;		/* Max line length in output */

int	pfx;			/* Leading blank count of the current line */
int	lineno;			/* Number of the current input line */
int	mark;			/* Value of lineno at the last head line */

/* Field names tried by prefix(); the list ends with a null pointer. */
char	*headnames[] = {
	"To",
	"Subject",
	"Cc",
	NOSTR
};
74 
75 /*
76  * Drive the whole formatter by managing input files.  Also,
77  * cause initialization of the output stuff and flush it out
78  * at the end.
79  */
80 
81 main(argc, argv)
82 	int argc;
83 	char **argv;
84 {
85 	register FILE *fi;
86 	register int errs = 0;
87 	int number;		/* LIZ@UOM 6/18/85 */
88 
89 	goal_length = GOAL_LENGTH;
90 	max_length = MAX_LENGTH;
91 	setout();
92 	lineno = 1;
93 	mark = -10;
94 	/*
95 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
96 	 */
97 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
98 		argv++;
99 		argc--;
100 		goal_length = number;
101 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
102 			argv++;
103 			argc--;
104 			max_length = number;
105 		}
106 	}
107 	if (max_length <= goal_length) {
108 		fprintf(stderr, "Max length must be greater than %s\n",
109 			"goal length");
110 		exit(1);
111 	}
112 	if (argc < 2) {
113 		fmt(stdin);
114 		oflush();
115 		exit(0);
116 	}
117 	while (--argc) {
118 		if ((fi = fopen(*++argv, "r")) == NULL) {
119 			perror(*argv);
120 			errs++;
121 			continue;
122 		}
123 		fmt(fi);
124 		fclose(fi);
125 	}
126 	oflush();
127 	exit(errs);
128 }
129 
/*
 * Read characters from the passed input file, forming lines,
 * doing ^H processing, expanding tabs, stripping trailing blanks,
 * and sending each line down to prefix() for analysis.
 *
 * At most BUFSIZ-1 characters of any input line are kept; the rest
 * of an over-long line is discarded.  Characters below ' ' or at or
 * above 0177 are dropped, except for tab (kept) and backspace (which
 * erases the previously collected character).
 *
 * The return value carries no information (always 0).
 */
int
fmt(fi)
	FILE *fi;
{
	char linebuf[BUFSIZ], canonb[BUFSIZ];
	register char *cp, *cp2;
	register int c, ch, col;

	c = getc(fi);
	while (c != EOF) {
		/*
		 * Collect a line, doing ^H processing.
		 * Leave tabs for now.
		 */
		cp = linebuf;
		while (c != '\n' && c != EOF && cp - linebuf < BUFSIZ - 1) {
			if (c == '\b') {
				if (cp > linebuf)
					cp--;
			} else if ((c >= ' ' && c < 0177) || c == '\t')
				*cp++ = c;
			c = getc(fi);
		}
		*cp = '\0';

		/*
		 * Toss anything remaining on the input line.
		 */
		while (c != '\n' && c != EOF)
			c = getc(fi);

		/*
		 * Expand tabs on the way to canonb.  A separate variable
		 * holds the character being copied so that "c" still
		 * records whether the line ended in a newline or in EOF.
		 */
		col = 0;
		cp2 = canonb;
		for (cp = linebuf; (ch = *cp) != '\0'; cp++) {
			if (ch != '\t') {
				col++;
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ch;
				continue;
			}
			/* Pad with blanks up to the next multiple of 8. */
			do {
				if (cp2 - canonb < BUFSIZ - 1)
					*cp2++ = ' ';
				col++;
			} while ((col & 07) != 0);
		}

		/*
		 * Swipe trailing blanks from the line, never stepping
		 * back past the start of canonb.
		 */
		while (cp2 > canonb && cp2[-1] == ' ')
			cp2--;
		*cp2 = '\0';
		prefix(canonb);

		/* Only read on if the line really ended in a newline. */
		if (c != EOF)
			c = getc(fi);
	}
	return (0);
}
202 
203 /*
204  * Take a line devoid of tabs and other garbage and determine its
205  * blank prefix.  If the indent changes, call for a linebreak.
206  * If the input line is blank, echo the blank line on the output.
207  * Finally, if the line minus the prefix is a mail header, try to keep
208  * it on a line by itself.
209  */
210 prefix(line)
211 	char line[];
212 {
213 	register char *cp, **hp;
214 	register int np, h;
215 
216 	if (strlen(line) == 0) {
217 		oflush();
218 		putchar('\n');
219 		return;
220 	}
221 	for (cp = line; *cp == ' '; cp++)
222 		;
223 	np = cp - line;
224 
225 	/*
226 	 * The following horrible expression attempts to avoid linebreaks
227 	 * when the indent changes due to a paragraph.
228 	 */
229 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
230 		oflush();
231 	if (h = ishead(cp))
232 		oflush(), mark = lineno;
233 	if (lineno - mark < 3 && lineno - mark > 0)
234 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
235 			if (ispref(*hp, cp)) {
236 				h = 1;
237 				oflush();
238 				break;
239 			}
240 	if (!h && (h = (*cp == '.')))
241 		oflush();
242 	pfx = np;
243 	if (h)
244 		pack(cp);
245 	else	split(cp);
246 	if (h)
247 		oflush();
248 	lineno++;
249 }
250 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to pack(), together
 * with the count of non-blank characters (an escaped white-space
 * pair counts as one).
 *
 * The word buffer has room for a BUFSIZ-1 character word plus two
 * trailing blanks and the terminator.  Should a caller pass a longer
 * run of non-blanks, it is handed to pack() in several pieces rather
 * than being written past the end of the buffer.
 *
 * The return value carries no information (always 0).
 */
int
split(line)
	char line[];
{
	register char *cp, *cp2;
	char word[BUFSIZ + 3];
	char *full, *last;
	int wordl;		/* LIZ@UOM 6/18/85 */

	/*
	 * Collection stops at "full": from there an escape pair, two
	 * blanks and the NUL still fit.  "last" is the final slot,
	 * reserved for the NUL.
	 */
	full = word + sizeof(word) - 4;
	last = word + sizeof(word) - 1;

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.
		 */
		while (*cp && *cp != ' ' && cp2 < full) {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.  (cp[-1] is safe: at least one
		 * character has been consumed whenever *cp is NUL here.)
		 */
		if (*cp == '\0') {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		while (*cp == ' ') {
			if (cp2 < last)
				*cp2++ = ' ';
			cp++;
		}
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
	return (0);
}
298 
299 /*
300  * Output section.
301  * Build up line images from the words passed in.  Prefix
302  * each line with correct number of blanks.  The buffer "outbuf"
303  * contains the current partial line image, including prefixed blanks.
304  * "outp" points to the next available space therein.  When outp is NOSTR,
305  * there ain't nothing in there yet.  At the bottom of this whole mess,
306  * leading tabs are reinserted.
307  */
308 char	outbuf[BUFSIZ];			/* Sandbagged output line image */
309 char	*outp;				/* Pointer in above */
310 
311 /*
312  * Initialize the output section.
313  */
314 setout()
315 {
316 	outp = NOSTR;
317 }
318 
319 /*
320  * Pack a word onto the output line.  If this is the beginning of
321  * the line, push on the appropriately-sized string of blanks first.
322  * If the word won't fit on the current line, flush and begin a new
323  * line.  If the word is too long to fit all by itself on a line,
324  * just give it its own and hope for the best.
325  *
326  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
327  *	goal length, take it.  If not, then check to see if the line
328  *	will be over the max length; if so put the word on the next
329  *	line.  If not, check to see if the line will be closer to the
330  *	goal length with or without the word and take it or put it on
331  *	the next line accordingly.
332  */
333 
334 /*
335  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
336  * pack(word)
337  *	char word[];
338  */
339 pack(word,wl)
340 	char word[];
341 	int wl;
342 {
343 	register char *cp;
344 	register int s, t;
345 
346 	if (outp == NOSTR)
347 		leadin();
348 	/*
349 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
350 	 * length of the line before the word is added; t is now the length
351 	 * of the line after the word is added
352 	 *	t = strlen(word);
353 	 *	if (t+s <= LENGTH)
354 	 */
355 	s = outp - outbuf;
356 	t = wl + s;
357 	if ((t <= goal_length) ||
358 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
359 		/*
360 		 * In like flint!
361 		 */
362 		for (cp = word; *cp; *outp++ = *cp++);
363 		return;
364 	}
365 	if (s > pfx) {
366 		oflush();
367 		leadin();
368 	}
369 	for (cp = word; *cp; *outp++ = *cp++);
370 }
371 
372 /*
373  * If there is anything on the current output line, send it on
374  * its way.  Set outp to NOSTR to indicate the absence of the current
375  * line prefix.
376  */
377 oflush()
378 {
379 	if (outp == NOSTR)
380 		return;
381 	*outp = '\0';
382 	tabulate(outbuf);
383 	outp = NOSTR;
384 }
385 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally), followed by a newline.
 *
 * Trailing blanks are removed from "line" in place.  Every full group
 * of eight leading blanks is written as one tab; the remaining
 * leading blanks are written as they are.
 *
 * The return value carries no information (always 0).
 */
int
tabulate(line)
	char line[];
{
	register char *cp;
	register int b, t;
	size_t len;

	/*
	 * Toss trailing blanks in the output line.  Trimming by length
	 * copes with an empty or all-blank line without ever forming a
	 * pointer in front of the buffer.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp - line;
	for (t = b >> 3; t > 0; t--)
		putc('\t', stdout);
	for (b &= 07; b > 0; b--)
		putc(' ', stdout);
	fputs(cp, stdout);
	putc('\n', stdout);
	return (0);
}
424 
425 /*
426  * Initialize the output line with the appropriate number of
427  * leading blanks.
428  */
429 leadin()
430 {
431 	register int b;
432 	register char *cp;
433 
434 	for (b = 0, cp = outbuf; b < pfx; b++)
435 		*cp++ = ' ';
436 	outp = cp;
437 }
438 
/*
 * Return a freshly malloc'ed copy of "str".
 * If no memory is available, complain on stderr and exit with
 * status 1, so the result is never a null pointer.
 * (Per the original note, this is wanted by the headline detector
 * in head.c.)
 */
char *
savestr(str)
	char str[];
{
	register char *copy;
	size_t size;

	size = strlen(str) + 1;		/* text plus terminator */
	copy = malloc(size);
	if (!copy) {
		fprintf(stderr, "fmt:  Ran out of memory\n");
		exit(1);
	}
	memcpy(copy, str, size);
	return (copy);
}
458 
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2, and 0 otherwise.  The empty string is a prefix
 * of everything.  Both pointers advance together, and the scan stops
 * at the first mismatch, so neither string is read beyond its
 * terminator.
 */
int
ispref(s1, s2)
	register char *s1, *s2;
{

	while (*s1 != '\0')
		if (*s1++ != *s2++)
			return (0);
	return (1);
}
470