1 /* $NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #ifndef lint
34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\
35 The Regents of the University of California. All rights reserved.");
36 #endif /* not lint */
37
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93";
41 #endif
42 __RCSID("$NetBSD: fmt.c,v 1.33 2017/10/13 00:11:56 christos Exp $");
43 #endif /* not lint */
44
45 #include <wctype.h>
46 #include <locale.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <unistd.h>
50 #include <errno.h>
51 #include <err.h>
52 #include <limits.h>
53 #include <string.h>
54 #include <locale.h>
55 #include "buffer.h"
56
57 /*
58 * fmt -- format the concatenation of input files or standard input
59 * onto standard output. Designed for use with Mail ~|
60 *
61 * Syntax : fmt [ goal [ max ] ] [ name ... ]
62 * Authors: Kurt Shoens (UCB) 12/7/78;
63 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
64 */
65
/* LIZ@UOM 6/18/85 --New variables goal_length and max_length */
/* Defaults assigned to goal_length and max_length at the start of main(). */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75
static size_t goal_length; /* Target or goal line length in output */
static size_t max_length; /* Max line length in output */
static size_t pfx; /* Current leading blank count */
static int raw; /* Don't treat mail specially */
static int lineno; /* Current input line */
static int mark; /* Last place we saw a head line */
static int center; /* Set by -C: center each line instead of filling */
static struct buffer outbuf; /* Output line currently being assembled */

/*
 * Header names that prefix() looks for (via ispref()) on the lines
 * directly following a line accepted by ishead().  NULL terminated.
 */
static const wchar_t *headnames[] = { L"To", L"Subject", L"Cc", NULL };

static void usage(void) __dead;
static int getnum(const char *, const char *, size_t *, int);
static void fmt(FILE *);
static int ispref(const wchar_t *, const wchar_t *);
static void leadin(void);
static void oflush(void);
static void pack(const wchar_t *, size_t);
static void prefix(const struct buffer *, int);
static void split(const wchar_t *, int);
static void tabulate(struct buffer *);


/*
 * Not static and not defined in the part of the file visible here;
 * presumably supplied by another source file -- TODO confirm.
 */
int ishead(const wchar_t *);
93
94 /*
95 * Drive the whole formatter by managing input files. Also,
96 * cause initialization of the output stuff and flush it out
97 * at the end.
98 */
99
100 int
main(int argc,char ** argv)101 main(int argc, char **argv)
102 {
103 FILE *fi;
104 int errs = 0;
105 int compat = 1;
106 int c;
107
108 goal_length = GOAL_LENGTH;
109 max_length = MAX_LENGTH;
110 buf_init(&outbuf);
111 lineno = 1;
112 mark = -10;
113
114 setprogname(*argv);
115 (void)setlocale(LC_ALL, "");
116
117 while ((c = getopt(argc, argv, "Cg:m:rw:")) != -1)
118 switch (c) {
119 case 'C':
120 center++;
121 break;
122 case 'g':
123 (void)getnum(optarg, "goal", &goal_length, 1);
124 compat = 0;
125 break;
126 case 'm':
127 case 'w':
128 (void)getnum(optarg, "max", &max_length, 1);
129 compat = 0;
130 break;
131 case 'r':
132 raw++;
133 break;
134 default:
135 usage();
136 }
137
138 argc -= optind;
139 argv += optind;
140
141 /*
142 * compatibility with old usage.
143 */
144 if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) {
145 argv++;
146 argc--;
147 if (argc > 0 && getnum(*argv, "max", &max_length, 0)) {
148 argv++;
149 argc--;
150 }
151 }
152
153 if (max_length <= goal_length) {
154 errx(1, "Max length (%zu) must be greater than goal "
155 "length (%zu)", max_length, goal_length);
156 }
157 if (argc == 0) {
158 fmt(stdin);
159 oflush();
160 return 0;
161 }
162 for (;argc; argc--, argv++) {
163 if ((fi = fopen(*argv, "r")) == NULL) {
164 warn("Cannot open `%s'", *argv);
165 errs++;
166 continue;
167 }
168 fmt(fi);
169 (void)fclose(fi);
170 }
171 oflush();
172 buf_end(&outbuf);
173 return errs;
174 }
175
/*
 * Print the two accepted command line forms on standard error and
 * exit with status 1.
 */
static void
usage(void)
{
	const char *prog = getprogname();

	(void)fprintf(stderr,
	    "Usage: %s [-Cr] [-g <goal>] [-m|w <max>] [<files>..]\n"
	    "\t %s [-Cr] [<goal>] [<max>] [<files>]\n",
	    prog, prog);
	exit(1);
}
185
/*
 * Parse `str' as an unsigned length and store it in `*res'.
 *
 * str		text to convert; strtoul() base 0, so the usual octal and
 *		hexadecimal prefixes are accepted
 * what		name of the quantity, used in diagnostics only
 * res		receives the value on success, untouched otherwise
 * badnum	when non-zero a malformed number is fatal (errx); when
 *		zero it is reported to the caller by returning 0
 *
 * Returns 1 if the whole string was a number, 0 if it was not (and
 * `badnum' was zero).  A well-formed number that does not fit in a
 * size_t is always fatal.
 */
static int
getnum(const char *str, const char *what, size_t *res, int badnum)
{
	unsigned long ul;
	char *ep;
	const char *first;

	/*
	 * strtoul() skips leading white space and accepts a minus sign,
	 * silently turning "-1" into ULONG_MAX.  A negative length makes
	 * no sense, so treat it like any other malformed number.
	 */
	first = str + strspn(str, " \t\n\v\f\r");
	if (*first != '-') {
		errno = 0;
		ul = strtoul(str, &ep, 0);
		if (*str != '\0' && *ep == '\0') {
			/* Out of range for strtoul() or for size_t. */
			if ((errno == ERANGE && ul == ULONG_MAX) ||
			    (size_t)ul != ul)
				errx(1, "%s number `%s' too big", what, str);
			*res = (size_t)ul;
			return 1;
		}
	}

	if (badnum)
		errx(1, "Bad %s number `%s'", what, str);

	return 0;
}
204
205 /*
206 * Read up characters from the passed input file, forming lines,
207 * doing ^H processing, expanding tabs, stripping trailing blanks,
208 * and sending each line down for analysis.
209 */
210 static void
fmt(FILE * fi)211 fmt(FILE *fi)
212 {
213 struct buffer lbuf, cbuf;
214 wchar_t *cp, *cp2;
215 wint_t c;
216 int add_space;
217 size_t len, col, i;
218
219 if (center) {
220 for (;;) {
221 cp = fgetwln(fi, &len);
222 if (!cp)
223 return;
224
225 /* skip over leading space */
226 while (len > 0) {
227 if (!iswspace(*cp))
228 break;
229 cp++;
230 len--;
231 }
232
233 /* clear trailing space */
234 while (len > 0) {
235 if (!iswspace((unsigned char)cp[len-1]))
236 break;
237 len--;
238 }
239
240 if (len == 0) {
241 /* blank line */
242 (void)putwchar(L'\n');
243 continue;
244 }
245
246 if (goal_length > len) {
247 for (i = 0; i < (goal_length - len) / 2; i++) {
248 (void)putwchar(L' ');
249 }
250 }
251 for (i = 0; i < len; i++) {
252 (void)putwchar(cp[i]);
253 }
254 (void)putwchar(L'\n');
255 }
256 }
257
258 buf_init(&lbuf);
259 buf_init(&cbuf);
260 c = getwc(fi);
261
262 while (c != WEOF) {
263 /*
264 * Collect a line, doing ^H processing.
265 * Leave tabs for now.
266 */
267 buf_reset(&lbuf);
268 while (c != '\n' && c != WEOF) {
269 if (c == '\b') {
270 (void)buf_unputc(&lbuf);
271 c = getwc(fi);
272 continue;
273 }
274 if(!(iswprint(c) || c == '\t' || c >= 160)) {
275 c = getwc(fi);
276 continue;
277 }
278 buf_putc(&lbuf, c);
279 c = getwc(fi);
280 }
281 buf_putc(&lbuf, '\0');
282 (void)buf_unputc(&lbuf);
283 add_space = c != WEOF;
284
285 /*
286 * Expand tabs on the way.
287 */
288 col = 0;
289 cp = lbuf.bptr;
290 buf_reset(&cbuf);
291 while ((c = *cp++) != '\0') {
292 if (c != '\t') {
293 col++;
294 buf_putc(&cbuf, c);
295 continue;
296 }
297 do {
298 buf_putc(&cbuf, ' ');
299 col++;
300 } while ((col & 07) != 0);
301 }
302
303 /*
304 * Swipe trailing blanks from the line.
305 */
306 for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--)
307 continue;
308 cbuf.ptr = cp2 + 1;
309 buf_putc(&cbuf, '\0');
310 (void)buf_unputc(&cbuf);
311 prefix(&cbuf, add_space);
312 if (c != WEOF)
313 c = getwc(fi);
314 }
315 buf_end(&cbuf);
316 buf_end(&lbuf);
317 }
318
319 /*
320 * Take a line devoid of tabs and other garbage and determine its
321 * blank prefix. If the indent changes, call for a linebreak.
322 * If the input line is blank, echo the blank line on the output.
323 * Finally, if the line minus the prefix is a mail header, try to keep
324 * it on a line by itself.
325 */
326 static void
prefix(const struct buffer * buf,int add_space)327 prefix(const struct buffer *buf, int add_space)
328 {
329 const wchar_t *cp;
330 const wchar_t **hp;
331 size_t np;
332 int h;
333
334 if (buf->ptr == buf->bptr) {
335 oflush();
336 (void)putwchar(L'\n');
337 return;
338 }
339 for (cp = buf->bptr; *cp == ' '; cp++)
340 continue;
341 np = cp - buf->bptr;
342
343 /*
344 * The following horrible expression attempts to avoid linebreaks
345 * when the indent changes due to a paragraph.
346 */
347 if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8))
348 oflush();
349 if (!raw) {
350 if ((h = ishead(cp)) != 0) {
351 oflush();
352 mark = lineno;
353 }
354 if (lineno - mark < 3 && lineno - mark > 0)
355 for (hp = &headnames[0]; *hp != NULL; hp++)
356 if (ispref(*hp, cp)) {
357 h = 1;
358 oflush();
359 break;
360 }
361 if (!h && (h = (*cp == '.')))
362 oflush();
363 } else
364 h = 0;
365 pfx = np;
366 if (h) {
367 pack(cp, (size_t)(buf->ptr - cp));
368 oflush();
369 } else
370 split(cp, add_space);
371 lineno++;
372 }
373
374 /*
375 * Split up the passed line into output "words" which are
376 * maximal strings of non-blanks with the blank separation
377 * attached at the end. Pass these words along to the output
378 * line packer.
379 */
380 static void
split(const wchar_t line[],int add_space)381 split(const wchar_t line[], int add_space)
382 {
383 const wchar_t *cp;
384 struct buffer word;
385 size_t wlen;
386
387 buf_init(&word);
388 cp = line;
389 while (*cp) {
390 buf_reset(&word);
391 wlen = 0;
392
393 /*
394 * Collect a 'word,' allowing it to contain escaped white
395 * space.
396 */
397 while (*cp && *cp != ' ') {
398 if (*cp == '\\' && iswspace(cp[1]))
399 buf_putc(&word, *cp++);
400 buf_putc(&word, *cp++);
401 wlen++;
402 }
403
404 /*
405 * Guarantee a space at end of line. Two spaces after end of
406 * sentence punctuation.
407 */
408 if (*cp == '\0' && add_space) {
409 buf_putc(&word, ' ');
410 if (strchr(".:!", cp[-1]))
411 buf_putc(&word, ' ');
412 }
413 while (*cp == ' ')
414 buf_putc(&word, *cp++);
415
416 buf_putc(&word, '\0');
417 (void)buf_unputc(&word);
418
419 pack(word.bptr, wlen);
420 }
421 buf_end(&word);
422 }
423
424 /*
425 * Output section.
426 * Build up line images from the words passed in. Prefix
427 * each line with correct number of blanks.
428 *
429 * At the bottom of this whole mess, leading tabs are reinserted.
430 */
431
432 /*
433 * Pack a word onto the output line. If this is the beginning of
434 * the line, push on the appropriately-sized string of blanks first.
435 * If the word won't fit on the current line, flush and begin a new
436 * line. If the word is too long to fit all by itself on a line,
437 * just give it its own and hope for the best.
438 *
439 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
440 * goal length, take it. If not, then check to see if the line
441 * will be over the max length; if so put the word on the next
442 * line. If not, check to see if the line will be closer to the
443 * goal length with or without the word and take it or put it on
444 * the next line accordingly.
445 */
446
447 static void
pack(const wchar_t * word,size_t wlen)448 pack(const wchar_t *word, size_t wlen)
449 {
450 const wchar_t *cp;
451 size_t s, t;
452
453 if (outbuf.bptr == outbuf.ptr)
454 leadin();
455 /*
456 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
457 * length of the line before the word is added; t is now the length
458 * of the line after the word is added
459 */
460 s = outbuf.ptr - outbuf.bptr;
461 t = wlen + s;
462 if ((t <= goal_length) || ((t <= max_length) &&
463 (s <= goal_length) && (t - goal_length <= goal_length - s))) {
464 /*
465 * In like flint!
466 */
467 for (cp = word; *cp;)
468 buf_putc(&outbuf, *cp++);
469 return;
470 }
471 if (s > pfx) {
472 oflush();
473 leadin();
474 }
475 for (cp = word; *cp;)
476 buf_putc(&outbuf, *cp++);
477 }
478
479 /*
480 * If there is anything on the current output line, send it on
481 * its way. Reset outbuf.
482 */
483 static void
oflush(void)484 oflush(void)
485 {
486 if (outbuf.bptr == outbuf.ptr)
487 return;
488 buf_putc(&outbuf, '\0');
489 (void)buf_unputc(&outbuf);
490 tabulate(&outbuf);
491 buf_reset(&outbuf);
492 }
493
494 /*
495 * Take the passed line buffer, insert leading tabs where possible, and
496 * output on standard output (finally).
497 */
498 static void
tabulate(struct buffer * buf)499 tabulate(struct buffer *buf)
500 {
501 wchar_t *cp;
502 size_t b, t;
503
504 /*
505 * Toss trailing blanks in the output line.
506 */
507 for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--)
508 continue;
509 *++cp = '\0';
510
511 /*
512 * Count the leading blank space and tabulate.
513 */
514 for (cp = buf->bptr; *cp == ' '; cp++)
515 continue;
516 b = cp - buf->bptr;
517 t = b / 8;
518 b = b % 8;
519 if (t > 0)
520 do
521 (void)putwchar(L'\t');
522 while (--t);
523 if (b > 0)
524 do
525 (void)putwchar(L' ');
526 while (--b);
527 while (*cp)
528 (void)putwchar(*cp++);
529 (void)putwchar(L'\n');
530 }
531
532 /*
533 * Initialize the output line with the appropriate number of
534 * leading blanks.
535 */
536 static void
leadin(void)537 leadin(void)
538 {
539 size_t b;
540
541 buf_reset(&outbuf);
542
543 for (b = 0; b < pfx; b++)
544 buf_putc(&outbuf, ' ');
545 }
546
/*
 * Is s1 a prefix of s2?
 *
 * Returns 1 if every character of s1 matches the character at the same
 * position in s2 (so the empty string is a prefix of anything), and 0
 * otherwise.  Both strings are walked in step, and the comparison stops
 * at the first mismatch, so neither string is read past its terminator.
 */
static int
ispref(const wchar_t *s1, const wchar_t *s2)
{

	while (*s1 != L'\0') {
		if (*s1++ != *s2++)
			return 0;
	}
	return 1;
}
558