1 /* $NetBSD: fmt.c,v 1.28 2007/12/15 19:44:50 perry Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 #ifndef lint 34 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\ 35 The Regents of the University of California. All rights reserved.\n"); 36 #endif /* not lint */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)fmt.c 8.1 (Berkeley) 7/20/93"; 41 #endif 42 __RCSID("$NetBSD: fmt.c,v 1.28 2007/12/15 19:44:50 perry Exp $"); 43 #endif /* not lint */ 44 45 #include <ctype.h> 46 #include <locale.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <unistd.h> 50 #include <errno.h> 51 #include <err.h> 52 #include <limits.h> 53 #include <string.h> 54 #include "buffer.h" 55 56 /* 57 * fmt -- format the concatenation of input files or standard input 58 * onto standard output. Designed for use with Mail ~| 59 * 60 * Syntax : fmt [ goal [ max ] ] [ name ... ] 61 * Authors: Kurt Shoens (UCB) 12/7/78; 62 * Liz Allen (UMCP) 2/24/83 [Addition of goal length concept]. 63 */ 64 65 /* LIZ@UOM 6/18/85 --New variables goal_length and max_length */ 66 #define GOAL_LENGTH 65 67 #define MAX_LENGTH 75 68 static size_t goal_length; /* Target or goal line length in output */ 69 static size_t max_length; /* Max line length in output */ 70 static size_t pfx; /* Current leading blank count */ 71 static int raw; /* Don't treat mail specially */ 72 static int lineno; /* Current input line */ 73 static int mark; /* Last place we saw a head line */ 74 static int center; 75 static struct buffer outbuf; 76 77 static const char *headnames[] = {"To", "Subject", "Cc", 0}; 78 79 static void usage(void) __dead; 80 static int getnum(const char *, const char *, size_t *, int); 81 static void fmt(FILE *); 82 static int ispref(const char *, const char *); 83 static void leadin(void); 84 static void oflush(void); 85 static void pack(const char *, size_t); 86 static void prefix(const struct buffer *, int); 87 static void split(const char *, int); 88 static void tabulate(struct buffer *); 89 90 91 int ishead(const char *); 92 93 /* 94 * Drive the whole formatter by managing input files. Also, 95 * cause initialization of the output stuff and flush it out 96 * at the end. 97 */ 98 99 int 100 main(int argc, char **argv) 101 { 102 FILE *fi; 103 int errs = 0; 104 int compat = 1; 105 int c; 106 107 goal_length = GOAL_LENGTH; 108 max_length = MAX_LENGTH; 109 buf_init(&outbuf); 110 lineno = 1; 111 mark = -10; 112 113 setprogname(*argv); 114 (void)setlocale(LC_ALL, ""); 115 116 while ((c = getopt(argc, argv, "Cg:m:r")) != -1) 117 switch (c) { 118 case 'C': 119 center++; 120 break; 121 case 'g': 122 (void)getnum(optarg, "goal", &goal_length, 1); 123 compat = 0; 124 break; 125 case 'm': 126 (void)getnum(optarg, "max", &max_length, 1); 127 compat = 0; 128 break; 129 case 'r': 130 raw++; 131 break; 132 default: 133 usage(); 134 } 135 136 argc -= optind; 137 argv += optind; 138 139 /* 140 * compatibility with old usage. 141 */ 142 if (compat && argc > 0 && getnum(*argv, "goal", &goal_length, 0)) { 143 argv++; 144 argc--; 145 if (argc > 0 && getnum(*argv, "max", &max_length, 0)) { 146 argv++; 147 argc--; 148 } 149 } 150 151 if (max_length <= goal_length) { 152 errx(1, "Max length (%zu) must be greater than goal " 153 "length (%zu)", max_length, goal_length); 154 } 155 if (argc == 0) { 156 fmt(stdin); 157 oflush(); 158 return 0; 159 } 160 while (argc--) { 161 if ((fi = fopen(*argv++, "r")) == NULL) { 162 warn("Cannot open `%s'", *argv); 163 errs++; 164 continue; 165 } 166 fmt(fi); 167 (void)fclose(fi); 168 } 169 oflush(); 170 buf_end(&outbuf); 171 return errs; 172 } 173 174 static void 175 usage(void) 176 { 177 (void)fprintf(stderr, 178 "Usage: %s [-Cr] [-g <goal>] [-m <max>] [<files>..]\n" 179 "\t %s [-Cr] [<goal>] [<max>] [<files>]\n", 180 getprogname(), getprogname()); 181 exit(1); 182 } 183 184 static int 185 getnum(const char *str, const char *what, size_t *res, int badnum) 186 { 187 unsigned long ul; 188 char *ep; 189 190 errno = 0; 191 ul = strtoul(str, &ep, 0); 192 if (*str != '\0' && *ep == '\0') { 193 if ((errno == ERANGE && ul == ULONG_MAX) || ul > SIZE_T_MAX) 194 errx(1, "%s number `%s' too big", what, str); 195 *res = (size_t)ul; 196 return 1; 197 } else if (badnum) 198 errx(1, "Bad %s number `%s'", what, str); 199 200 return 0; 201 } 202 203 /* 204 * Read up characters from the passed input file, forming lines, 205 * doing ^H processing, expanding tabs, stripping trailing blanks, 206 * and sending each line down for analysis. 207 */ 208 static void 209 fmt(FILE *fi) 210 { 211 struct buffer lbuf, cbuf; 212 char *cp, *cp2; 213 int c, add_space; 214 size_t len, col; 215 216 if (center) { 217 for (;;) { 218 cp = fgetln(fi, &len); 219 if (!cp) 220 return; 221 cp2 = cp + len - 1; 222 while (len-- && isspace((unsigned char)*cp)) 223 cp++; 224 while (cp2 > cp && isspace((unsigned char)*cp2)) 225 cp2--; 226 if (cp == cp2) 227 (void)putchar('\n'); 228 col = cp2 - cp; 229 if (goal_length > col) 230 for (c = 0; c < (goal_length - col) / 2; c++) 231 (void)putchar(' '); 232 while (cp <= cp2) 233 (void)putchar(*cp++); 234 (void)putchar('\n'); 235 } 236 } 237 238 buf_init(&lbuf); 239 buf_init(&cbuf); 240 c = getc(fi); 241 242 while (c != EOF) { 243 /* 244 * Collect a line, doing ^H processing. 245 * Leave tabs for now. 246 */ 247 buf_reset(&lbuf); 248 while (c != '\n' && c != EOF) { 249 if (c == '\b') { 250 (void)buf_unputc(&lbuf); 251 c = getc(fi); 252 continue; 253 } 254 if(!(isprint(c) || c == '\t' || c >= 160)) { 255 c = getc(fi); 256 continue; 257 } 258 buf_putc(&lbuf, c); 259 c = getc(fi); 260 } 261 buf_putc(&lbuf, '\0'); 262 (void)buf_unputc(&lbuf); 263 add_space = c != EOF; 264 265 /* 266 * Expand tabs on the way. 267 */ 268 col = 0; 269 cp = lbuf.bptr; 270 buf_reset(&cbuf); 271 while ((c = *cp++) != '\0') { 272 if (c != '\t') { 273 col++; 274 buf_putc(&cbuf, c); 275 continue; 276 } 277 do { 278 buf_putc(&cbuf, ' '); 279 col++; 280 } while ((col & 07) != 0); 281 } 282 283 /* 284 * Swipe trailing blanks from the line. 285 */ 286 for (cp2 = cbuf.ptr - 1; cp2 >= cbuf.bptr && *cp2 == ' '; cp2--) 287 continue; 288 cbuf.ptr = cp2 + 1; 289 buf_putc(&cbuf, '\0'); 290 (void)buf_unputc(&cbuf); 291 prefix(&cbuf, add_space); 292 if (c != EOF) 293 c = getc(fi); 294 } 295 buf_end(&cbuf); 296 buf_end(&lbuf); 297 } 298 299 /* 300 * Take a line devoid of tabs and other garbage and determine its 301 * blank prefix. If the indent changes, call for a linebreak. 302 * If the input line is blank, echo the blank line on the output. 303 * Finally, if the line minus the prefix is a mail header, try to keep 304 * it on a line by itself. 305 */ 306 static void 307 prefix(const struct buffer *buf, int add_space) 308 { 309 const char *cp; 310 const char **hp; 311 size_t np; 312 int h; 313 314 if (buf->ptr == buf->bptr) { 315 oflush(); 316 (void)putchar('\n'); 317 return; 318 } 319 for (cp = buf->bptr; *cp == ' '; cp++) 320 continue; 321 np = cp - buf->bptr; 322 323 /* 324 * The following horrible expression attempts to avoid linebreaks 325 * when the indent changes due to a paragraph. 326 */ 327 if (np != pfx && (np > pfx || abs((int)(pfx - np)) > 8)) 328 oflush(); 329 if (!raw) { 330 if ((h = ishead(cp)) != 0) { 331 oflush(); 332 mark = lineno; 333 } 334 if (lineno - mark < 3 && lineno - mark > 0) 335 for (hp = &headnames[0]; *hp != NULL; hp++) 336 if (ispref(*hp, cp)) { 337 h = 1; 338 oflush(); 339 break; 340 } 341 if (!h && (h = (*cp == '.'))) 342 oflush(); 343 } else 344 h = 0; 345 pfx = np; 346 if (h) { 347 pack(cp, (size_t)(buf->ptr - cp)); 348 oflush(); 349 } else 350 split(cp, add_space); 351 lineno++; 352 } 353 354 /* 355 * Split up the passed line into output "words" which are 356 * maximal strings of non-blanks with the blank separation 357 * attached at the end. Pass these words along to the output 358 * line packer. 359 */ 360 static void 361 split(const char line[], int add_space) 362 { 363 const char *cp; 364 struct buffer word; 365 size_t wlen; 366 367 buf_init(&word); 368 cp = line; 369 while (*cp) { 370 buf_reset(&word); 371 wlen = 0; 372 373 /* 374 * Collect a 'word,' allowing it to contain escaped white 375 * space. 376 */ 377 while (*cp && *cp != ' ') { 378 if (*cp == '\\' && isspace((unsigned char)cp[1])) 379 buf_putc(&word, *cp++); 380 buf_putc(&word, *cp++); 381 wlen++; 382 } 383 384 /* 385 * Guarantee a space at end of line. Two spaces after end of 386 * sentence punctuation. 387 */ 388 if (*cp == '\0' && add_space) { 389 buf_putc(&word, ' '); 390 if (strchr(".:!", cp[-1])) 391 buf_putc(&word, ' '); 392 } 393 while (*cp == ' ') 394 buf_putc(&word, *cp++); 395 396 buf_putc(&word, '\0'); 397 (void)buf_unputc(&word); 398 399 pack(word.bptr, wlen); 400 } 401 buf_end(&word); 402 } 403 404 /* 405 * Output section. 406 * Build up line images from the words passed in. Prefix 407 * each line with correct number of blanks. 408 * 409 * At the bottom of this whole mess, leading tabs are reinserted. 410 */ 411 412 /* 413 * Pack a word onto the output line. If this is the beginning of 414 * the line, push on the appropriately-sized string of blanks first. 415 * If the word won't fit on the current line, flush and begin a new 416 * line. If the word is too long to fit all by itself on a line, 417 * just give it its own and hope for the best. 418 * 419 * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the 420 * goal length, take it. If not, then check to see if the line 421 * will be over the max length; if so put the word on the next 422 * line. If not, check to see if the line will be closer to the 423 * goal length with or without the word and take it or put it on 424 * the next line accordingly. 425 */ 426 427 static void 428 pack(const char *word, size_t wlen) 429 { 430 const char *cp; 431 size_t s, t; 432 433 if (outbuf.bptr == outbuf.ptr) 434 leadin(); 435 /* 436 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the 437 * length of the line before the word is added; t is now the length 438 * of the line after the word is added 439 */ 440 s = outbuf.ptr - outbuf.bptr; 441 t = wlen + s; 442 if ((t <= goal_length) || ((t <= max_length) && 443 (s <= goal_length) && (t - goal_length <= goal_length - s))) { 444 /* 445 * In like flint! 446 */ 447 for (cp = word; *cp;) 448 buf_putc(&outbuf, *cp++); 449 return; 450 } 451 if (s > pfx) { 452 oflush(); 453 leadin(); 454 } 455 for (cp = word; *cp;) 456 buf_putc(&outbuf, *cp++); 457 } 458 459 /* 460 * If there is anything on the current output line, send it on 461 * its way. Reset outbuf. 462 */ 463 static void 464 oflush(void) 465 { 466 if (outbuf.bptr == outbuf.ptr) 467 return; 468 buf_putc(&outbuf, '\0'); 469 (void)buf_unputc(&outbuf); 470 tabulate(&outbuf); 471 buf_reset(&outbuf); 472 } 473 474 /* 475 * Take the passed line buffer, insert leading tabs where possible, and 476 * output on standard output (finally). 477 */ 478 static void 479 tabulate(struct buffer *buf) 480 { 481 char *cp; 482 size_t b, t; 483 484 /* 485 * Toss trailing blanks in the output line. 486 */ 487 for (cp = buf->ptr - 1; cp >= buf->bptr && *cp == ' '; cp--) 488 continue; 489 *++cp = '\0'; 490 491 /* 492 * Count the leading blank space and tabulate. 493 */ 494 for (cp = buf->bptr; *cp == ' '; cp++) 495 continue; 496 b = cp - buf->bptr; 497 t = b / 8; 498 b = b % 8; 499 if (t > 0) 500 do 501 (void)putchar('\t'); 502 while (--t); 503 if (b > 0) 504 do 505 (void)putchar(' '); 506 while (--b); 507 while (*cp) 508 (void)putchar(*cp++); 509 (void)putchar('\n'); 510 } 511 512 /* 513 * Initialize the output line with the appropriate number of 514 * leading blanks. 515 */ 516 static void 517 leadin(void) 518 { 519 size_t b; 520 521 buf_reset(&outbuf); 522 523 for (b = 0; b < pfx; b++) 524 buf_putc(&outbuf, ' '); 525 } 526 527 /* 528 * Is s1 a prefix of s2?? 529 */ 530 static int 531 ispref(const char *s1, const char *s2) 532 { 533 534 while (*s1++ == *s2) 535 continue; 536 return *s1 == '\0'; 537 } 538