14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*8462SApril.Chin@Sun.COM * Copyright (c) 1992-2008 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 7*8462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * * 204887Schin ***********************************************************************/ 214887Schin #pragma prototyped 224887Schin 234887Schin static const char usage[] = 244887Schin "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]" 254887Schin USAGE_LICENSE 264887Schin "[+NAME?fmt - simple text formatter]" 274887Schin "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space " 284887Schin "separated words into lines \awidth\a characters or less in length and " 294887Schin "writes the lines to the standard output. The standard input is read if " 304887Schin "\b-\b or no files are specified. Blank lines and interword spacing are " 314887Schin "preserved in the output. Indentation is preserved, and lines with " 324887Schin "identical indentation are joined and justified.]" 334887Schin "[+?\bfmt\b is meant to format mail messages prior to sending, but may " 344887Schin "also be useful for other simple tasks. For example, in \bvi\b(1) the " 354887Schin "command \b:!}fmt\b will justify the lines in the current paragraph.]" 364887Schin "[c:crown-margin?Preserve the indentation of the first two lines within " 374887Schin "a paragraph, and align the left margin of each subsequent line with " 384887Schin "that of the second line.]" 394887Schin "[o:optget?Format concatenated \boptget\b(3) usage strings.]" 404887Schin "[s:split-only?Split lines only; do not join short lines to form longer " 414887Schin "ones.]" 424887Schin "[u:uniform-spacing?One space between words, two after sentences.]" 434887Schin "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]" 444887Schin "\n\n" 454887Schin "[ file ... ]" 464887Schin "\n\n" 474887Schin "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), " 484887Schin "\boptget\b(3)]" 494887Schin ; 504887Schin 514887Schin #include <cmd.h> 524887Schin #include <ctype.h> 534887Schin 544887Schin typedef struct Fmt_s 554887Schin { 564887Schin long flags; 574887Schin char* outp; 584887Schin char* outbuf; 594887Schin char* endbuf; 604887Schin Sfio_t* in; 614887Schin Sfio_t* out; 624887Schin int indent; 634887Schin int nextdent; 644887Schin int nwords; 654887Schin int prefix; 664887Schin int quote; 674887Schin int retain; 684887Schin int section; 694887Schin } Fmt_t; 704887Schin 714887Schin #define INDENT 4 724887Schin #define TABSZ 8 734887Schin 744887Schin #define isoption(fp,c) ((fp)->flags&(1L<<((c)-'a'))) 754887Schin #define setoption(fp,c) ((fp)->flags|=(1L<<((c)-'a'))) 764887Schin #define clroption(fp,c) ((fp)->flags&=~(1L<<((c)-'a'))) 774887Schin 784887Schin static void 794887Schin outline(Fmt_t* fp) 804887Schin { 814887Schin register char* cp = fp->outbuf; 824887Schin int n = 0; 834887Schin int c; 844887Schin int d; 854887Schin 864887Schin if (!fp->outp) 874887Schin return; 884887Schin while (fp->outp[-1] == ' ') 894887Schin fp->outp--; 904887Schin *fp->outp = 0; 914887Schin while (*cp++ == ' ') 924887Schin n++; 934887Schin if (n >= TABSZ) 944887Schin { 954887Schin n /= TABSZ; 964887Schin cp = &fp->outbuf[TABSZ*n]; 974887Schin while (n--) 984887Schin *--cp = '\t'; 994887Schin } 1004887Schin else 1014887Schin cp = fp->outbuf; 1024887Schin fp->nwords = 0; 1034887Schin if (!isoption(fp, 'o')) 1044887Schin sfputr(fp->out, cp, '\n'); 1054887Schin else if (*cp) 1064887Schin { 1074887Schin n = fp->indent; 1084887Schin if (*cp != '[') 1094887Schin { 1104887Schin if (*cp == ' ') 1114887Schin cp++; 1124887Schin n += INDENT; 1134887Schin } 1144887Schin while (n--) 1154887Schin sfputc(fp->out, ' '); 1164887Schin if (fp->quote) 1174887Schin { 1184887Schin if ((d = (fp->outp - cp)) <= 0) 1194887Schin c = 0; 1204887Schin else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\') 1214887Schin c = '}'; 1224887Schin sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " "); 1234887Schin } 1244887Schin else 1254887Schin sfputr(fp->out, cp, '\n'); 1264887Schin if (fp->nextdent) 1274887Schin { 1284887Schin fp->indent += fp->nextdent; 1294887Schin fp->endbuf -= fp->nextdent; 1304887Schin fp->nextdent = 0; 1314887Schin } 1324887Schin } 1334887Schin fp->outp = 0; 1344887Schin } 1354887Schin 1364887Schin static void 1374887Schin split(Fmt_t* fp, char* buf, int splice) 1384887Schin { 1394887Schin register char* cp; 1404887Schin register char* ep; 1414887Schin register char* qp; 1424887Schin register int c = 1; 1434887Schin register int q = 0; 1444887Schin register int n; 1454887Schin int prefix; 1464887Schin 1474887Schin for (ep = buf; *ep == ' '; ep++); 1484887Schin prefix = ep - buf; 1494887Schin 1504887Schin /* 1514887Schin * preserve blank lines 1524887Schin */ 1534887Schin 1544887Schin if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o')) 1554887Schin { 1564887Schin if (*ep) 1574887Schin prefix = strlen(buf); 1584887Schin outline(fp); 1594887Schin strcpy(fp->outbuf, buf); 1604887Schin fp->outp = fp->outbuf+prefix; 1614887Schin outline(fp); 1624887Schin return; 1634887Schin } 1644887Schin if (fp->prefix < prefix && !isoption(fp, 'c')) 1654887Schin outline(fp); 1664887Schin if (!fp->outp || prefix < fp->prefix) 1674887Schin fp->prefix = prefix; 1684887Schin while (c) 1694887Schin { 1704887Schin cp = ep; 1714887Schin while (*ep == ' ') 1724887Schin ep++; 1734887Schin if (cp != ep && isoption(fp, 'u')) 1744887Schin cp = ep-1; 1754887Schin while (c = *ep) 1764887Schin { 1774887Schin if (c == ' ') 1784887Schin break; 1794887Schin ep++; 1804887Schin 1814887Schin /* 1824887Schin * skip over \space 1834887Schin */ 1844887Schin 1854887Schin if (c == '\\' && *ep) 1864887Schin ep++; 1874887Schin } 1884887Schin n = (ep-cp); 1894887Schin if (n && isoption(fp, 'o')) 1904887Schin { 1914887Schin for (qp = cp; qp < ep; qp++) 1924887Schin if (*qp == '\\') 1934887Schin qp++; 1944887Schin else if (*qp == '"') 1954887Schin q = !q; 1964887Schin if (*(ep-1) == '"') 1974887Schin goto skip; 1984887Schin } 1994887Schin if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q) 2004887Schin outline(fp); 2014887Schin skip: 2024887Schin if (fp->nwords == 0) 2034887Schin { 2044887Schin if (fp->prefix) 2054887Schin memset(fp->outbuf, ' ', fp->prefix); 2064887Schin fp->outp = &fp->outbuf[fp->prefix]; 2074887Schin while (*cp == ' ') 2084887Schin cp++; 2094887Schin n = (ep-cp); 2104887Schin } 2114887Schin memcpy(fp->outp, cp, n); 2124887Schin fp->outp += n; 2134887Schin fp->nwords++; 2144887Schin } 2154887Schin if (isoption(fp, 's') || *buf == 0) 2164887Schin outline(fp); 2174887Schin else if (fp->outp) 2184887Schin { 2194887Schin /* 2204887Schin * two spaces at ends of sentences 2214887Schin */ 2224887Schin 2234887Schin if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1])) 2244887Schin *fp->outp++ = ' '; 2254887Schin if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' ')) 2264887Schin *fp->outp++ = ' '; 2274887Schin } 2284887Schin } 2294887Schin 2304887Schin static int 2314887Schin dofmt(Fmt_t* fp) 2324887Schin { 2334887Schin register int c; 2344887Schin int b; 2354887Schin int x; 2364887Schin int splice; 2374887Schin char* cp; 2384887Schin char* dp; 2394887Schin char* ep; 2404887Schin char* lp; 2414887Schin char* tp; 2424887Schin char buf[8192]; 2434887Schin 2444887Schin cp = 0; 2454887Schin while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in))) 2464887Schin { 2474887Schin if (isoption(fp, 'o')) 2484887Schin { 2494887Schin if (!isoption(fp, 'i')) 2504887Schin { 2514887Schin setoption(fp, 'i'); 2524887Schin b = 0; 2534887Schin while (cp < lp) 2544887Schin { 2554887Schin if (*cp == ' ') 2564887Schin b += 1; 2574887Schin else if (*cp == '\t') 2584887Schin b += INDENT; 2594887Schin else 2604887Schin break; 2614887Schin cp++; 2624887Schin } 2634887Schin fp->indent = roundof(b, INDENT); 2644887Schin } 2654887Schin else 2664887Schin while (cp < lp && (*cp == ' ' || *cp == '\t')) 2674887Schin cp++; 2684887Schin if (!isoption(fp, 'q') && cp < lp) 2694887Schin { 2704887Schin setoption(fp, 'q'); 2714887Schin if (*cp == '"') 2724887Schin { 2734887Schin ep = lp; 2744887Schin while (--ep > cp) 2754887Schin if (*ep == '"') 2764887Schin { 2774887Schin fp->quote = 1; 2784887Schin break; 2794887Schin } 2804887Schin else if (*ep != ' ' && *ep != '\t') 2814887Schin break; 2824887Schin } 2834887Schin } 2844887Schin } 2854887Schin again: 2864887Schin dp = buf; 2874887Schin ep = 0; 2884887Schin for (b = 1;; b = 0) 2894887Schin { 2904887Schin if (cp >= lp) 2914887Schin { 2924887Schin cp = 0; 2934887Schin break; 2944887Schin } 2954887Schin c = *cp++; 2964887Schin if (isoption(fp, 'o')) 2974887Schin { 2984887Schin if (c == '\\') 2994887Schin { 3004887Schin x = 0; 3014887Schin c = ' '; 3024887Schin cp--; 3034887Schin while (cp < lp) 3044887Schin { 3054887Schin if (*cp == '\\') 3064887Schin { 3074887Schin cp++; 3084887Schin if ((lp - cp) < 1) 3094887Schin { 3104887Schin c = '\\'; 3114887Schin break; 3124887Schin } 3134887Schin if (*cp == 'n') 3144887Schin { 3154887Schin cp++; 3164887Schin c = '\n'; 3174887Schin if ((lp - cp) > 2) 3184887Schin { 3194887Schin if (*cp == ']' || *cp == '@' && *(cp + 1) == '(') 3204887Schin { 3214887Schin *dp++ = '\\'; 3224887Schin *dp++ = 'n'; 3234887Schin c = *cp++; 3244887Schin break; 3254887Schin } 3264887Schin if (*cp == '\\' && *(cp + 1) == 'n') 3274887Schin { 3284887Schin cp += 2; 3294887Schin *dp++ = '\n'; 3304887Schin break; 3314887Schin } 3324887Schin } 3334887Schin } 3344887Schin else if (*cp == 't' || *cp == ' ') 3354887Schin { 3364887Schin cp++; 3374887Schin x = 1; 3384887Schin c = ' '; 3394887Schin } 3404887Schin else 3414887Schin { 3424887Schin if (x && dp != buf && *(dp - 1) != ' ') 3434887Schin *dp++ = ' '; 3444887Schin *dp++ = '\\'; 3454887Schin c = *cp++; 3464887Schin break; 3474887Schin } 3484887Schin } 3494887Schin else if (*cp == ' ' || *cp == '\t') 3504887Schin { 3514887Schin cp++; 3524887Schin c = ' '; 3534887Schin x = 1; 3544887Schin } 3554887Schin else 3564887Schin { 3574887Schin if (x && c != '\n' && dp != buf && *(dp - 1) != ' ') 3584887Schin *dp++ = ' '; 3594887Schin break; 3604887Schin } 3614887Schin } 3624887Schin if (c == '\n') 3634887Schin { 3644887Schin c = 0; 3654887Schin goto flush; 3664887Schin } 3674887Schin if (c == ' ' && (dp == buf || *(dp - 1) == ' ')) 3684887Schin continue; 3694887Schin } 3704887Schin else if (c == '"') 3714887Schin { 3724887Schin if (b || cp >= lp) 3734887Schin { 3744887Schin if (fp->quote) 3754887Schin continue; 3764887Schin fp->section = 0; 3774887Schin } 3784887Schin } 3794887Schin else if (c == '\a') 3804887Schin { 3814887Schin *dp++ = '\\'; 3824887Schin c = 'a'; 3834887Schin } 3844887Schin else if (c == '\b') 3854887Schin { 3864887Schin *dp++ = '\\'; 3874887Schin c = 'b'; 3884887Schin } 3894887Schin else if (c == '\f') 3904887Schin { 3914887Schin *dp++ = '\\'; 3924887Schin c = 'f'; 3934887Schin } 3944887Schin else if (c == '\v') 3954887Schin { 3964887Schin *dp++ = '\\'; 3974887Schin c = 'v'; 3984887Schin } 3994887Schin else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!')) 4004887Schin { 4014887Schin if (cp < lp && *cp == ']') 4024887Schin { 4034887Schin cp++; 4044887Schin *dp++ = c; 4054887Schin } 4064887Schin else 4074887Schin { 4084887Schin fp->section = 1; 4094887Schin fp->retain = 0; 4104887Schin flush: 4114887Schin *dp++ = c; 4124887Schin *dp = 0; 4134887Schin split(fp, buf, 0); 4144887Schin outline(fp); 4154887Schin goto again; 4164887Schin } 4174887Schin } 4184887Schin else if (fp->section) 4194887Schin { 4204887Schin if (c == '[') 4214887Schin { 4224887Schin if (b) 4234887Schin fp->retain = 1; 4244887Schin else 4254887Schin { 4264887Schin cp--; 4274887Schin c = 0; 4284887Schin goto flush; 4294887Schin } 4304887Schin fp->section = 0; 4314887Schin } 4324887Schin else if (c == '{') 4334887Schin { 4344887Schin x = 1; 4354887Schin for (tp = cp; tp < lp; tp++) 4364887Schin { 4374887Schin if (*tp == '[' || *tp == '\n') 4384887Schin break; 4394887Schin if (*tp == ' ' || *tp == '\t' || *tp == '"') 4404887Schin continue; 4414887Schin if (*tp == '\\' && (lp - tp) > 1) 4424887Schin { 4434887Schin if (*++tp == 'n') 4444887Schin break; 4454887Schin if (*tp == 't' || *tp == '\n') 4464887Schin continue; 4474887Schin } 4484887Schin x = 0; 4494887Schin break; 4504887Schin } 4514887Schin if (x) 4524887Schin { 4534887Schin if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT)) 4544887Schin fp->nextdent = 2*INDENT; 4554887Schin goto flush; 4564887Schin } 4574887Schin else 4584887Schin fp->section = 0; 4594887Schin } 4604887Schin else if (c == '}') 4614887Schin { 4624887Schin if (fp->indent && (b || *(cp - 2) != 'f')) 4634887Schin { 4644887Schin if (b) 4654887Schin { 4664887Schin fp->indent -= 2*INDENT; 4674887Schin fp->endbuf += 2*INDENT; 4684887Schin } 4694887Schin else 4704887Schin { 4714887Schin cp--; 4724887Schin c = 0; 4734887Schin } 4744887Schin goto flush; 4754887Schin } 4764887Schin else 4774887Schin fp->section = 0; 4784887Schin } 4794887Schin else if (c == ' ' || c == '\t') 4804887Schin continue; 4814887Schin else 4824887Schin fp->section = 0; 4834887Schin } 4844887Schin else if (c == '?' && (cp >= lp || *cp != '?')) 4854887Schin { 4864887Schin if (fp->retain) 4874887Schin { 4884887Schin cp--; 4894887Schin while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3]) 4904887Schin *dp++ = *cp++; 4914887Schin if (cp < lp && (*cp == ' ' || *cp == '\t')) 4924887Schin *dp++ = *cp++; 4934887Schin *dp = 0; 4944887Schin split(fp, buf, 0); 4954887Schin dp = buf; 4964887Schin ep = 0; 4974887Schin fp->retain = 0; 4984887Schin if (fp->outp >= fp->endbuf) 4994887Schin outline(fp); 5004887Schin continue; 5014887Schin } 5024887Schin } 5034887Schin else if (c == ' ' || c == '\t') 5044887Schin for (c = ' '; *cp == ' ' || *cp == '\t'; cp++); 5054887Schin } 5064887Schin else if (c == '\b') 5074887Schin { 5084887Schin if (dp > buf) 5094887Schin { 5104887Schin dp--; 5114887Schin if (ep) 5124887Schin ep--; 5134887Schin } 5144887Schin continue; 5154887Schin } 5164887Schin else if (c == '\t') 5174887Schin { 5184887Schin /* 5194887Schin * expand tabs 5204887Schin */ 5214887Schin 5224887Schin if (!ep) 5234887Schin ep = dp; 5244887Schin c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ; 5254887Schin if (dp >= &buf[sizeof(buf) - c - 3]) 5264887Schin { 5274887Schin cp--; 5284887Schin break; 5294887Schin } 5304887Schin while (c-- > 0) 5314887Schin *dp++ = ' '; 5324887Schin continue; 5334887Schin } 5344887Schin else if (!isprint(c)) 5354887Schin continue; 5364887Schin if (dp >= &buf[sizeof(buf) - 3]) 5374887Schin { 5384887Schin tp = dp; 5394887Schin while (--tp > buf) 5404887Schin if (isspace(*tp)) 5414887Schin { 5424887Schin cp -= dp - tp; 5434887Schin dp = tp; 5444887Schin break; 5454887Schin } 5464887Schin ep = 0; 5474887Schin break; 5484887Schin } 5494887Schin if (c != ' ') 5504887Schin ep = 0; 5514887Schin else if (!ep) 5524887Schin ep = dp; 5534887Schin *dp++ = c; 5544887Schin } 5554887Schin if (ep) 5564887Schin *ep = 0; 5574887Schin else 5584887Schin *dp = 0; 5594887Schin split(fp, buf, splice); 5604887Schin } 5614887Schin return 0; 5624887Schin } 5634887Schin 5644887Schin int 5654887Schin b_fmt(int argc, char** argv, void *context) 5664887Schin { 5674887Schin register int n; 5684887Schin char* cp; 5694887Schin Fmt_t fmt; 5704887Schin char outbuf[8 * 1024]; 5714887Schin 5724887Schin fmt.flags = 0; 5734887Schin fmt.out = sfstdout; 5744887Schin fmt.outbuf = outbuf; 5754887Schin fmt.outp = 0; 5764887Schin fmt.endbuf = &outbuf[72]; 5774887Schin fmt.indent = 0; 5784887Schin fmt.nextdent = 0; 5794887Schin fmt.nwords = 0; 5804887Schin fmt.prefix = 0; 5814887Schin fmt.quote = 0; 5824887Schin fmt.retain = 0; 5834887Schin fmt.section = 1; 5844887Schin cmdinit(argc, argv, context, ERROR_CATALOG, 0); 5854887Schin while (n = optget(argv, usage)) 5864887Schin switch (n) 5874887Schin { 5884887Schin case 'c': 5894887Schin case 'o': 5904887Schin case 's': 5914887Schin case 'u': 5924887Schin setoption(&fmt, n); 5934887Schin break; 5944887Schin case 'w': 5954887Schin if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf)) 5964887Schin error(2, "width out of range"); 5974887Schin fmt.endbuf = &outbuf[opt_info.num]; 5984887Schin break; 5994887Schin case ':': 6004887Schin error(2, "%s", opt_info.arg); 6014887Schin break; 6024887Schin case '?': 6034887Schin error(ERROR_usage(2), "%s", opt_info.arg); 6044887Schin break; 6054887Schin } 6064887Schin argv += opt_info.index; 6074887Schin if (error_info.errors) 6084887Schin error(ERROR_usage(2), "%s", optusage(NiL)); 6094887Schin if (isoption(&fmt, 'o')) 6104887Schin setoption(&fmt, 'c'); 6114887Schin if (isoption(&fmt, 's')) 6124887Schin clroption(&fmt, 'u'); 6134887Schin if (cp = *argv) 6144887Schin argv++; 6154887Schin do { 6164887Schin if (!cp || streq(cp, "-")) 6174887Schin fmt.in = sfstdin; 6184887Schin else if (!(fmt.in = sfopen(NiL, cp, "r"))) 6194887Schin { 6204887Schin error(ERROR_system(0), "%s: cannot open", cp); 6214887Schin error_info.errors = 1; 6224887Schin continue; 6234887Schin } 6244887Schin dofmt(&fmt); 6254887Schin if (fmt.in != sfstdin) 6264887Schin sfclose(fmt.in); 6274887Schin } while (cp = *argv++); 6284887Schin outline(&fmt); 6294887Schin if (sfsync(sfstdout)) 6304887Schin error(ERROR_system(0), "write error"); 6314887Schin return error_info.errors != 0; 6324887Schin } 633