/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1992-2010 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                                                                      *
***********************************************************************/
214887Schin #pragma prototyped
224887Schin
234887Schin static const char usage[] =
244887Schin "[-?\n@(#)$Id: fmt (AT&T Research) 2007-01-02 $\n]"
254887Schin USAGE_LICENSE
264887Schin "[+NAME?fmt - simple text formatter]"
274887Schin "[+DESCRIPTION?\bfmt\b reads the input files and left justifies space "
284887Schin "separated words into lines \awidth\a characters or less in length and "
294887Schin "writes the lines to the standard output. The standard input is read if "
304887Schin "\b-\b or no files are specified. Blank lines and interword spacing are "
314887Schin "preserved in the output. Indentation is preserved, and lines with "
324887Schin "identical indentation are joined and justified.]"
334887Schin "[+?\bfmt\b is meant to format mail messages prior to sending, but may "
344887Schin "also be useful for other simple tasks. For example, in \bvi\b(1) the "
354887Schin "command \b:!}fmt\b will justify the lines in the current paragraph.]"
364887Schin "[c:crown-margin?Preserve the indentation of the first two lines within "
374887Schin "a paragraph, and align the left margin of each subsequent line with "
384887Schin "that of the second line.]"
394887Schin "[o:optget?Format concatenated \boptget\b(3) usage strings.]"
404887Schin "[s:split-only?Split lines only; do not join short lines to form longer "
414887Schin "ones.]"
424887Schin "[u:uniform-spacing?One space between words, two after sentences.]"
434887Schin "[w:width?Set the output line width to \acolumns\a.]#[columns:=72]"
444887Schin "\n\n"
454887Schin "[ file ... ]"
464887Schin "\n\n"
474887Schin "[+SEE ALSO?\bmailx\b(1), \bnroff\b(1), \btroff\b(1), \bvi\b(1), "
484887Schin "\boptget\b(3)]"
494887Schin ;
504887Schin
514887Schin #include <cmd.h>
524887Schin #include <ctype.h>
534887Schin
544887Schin typedef struct Fmt_s
554887Schin {
564887Schin long flags;
574887Schin char* outp;
584887Schin char* outbuf;
594887Schin char* endbuf;
604887Schin Sfio_t* in;
614887Schin Sfio_t* out;
624887Schin int indent;
634887Schin int nextdent;
644887Schin int nwords;
654887Schin int prefix;
664887Schin int quote;
674887Schin int retain;
684887Schin int section;
694887Schin } Fmt_t;
704887Schin
/*
 * INDENT is the indentation unit used for optget(3) usage strings (-o)
 * TABSZ is the tab stop width used to expand and re-compress tabs
 */

#define INDENT		4
#define TABSZ		8

/*
 * options are recorded in fp->flags, one bit per lower case letter
 */

#define FMT_OPTBIT(c)	(1L<<((c)-'a'))

#define isoption(fp,c)	((fp)->flags&FMT_OPTBIT(c))
#define setoption(fp,c)	((fp)->flags|=FMT_OPTBIT(c))
#define clroption(fp,c)	((fp)->flags&=~FMT_OPTBIT(c))
774887Schin
784887Schin static void
outline(Fmt_t * fp)794887Schin outline(Fmt_t* fp)
804887Schin {
814887Schin register char* cp = fp->outbuf;
824887Schin int n = 0;
834887Schin int c;
844887Schin int d;
854887Schin
864887Schin if (!fp->outp)
874887Schin return;
884887Schin while (fp->outp[-1] == ' ')
894887Schin fp->outp--;
904887Schin *fp->outp = 0;
914887Schin while (*cp++ == ' ')
924887Schin n++;
934887Schin if (n >= TABSZ)
944887Schin {
954887Schin n /= TABSZ;
964887Schin cp = &fp->outbuf[TABSZ*n];
974887Schin while (n--)
984887Schin *--cp = '\t';
994887Schin }
1004887Schin else
1014887Schin cp = fp->outbuf;
1024887Schin fp->nwords = 0;
1034887Schin if (!isoption(fp, 'o'))
1044887Schin sfputr(fp->out, cp, '\n');
1054887Schin else if (*cp)
1064887Schin {
1074887Schin n = fp->indent;
1084887Schin if (*cp != '[')
1094887Schin {
1104887Schin if (*cp == ' ')
1114887Schin cp++;
1124887Schin n += INDENT;
1134887Schin }
1144887Schin while (n--)
1154887Schin sfputc(fp->out, ' ');
1164887Schin if (fp->quote)
1174887Schin {
1184887Schin if ((d = (fp->outp - cp)) <= 0)
1194887Schin c = 0;
1204887Schin else if ((c = fp->outp[-1]) == 'n' && d > 1 && fp->outp[-2] == '\\')
1214887Schin c = '}';
1224887Schin sfprintf(fp->out, "\"%s%s\"\n", cp, c == ']' || c == '{' || c == '}' ? "" : " ");
1234887Schin }
1244887Schin else
1254887Schin sfputr(fp->out, cp, '\n');
1264887Schin if (fp->nextdent)
1274887Schin {
1284887Schin fp->indent += fp->nextdent;
1294887Schin fp->endbuf -= fp->nextdent;
1304887Schin fp->nextdent = 0;
1314887Schin }
1324887Schin }
1334887Schin fp->outp = 0;
1344887Schin }
1354887Schin
1364887Schin static void
split(Fmt_t * fp,char * buf,int splice)1374887Schin split(Fmt_t* fp, char* buf, int splice)
1384887Schin {
1394887Schin register char* cp;
1404887Schin register char* ep;
1414887Schin register char* qp;
1424887Schin register int c = 1;
1434887Schin register int q = 0;
1444887Schin register int n;
1454887Schin int prefix;
1464887Schin
1474887Schin for (ep = buf; *ep == ' '; ep++);
1484887Schin prefix = ep - buf;
1494887Schin
1504887Schin /*
1514887Schin * preserve blank lines
1524887Schin */
1534887Schin
1544887Schin if ((*ep == 0 || *buf == '.') && !isoption(fp, 'o'))
1554887Schin {
1564887Schin if (*ep)
1574887Schin prefix = strlen(buf);
1584887Schin outline(fp);
1594887Schin strcpy(fp->outbuf, buf);
1604887Schin fp->outp = fp->outbuf+prefix;
1614887Schin outline(fp);
1624887Schin return;
1634887Schin }
1644887Schin if (fp->prefix < prefix && !isoption(fp, 'c'))
1654887Schin outline(fp);
1664887Schin if (!fp->outp || prefix < fp->prefix)
1674887Schin fp->prefix = prefix;
1684887Schin while (c)
1694887Schin {
1704887Schin cp = ep;
1714887Schin while (*ep == ' ')
1724887Schin ep++;
1734887Schin if (cp != ep && isoption(fp, 'u'))
1744887Schin cp = ep-1;
1754887Schin while (c = *ep)
1764887Schin {
1774887Schin if (c == ' ')
1784887Schin break;
1794887Schin ep++;
1804887Schin
1814887Schin /*
1824887Schin * skip over \space
1834887Schin */
1844887Schin
1854887Schin if (c == '\\' && *ep)
1864887Schin ep++;
1874887Schin }
1884887Schin n = (ep-cp);
1894887Schin if (n && isoption(fp, 'o'))
1904887Schin {
1914887Schin for (qp = cp; qp < ep; qp++)
1924887Schin if (*qp == '\\')
1934887Schin qp++;
1944887Schin else if (*qp == '"')
1954887Schin q = !q;
1964887Schin if (*(ep-1) == '"')
1974887Schin goto skip;
1984887Schin }
1994887Schin if (fp->nwords > 0 && &fp->outp[n] >= fp->endbuf && !fp->retain && !q)
2004887Schin outline(fp);
2014887Schin skip:
2024887Schin if (fp->nwords == 0)
2034887Schin {
2044887Schin if (fp->prefix)
2054887Schin memset(fp->outbuf, ' ', fp->prefix);
2064887Schin fp->outp = &fp->outbuf[fp->prefix];
2074887Schin while (*cp == ' ')
2084887Schin cp++;
2094887Schin n = (ep-cp);
2104887Schin }
2114887Schin memcpy(fp->outp, cp, n);
2124887Schin fp->outp += n;
2134887Schin fp->nwords++;
2144887Schin }
2154887Schin if (isoption(fp, 's') || *buf == 0)
2164887Schin outline(fp);
2174887Schin else if (fp->outp)
2184887Schin {
2194887Schin /*
2204887Schin * two spaces at ends of sentences
2214887Schin */
2224887Schin
2234887Schin if (!isoption(fp, 'o') && strchr(".:!?", fp->outp[-1]))
2244887Schin *fp->outp++ = ' ';
2254887Schin if (!splice && !fp->retain && (!fp->quote || (fp->outp - fp->outbuf) < 2 || fp->outp[-2] != '\\' || fp->outp[-1] != 'n' && fp->outp[-1] != 't' && fp->outp[-1] != ' '))
2264887Schin *fp->outp++ = ' ';
2274887Schin }
2284887Schin }
2294887Schin
2304887Schin static int
dofmt(Fmt_t * fp)2314887Schin dofmt(Fmt_t* fp)
2324887Schin {
2334887Schin register int c;
2344887Schin int b;
2354887Schin int x;
2364887Schin int splice;
2374887Schin char* cp;
2384887Schin char* dp;
2394887Schin char* ep;
2404887Schin char* lp;
2414887Schin char* tp;
2424887Schin char buf[8192];
2434887Schin
2444887Schin cp = 0;
2454887Schin while (cp || (cp = sfgetr(fp->in, '\n', 0)) && !(splice = 0) && (lp = cp + sfvalue(fp->in) - 1) || (cp = sfgetr(fp->in, '\n', SF_LASTR)) && (splice = 1) && (lp = cp + sfvalue(fp->in)))
2464887Schin {
2474887Schin if (isoption(fp, 'o'))
2484887Schin {
2494887Schin if (!isoption(fp, 'i'))
2504887Schin {
2514887Schin setoption(fp, 'i');
2524887Schin b = 0;
2534887Schin while (cp < lp)
2544887Schin {
2554887Schin if (*cp == ' ')
2564887Schin b += 1;
2574887Schin else if (*cp == '\t')
2584887Schin b += INDENT;
2594887Schin else
2604887Schin break;
2614887Schin cp++;
2624887Schin }
2634887Schin fp->indent = roundof(b, INDENT);
2644887Schin }
2654887Schin else
2664887Schin while (cp < lp && (*cp == ' ' || *cp == '\t'))
2674887Schin cp++;
2684887Schin if (!isoption(fp, 'q') && cp < lp)
2694887Schin {
2704887Schin setoption(fp, 'q');
2714887Schin if (*cp == '"')
2724887Schin {
2734887Schin ep = lp;
2744887Schin while (--ep > cp)
2754887Schin if (*ep == '"')
2764887Schin {
2774887Schin fp->quote = 1;
2784887Schin break;
2794887Schin }
2804887Schin else if (*ep != ' ' && *ep != '\t')
2814887Schin break;
2824887Schin }
2834887Schin }
2844887Schin }
2854887Schin again:
2864887Schin dp = buf;
2874887Schin ep = 0;
2884887Schin for (b = 1;; b = 0)
2894887Schin {
2904887Schin if (cp >= lp)
2914887Schin {
2924887Schin cp = 0;
2934887Schin break;
2944887Schin }
2954887Schin c = *cp++;
2964887Schin if (isoption(fp, 'o'))
2974887Schin {
2984887Schin if (c == '\\')
2994887Schin {
3004887Schin x = 0;
3014887Schin c = ' ';
3024887Schin cp--;
3034887Schin while (cp < lp)
3044887Schin {
3054887Schin if (*cp == '\\')
3064887Schin {
3074887Schin cp++;
3084887Schin if ((lp - cp) < 1)
3094887Schin {
3104887Schin c = '\\';
3114887Schin break;
3124887Schin }
3134887Schin if (*cp == 'n')
3144887Schin {
3154887Schin cp++;
3164887Schin c = '\n';
3174887Schin if ((lp - cp) > 2)
3184887Schin {
3194887Schin if (*cp == ']' || *cp == '@' && *(cp + 1) == '(')
3204887Schin {
3214887Schin *dp++ = '\\';
3224887Schin *dp++ = 'n';
3234887Schin c = *cp++;
3244887Schin break;
3254887Schin }
3264887Schin if (*cp == '\\' && *(cp + 1) == 'n')
3274887Schin {
3284887Schin cp += 2;
3294887Schin *dp++ = '\n';
3304887Schin break;
3314887Schin }
3324887Schin }
3334887Schin }
3344887Schin else if (*cp == 't' || *cp == ' ')
3354887Schin {
3364887Schin cp++;
3374887Schin x = 1;
3384887Schin c = ' ';
3394887Schin }
3404887Schin else
3414887Schin {
3424887Schin if (x && dp != buf && *(dp - 1) != ' ')
3434887Schin *dp++ = ' ';
3444887Schin *dp++ = '\\';
3454887Schin c = *cp++;
3464887Schin break;
3474887Schin }
3484887Schin }
3494887Schin else if (*cp == ' ' || *cp == '\t')
3504887Schin {
3514887Schin cp++;
3524887Schin c = ' ';
3534887Schin x = 1;
3544887Schin }
3554887Schin else
3564887Schin {
3574887Schin if (x && c != '\n' && dp != buf && *(dp - 1) != ' ')
3584887Schin *dp++ = ' ';
3594887Schin break;
3604887Schin }
3614887Schin }
3624887Schin if (c == '\n')
3634887Schin {
3644887Schin c = 0;
3654887Schin goto flush;
3664887Schin }
3674887Schin if (c == ' ' && (dp == buf || *(dp - 1) == ' '))
3684887Schin continue;
3694887Schin }
3704887Schin else if (c == '"')
3714887Schin {
3724887Schin if (b || cp >= lp)
3734887Schin {
3744887Schin if (fp->quote)
3754887Schin continue;
3764887Schin fp->section = 0;
3774887Schin }
3784887Schin }
3794887Schin else if (c == '\a')
3804887Schin {
3814887Schin *dp++ = '\\';
3824887Schin c = 'a';
3834887Schin }
3844887Schin else if (c == '\b')
3854887Schin {
3864887Schin *dp++ = '\\';
3874887Schin c = 'b';
3884887Schin }
3894887Schin else if (c == '\f')
3904887Schin {
3914887Schin *dp++ = '\\';
3924887Schin c = 'f';
3934887Schin }
3944887Schin else if (c == '\v')
3954887Schin {
3964887Schin *dp++ = '\\';
3974887Schin c = 'v';
3984887Schin }
3994887Schin else if (c == ']' && (cp >= lp || *cp != ':' && *cp != '#' && *cp != '!'))
4004887Schin {
4014887Schin if (cp < lp && *cp == ']')
4024887Schin {
4034887Schin cp++;
4044887Schin *dp++ = c;
4054887Schin }
4064887Schin else
4074887Schin {
4084887Schin fp->section = 1;
4094887Schin fp->retain = 0;
4104887Schin flush:
4114887Schin *dp++ = c;
4124887Schin *dp = 0;
4134887Schin split(fp, buf, 0);
4144887Schin outline(fp);
4154887Schin goto again;
4164887Schin }
4174887Schin }
4184887Schin else if (fp->section)
4194887Schin {
4204887Schin if (c == '[')
4214887Schin {
4224887Schin if (b)
4234887Schin fp->retain = 1;
4244887Schin else
4254887Schin {
4264887Schin cp--;
4274887Schin c = 0;
4284887Schin goto flush;
4294887Schin }
4304887Schin fp->section = 0;
4314887Schin }
4324887Schin else if (c == '{')
4334887Schin {
4344887Schin x = 1;
4354887Schin for (tp = cp; tp < lp; tp++)
4364887Schin {
4374887Schin if (*tp == '[' || *tp == '\n')
4384887Schin break;
4394887Schin if (*tp == ' ' || *tp == '\t' || *tp == '"')
4404887Schin continue;
4414887Schin if (*tp == '\\' && (lp - tp) > 1)
4424887Schin {
4434887Schin if (*++tp == 'n')
4444887Schin break;
4454887Schin if (*tp == 't' || *tp == '\n')
4464887Schin continue;
4474887Schin }
4484887Schin x = 0;
4494887Schin break;
4504887Schin }
4514887Schin if (x)
4524887Schin {
4534887Schin if (fp->endbuf > (fp->outbuf + fp->indent + 2*INDENT))
4544887Schin fp->nextdent = 2*INDENT;
4554887Schin goto flush;
4564887Schin }
4574887Schin else
4584887Schin fp->section = 0;
4594887Schin }
4604887Schin else if (c == '}')
4614887Schin {
4624887Schin if (fp->indent && (b || *(cp - 2) != 'f'))
4634887Schin {
4644887Schin if (b)
4654887Schin {
4664887Schin fp->indent -= 2*INDENT;
4674887Schin fp->endbuf += 2*INDENT;
4684887Schin }
4694887Schin else
4704887Schin {
4714887Schin cp--;
4724887Schin c = 0;
4734887Schin }
4744887Schin goto flush;
4754887Schin }
4764887Schin else
4774887Schin fp->section = 0;
4784887Schin }
4794887Schin else if (c == ' ' || c == '\t')
4804887Schin continue;
4814887Schin else
4824887Schin fp->section = 0;
4834887Schin }
4844887Schin else if (c == '?' && (cp >= lp || *cp != '?'))
4854887Schin {
4864887Schin if (fp->retain)
4874887Schin {
4884887Schin cp--;
4894887Schin while (cp < lp && *cp != ' ' && *cp != '\t' && *cp != ']' && dp < &buf[sizeof(buf)-3])
4904887Schin *dp++ = *cp++;
4914887Schin if (cp < lp && (*cp == ' ' || *cp == '\t'))
4924887Schin *dp++ = *cp++;
4934887Schin *dp = 0;
4944887Schin split(fp, buf, 0);
4954887Schin dp = buf;
4964887Schin ep = 0;
4974887Schin fp->retain = 0;
4984887Schin if (fp->outp >= fp->endbuf)
4994887Schin outline(fp);
5004887Schin continue;
5014887Schin }
5024887Schin }
5034887Schin else if (c == ' ' || c == '\t')
5044887Schin for (c = ' '; *cp == ' ' || *cp == '\t'; cp++);
5054887Schin }
5064887Schin else if (c == '\b')
5074887Schin {
5084887Schin if (dp > buf)
5094887Schin {
5104887Schin dp--;
5114887Schin if (ep)
5124887Schin ep--;
5134887Schin }
5144887Schin continue;
5154887Schin }
5164887Schin else if (c == '\t')
5174887Schin {
5184887Schin /*
5194887Schin * expand tabs
5204887Schin */
5214887Schin
5224887Schin if (!ep)
5234887Schin ep = dp;
5244887Schin c = isoption(fp, 'o') ? 1 : TABSZ - (dp - buf) % TABSZ;
5254887Schin if (dp >= &buf[sizeof(buf) - c - 3])
5264887Schin {
5274887Schin cp--;
5284887Schin break;
5294887Schin }
5304887Schin while (c-- > 0)
5314887Schin *dp++ = ' ';
5324887Schin continue;
5334887Schin }
5344887Schin else if (!isprint(c))
5354887Schin continue;
5364887Schin if (dp >= &buf[sizeof(buf) - 3])
5374887Schin {
5384887Schin tp = dp;
5394887Schin while (--tp > buf)
5404887Schin if (isspace(*tp))
5414887Schin {
5424887Schin cp -= dp - tp;
5434887Schin dp = tp;
5444887Schin break;
5454887Schin }
5464887Schin ep = 0;
5474887Schin break;
5484887Schin }
5494887Schin if (c != ' ')
5504887Schin ep = 0;
5514887Schin else if (!ep)
5524887Schin ep = dp;
5534887Schin *dp++ = c;
5544887Schin }
5554887Schin if (ep)
5564887Schin *ep = 0;
5574887Schin else
5584887Schin *dp = 0;
5594887Schin split(fp, buf, splice);
5604887Schin }
5614887Schin return 0;
5624887Schin }
5634887Schin
5644887Schin int
b_fmt(int argc,char ** argv,void * context)5654887Schin b_fmt(int argc, char** argv, void *context)
5664887Schin {
5674887Schin register int n;
5684887Schin char* cp;
5694887Schin Fmt_t fmt;
5704887Schin char outbuf[8 * 1024];
5714887Schin
5724887Schin fmt.flags = 0;
5734887Schin fmt.out = sfstdout;
5744887Schin fmt.outbuf = outbuf;
5754887Schin fmt.outp = 0;
5764887Schin fmt.endbuf = &outbuf[72];
5774887Schin fmt.indent = 0;
5784887Schin fmt.nextdent = 0;
5794887Schin fmt.nwords = 0;
5804887Schin fmt.prefix = 0;
5814887Schin fmt.quote = 0;
5824887Schin fmt.retain = 0;
5834887Schin fmt.section = 1;
5844887Schin cmdinit(argc, argv, context, ERROR_CATALOG, 0);
5854887Schin while (n = optget(argv, usage))
5864887Schin switch (n)
5874887Schin {
5884887Schin case 'c':
5894887Schin case 'o':
5904887Schin case 's':
5914887Schin case 'u':
5924887Schin setoption(&fmt, n);
5934887Schin break;
5944887Schin case 'w':
5954887Schin if (opt_info.num < TABSZ || opt_info.num>= sizeof(outbuf))
5964887Schin error(2, "width out of range");
5974887Schin fmt.endbuf = &outbuf[opt_info.num];
5984887Schin break;
5994887Schin case ':':
6004887Schin error(2, "%s", opt_info.arg);
6014887Schin break;
6024887Schin case '?':
6034887Schin error(ERROR_usage(2), "%s", opt_info.arg);
6044887Schin break;
6054887Schin }
6064887Schin argv += opt_info.index;
6074887Schin if (error_info.errors)
6084887Schin error(ERROR_usage(2), "%s", optusage(NiL));
6094887Schin if (isoption(&fmt, 'o'))
6104887Schin setoption(&fmt, 'c');
6114887Schin if (isoption(&fmt, 's'))
6124887Schin clroption(&fmt, 'u');
6134887Schin if (cp = *argv)
6144887Schin argv++;
6154887Schin do {
6164887Schin if (!cp || streq(cp, "-"))
6174887Schin fmt.in = sfstdin;
6184887Schin else if (!(fmt.in = sfopen(NiL, cp, "r")))
6194887Schin {
6204887Schin error(ERROR_system(0), "%s: cannot open", cp);
6214887Schin error_info.errors = 1;
6224887Schin continue;
6234887Schin }
6244887Schin dofmt(&fmt);
6254887Schin if (fmt.in != sfstdin)
6264887Schin sfclose(fmt.in);
6274887Schin } while (cp = *argv++);
6284887Schin outline(&fmt);
6294887Schin if (sfsync(sfstdout))
6304887Schin error(ERROR_system(0), "write error");
6314887Schin return error_info.errors != 0;
6324887Schin }
633