1*2dad2d2dSchristos /* $NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $ */
29afe744eSjtc
361f28255Scgd /*-
49afe744eSjtc * Copyright (c) 1989, 1993
59afe744eSjtc * The Regents of the University of California. All rights reserved.
661f28255Scgd *
761f28255Scgd * Redistribution and use in source and binary forms, with or without
861f28255Scgd * modification, are permitted provided that the following conditions
961f28255Scgd * are met:
1061f28255Scgd * 1. Redistributions of source code must retain the above copyright
1161f28255Scgd * notice, this list of conditions and the following disclaimer.
1261f28255Scgd * 2. Redistributions in binary form must reproduce the above copyright
1361f28255Scgd * notice, this list of conditions and the following disclaimer in the
1461f28255Scgd * documentation and/or other materials provided with the distribution.
1589aaa1bbSagc * 3. Neither the name of the University nor the names of its contributors
1661f28255Scgd * may be used to endorse or promote products derived from this software
1761f28255Scgd * without specific prior written permission.
1861f28255Scgd *
1961f28255Scgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2061f28255Scgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2161f28255Scgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2261f28255Scgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2361f28255Scgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2461f28255Scgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2561f28255Scgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2661f28255Scgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2761f28255Scgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2861f28255Scgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2961f28255Scgd * SUCH DAMAGE.
3061f28255Scgd */
3161f28255Scgd
323ad51f34Slukem #include <sys/cdefs.h>
3361f28255Scgd #ifndef lint
3498e5374cSlukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
3598e5374cSlukem The Regents of the University of California. All rights reserved.");
3661f28255Scgd #endif /* not lint */
3761f28255Scgd
3861f28255Scgd #ifndef lint
399afe744eSjtc #if 0
409afe744eSjtc static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93";
419afe744eSjtc #endif
42*2dad2d2dSchristos __RCSID("$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $");
4361f28255Scgd #endif /* not lint */
4461f28255Scgd
4561f28255Scgd #include <stdio.h>
464915ed99Sjtc #include <string.h>
474915ed99Sjtc #include <stdlib.h>
4896689e7eSchristos #include <string.h>
49c10d89b6Schristos #include <errno.h>
506db0248cSchristos #include <wchar.h>
51ff77c2a8Schristos #include <limits.h>
524915ed99Sjtc #include <unistd.h>
534915ed99Sjtc #include <err.h>
5461f28255Scgd #include <vis.h>
5561f28255Scgd
56bb0dd614Schristos #include "extern.h"
5761f28255Scgd
/*
 * Option state filled in by main() while parsing the command line and
 * read by process() for every input stream.
 */
static int eflags;		/* VIS_* flag bits handed to vis(3) */
static int fold;		/* nonzero: fold output lines (-f) */
static int foldwidth = 80;	/* column at which folded lines break */
static int none;		/* nonzero: -n, no vis(3) encoding */
static int markeol;		/* nonzero: mark end of line with \$ */
#ifdef DEBUG
int debug;			/* nonzero: print fold column tracing (-d) */
#endif
static const char *extra = "";	/* argument of -e, passed to strsenvisx() */

/* Encode one input stream onto standard output. */
static void process(FILE *);
654915ed99Sjtc
664915ed99Sjtc int
main(int argc,char * argv[])67bb0dd614Schristos main(int argc, char *argv[])
6861f28255Scgd {
6961f28255Scgd FILE *fp;
7061f28255Scgd int ch;
71be63aecfSitohy int rval;
7261f28255Scgd
73*2dad2d2dSchristos while ((ch = getopt(argc, argv, "bcde:F:fhlMmNnoSstw")) != -1)
7461f28255Scgd switch((char)ch) {
75bb0dd614Schristos case 'b':
76bb0dd614Schristos eflags |= VIS_NOSLASH;
7761f28255Scgd break;
7861f28255Scgd case 'c':
7961f28255Scgd eflags |= VIS_CSTYLE;
8061f28255Scgd break;
81bb0dd614Schristos #ifdef DEBUG
82bb0dd614Schristos case 'd':
83bb0dd614Schristos debug++;
8461f28255Scgd break;
85bb0dd614Schristos #endif
8630c6688fSlukem case 'e':
8730c6688fSlukem extra = optarg;
8830c6688fSlukem break;
8961f28255Scgd case 'F':
9061f28255Scgd if ((foldwidth = atoi(optarg)) < 5) {
914915ed99Sjtc errx(1, "can't fold lines to less than 5 cols");
924915ed99Sjtc /* NOTREACHED */
9361f28255Scgd }
94bb0dd614Schristos markeol++;
95bb0dd614Schristos break;
9661f28255Scgd case 'f':
9761f28255Scgd fold++; /* fold output lines to 80 cols */
9861f28255Scgd break; /* using hidden newline */
99bb0dd614Schristos case 'h':
100bb0dd614Schristos eflags |= VIS_HTTPSTYLE;
101bb0dd614Schristos break;
10261f28255Scgd case 'l':
10361f28255Scgd markeol++; /* mark end of line with \$ */
10461f28255Scgd break;
105*2dad2d2dSchristos case 'M':
106*2dad2d2dSchristos eflags |= VIS_META;
107*2dad2d2dSchristos break;
108bb0dd614Schristos case 'm':
109bb0dd614Schristos eflags |= VIS_MIMESTYLE;
110bb0dd614Schristos if (foldwidth == 80)
111bb0dd614Schristos foldwidth = 76;
11261f28255Scgd break;
113*2dad2d2dSchristos case 'N':
114*2dad2d2dSchristos eflags |= VIS_NOLOCALE;
115e36fdb7eSchristos break;
116bb0dd614Schristos case 'n':
117bb0dd614Schristos none++;
118bb0dd614Schristos break;
119bb0dd614Schristos case 'o':
120bb0dd614Schristos eflags |= VIS_OCTAL;
121bb0dd614Schristos break;
122e36fdb7eSchristos case 'S':
123e36fdb7eSchristos eflags |= VIS_SHELL;
124e36fdb7eSchristos break;
125*2dad2d2dSchristos case 's':
126*2dad2d2dSchristos eflags |= VIS_SAFE;
127*2dad2d2dSchristos break;
128bb0dd614Schristos case 't':
129bb0dd614Schristos eflags |= VIS_TAB;
130bb0dd614Schristos break;
131bb0dd614Schristos case 'w':
132bb0dd614Schristos eflags |= VIS_WHITE;
133bb0dd614Schristos break;
13461f28255Scgd case '?':
13561f28255Scgd default:
136bb0dd614Schristos (void)fprintf(stderr,
137*2dad2d2dSchristos "Usage: %s [-bcfhlMmNnoSstw] [-e extra]"
138bb0dd614Schristos " [-F foldwidth] [file ...]\n", getprogname());
139bb0dd614Schristos return 1;
14061f28255Scgd }
141bb0dd614Schristos
142cc146168Schristos if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) ==
143bb0dd614Schristos (VIS_HTTPSTYLE|VIS_MIMESTYLE))
144bb0dd614Schristos errx(1, "Can't specify -m and -h at the same time");
145bb0dd614Schristos
14661f28255Scgd argc -= optind;
14761f28255Scgd argv += optind;
14861f28255Scgd
149be63aecfSitohy rval = 0;
150be63aecfSitohy
15161f28255Scgd if (*argv)
15261f28255Scgd while (*argv) {
153be63aecfSitohy if ((fp = fopen(*argv, "r")) != NULL) {
154bb0dd614Schristos process(fp);
155bb0dd614Schristos (void)fclose(fp);
156be63aecfSitohy } else {
1574915ed99Sjtc warn("%s", *argv);
158be63aecfSitohy rval = 1;
159be63aecfSitohy }
16061f28255Scgd argv++;
16161f28255Scgd }
16261f28255Scgd else
163cc146168Schristos process(stdin);
164bb0dd614Schristos return rval;
16561f28255Scgd }
16661f28255Scgd
167bb0dd614Schristos static void
process(FILE * fp)168bb0dd614Schristos process(FILE *fp)
16961f28255Scgd {
17061f28255Scgd static int col = 0;
171bb0dd614Schristos static char nul[] = "\0";
172bb0dd614Schristos char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */
173ff77c2a8Schristos wint_t c, c1, rachar;
174b2db3f87Schristos char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */
175b2db3f87Schristos char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */
1760439e621Schristos int mbilen, cerr = 0, raerr = 0;
17761f28255Scgd
17896689e7eSchristos /*
17996689e7eSchristos * The input stream is considered to be multibyte characters.
18096689e7eSchristos * The input loop will read this data inputing one character,
18196689e7eSchristos * possibly multiple bytes, at a time and converting each to
18296689e7eSchristos * a wide character wchar_t.
18396689e7eSchristos *
18496689e7eSchristos * The vis(3) functions, however, require single either bytes
18596689e7eSchristos * or a multibyte string as their arguments. So we convert
18696689e7eSchristos * our input wchar_t and the following look-ahead wchar_t to
18796689e7eSchristos * a multibyte string for processing by vis(3).
18896689e7eSchristos */
18996689e7eSchristos
19096689e7eSchristos /* Read one multibyte character, store as wchar_t */
1916db0248cSchristos c = getwc(fp);
1920439e621Schristos if (c == WEOF && errno == EILSEQ) {
19396689e7eSchristos /* Error in multibyte data. Read one byte. */
194c10d89b6Schristos c = (wint_t)getc(fp);
1950439e621Schristos cerr = 1;
1960439e621Schristos }
197c10d89b6Schristos while (c != WEOF) {
19896689e7eSchristos /* Clear multibyte input buffer. */
19996689e7eSchristos memset(mbibuff, 0, sizeof(mbibuff));
20096689e7eSchristos /* Read-ahead next multibyte character. */
201b2db3f87Schristos if (!cerr)
2026db0248cSchristos rachar = getwc(fp);
203b2db3f87Schristos if (cerr || (rachar == WEOF && errno == EILSEQ)) {
20496689e7eSchristos /* Error in multibyte data. Read one byte. */
205c10d89b6Schristos rachar = (wint_t)getc(fp);
2060439e621Schristos raerr = 1;
2070439e621Schristos }
20861f28255Scgd if (none) {
20996689e7eSchristos /* Handle -n flag. */
21061f28255Scgd cp = buff;
21161f28255Scgd *cp++ = c;
21261f28255Scgd if (c == '\\')
21361f28255Scgd *cp++ = '\\';
21461f28255Scgd *cp = '\0';
21561f28255Scgd } else if (markeol && c == '\n') {
21696689e7eSchristos /* Handle -l flag. */
21761f28255Scgd cp = buff;
21861f28255Scgd if ((eflags & VIS_NOSLASH) == 0)
21961f28255Scgd *cp++ = '\\';
22061f28255Scgd *cp++ = '$';
22161f28255Scgd *cp++ = '\n';
22261f28255Scgd *cp = '\0';
223ff77c2a8Schristos } else {
22496689e7eSchristos /*
22596689e7eSchristos * Convert character using vis(3) library.
22696689e7eSchristos * At this point we will process one character.
22796689e7eSchristos * But we must pass the vis(3) library this
22896689e7eSchristos * character plus the next one because the next
22996689e7eSchristos * one is used as a look-ahead to decide how to
23096689e7eSchristos * encode this one under certain circumstances.
23196689e7eSchristos *
23296689e7eSchristos * Since our characters may be multibyte, e.g.,
23396689e7eSchristos * in the UTF-8 locale, we cannot use vis() and
23496689e7eSchristos * svis() which require byte input, so we must
23596689e7eSchristos * create a multibyte string and use strvisx().
23696689e7eSchristos */
23796689e7eSchristos /* Treat EOF as a NUL char. */
238ff77c2a8Schristos c1 = rachar;
239ff77c2a8Schristos if (c1 == WEOF)
240ff77c2a8Schristos c1 = L'\0';
24196689e7eSchristos /*
24296689e7eSchristos * If we hit a multibyte conversion error above,
24396689e7eSchristos * insert byte directly into string buff because
24496689e7eSchristos * wctomb() will fail. Else convert wchar_t to
24596689e7eSchristos * multibyte using wctomb().
24696689e7eSchristos */
2470439e621Schristos if (cerr) {
24896689e7eSchristos *mbibuff = (char)c;
2490439e621Schristos mbilen = 1;
2500439e621Schristos } else
2510439e621Schristos mbilen = wctomb(mbibuff, c);
25296689e7eSchristos /* Same for look-ahead character. */
2530439e621Schristos if (raerr)
25496689e7eSchristos mbibuff[mbilen] = (char)c1;
2550439e621Schristos else
2560439e621Schristos wctomb(mbibuff + mbilen, c1);
25796689e7eSchristos /* Perform encoding on just first character. */
258b2db3f87Schristos (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff,
259b2db3f87Schristos 1, eflags, extra, &cerr);
260ff77c2a8Schristos }
26161f28255Scgd
26261f28255Scgd cp = buff;
26361f28255Scgd if (fold) {
26461f28255Scgd #ifdef DEBUG
26561f28255Scgd if (debug)
266bb0dd614Schristos (void)printf("<%02d,", col);
26761f28255Scgd #endif
268bb0dd614Schristos col = foldit(cp, col, foldwidth, eflags);
26961f28255Scgd #ifdef DEBUG
27061f28255Scgd if (debug)
271bb0dd614Schristos (void)printf("%02d>", col);
27261f28255Scgd #endif
27361f28255Scgd }
27461f28255Scgd do {
275bb0dd614Schristos (void)putchar(*cp);
27661f28255Scgd } while (*++cp);
27761f28255Scgd c = rachar;
2780439e621Schristos cerr = raerr;
27961f28255Scgd }
28061f28255Scgd /*
28161f28255Scgd * terminate partial line with a hidden newline
28261f28255Scgd */
28361f28255Scgd if (fold && *(cp - 1) != '\n')
284bb0dd614Schristos (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n");
28561f28255Scgd }
286