xref: /netbsd-src/usr.bin/vis/vis.c (revision 2dad2d2d111d73d215c076361fbf5959c71b7f24)
/*	$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $	*/

/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
3161f28255Scgd 
323ad51f34Slukem #include <sys/cdefs.h>
3361f28255Scgd #ifndef lint
3498e5374cSlukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
3598e5374cSlukem  The Regents of the University of California.  All rights reserved.");
3661f28255Scgd #endif /* not lint */
3761f28255Scgd 
3861f28255Scgd #ifndef lint
399afe744eSjtc #if 0
409afe744eSjtc static char sccsid[] = "@(#)vis.c	8.1 (Berkeley) 6/6/93";
419afe744eSjtc #endif
42*2dad2d2dSchristos __RCSID("$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $");
4361f28255Scgd #endif /* not lint */
4461f28255Scgd 
4561f28255Scgd #include <stdio.h>
464915ed99Sjtc #include <string.h>
474915ed99Sjtc #include <stdlib.h>
4896689e7eSchristos #include <string.h>
49c10d89b6Schristos #include <errno.h>
506db0248cSchristos #include <wchar.h>
51ff77c2a8Schristos #include <limits.h>
524915ed99Sjtc #include <unistd.h>
534915ed99Sjtc #include <err.h>
5461f28255Scgd #include <vis.h>
5561f28255Scgd 
56bb0dd614Schristos #include "extern.h"
5761f28255Scgd 
/*
 * Option state filled in by main() while parsing the command line and
 * consulted by process() for every input character.
 */
static int eflags;		/* VIS_* flag bits handed to the encoder */
static int fold;		/* non-zero: fold output lines (-f) */
static int foldwidth = 80;	/* fold column; changed by -F and -m */
static int none;		/* non-zero: -n was given */
static int markeol;		/* non-zero: mark end of line with \$ */
#ifdef DEBUG
int debug;			/* bumped once per -d on the command line */
#endif
static const char *extra = "";	/* argument of -e; empty when not given */

/* Encode one input stream onto standard output. */
static void process(FILE *);
654915ed99Sjtc 
664915ed99Sjtc int
main(int argc,char * argv[])67bb0dd614Schristos main(int argc, char *argv[])
6861f28255Scgd {
6961f28255Scgd 	FILE *fp;
7061f28255Scgd 	int ch;
71be63aecfSitohy 	int rval;
7261f28255Scgd 
73*2dad2d2dSchristos 	while ((ch = getopt(argc, argv, "bcde:F:fhlMmNnoSstw")) != -1)
7461f28255Scgd 		switch((char)ch) {
75bb0dd614Schristos 		case 'b':
76bb0dd614Schristos 			eflags |= VIS_NOSLASH;
7761f28255Scgd 			break;
7861f28255Scgd 		case 'c':
7961f28255Scgd 			eflags |= VIS_CSTYLE;
8061f28255Scgd 			break;
81bb0dd614Schristos #ifdef DEBUG
82bb0dd614Schristos 		case 'd':
83bb0dd614Schristos 			debug++;
8461f28255Scgd 			break;
85bb0dd614Schristos #endif
8630c6688fSlukem 		case 'e':
8730c6688fSlukem 			extra = optarg;
8830c6688fSlukem 			break;
8961f28255Scgd 		case 'F':
9061f28255Scgd 			if ((foldwidth = atoi(optarg)) < 5) {
914915ed99Sjtc 				errx(1, "can't fold lines to less than 5 cols");
924915ed99Sjtc 				/* NOTREACHED */
9361f28255Scgd 			}
94bb0dd614Schristos 			markeol++;
95bb0dd614Schristos 			break;
9661f28255Scgd 		case 'f':
9761f28255Scgd 			fold++;		/* fold output lines to 80 cols */
9861f28255Scgd 			break;		/* using hidden newline */
99bb0dd614Schristos 		case 'h':
100bb0dd614Schristos 			eflags |= VIS_HTTPSTYLE;
101bb0dd614Schristos 			break;
10261f28255Scgd 		case 'l':
10361f28255Scgd 			markeol++;	/* mark end of line with \$ */
10461f28255Scgd 			break;
105*2dad2d2dSchristos 		case 'M':
106*2dad2d2dSchristos 			eflags |= VIS_META;
107*2dad2d2dSchristos 			break;
108bb0dd614Schristos 		case 'm':
109bb0dd614Schristos 			eflags |= VIS_MIMESTYLE;
110bb0dd614Schristos 			if (foldwidth == 80)
111bb0dd614Schristos 				foldwidth = 76;
11261f28255Scgd 			break;
113*2dad2d2dSchristos 		case 'N':
114*2dad2d2dSchristos 			eflags |= VIS_NOLOCALE;
115e36fdb7eSchristos 			break;
116bb0dd614Schristos 		case 'n':
117bb0dd614Schristos 			none++;
118bb0dd614Schristos 			break;
119bb0dd614Schristos 		case 'o':
120bb0dd614Schristos 			eflags |= VIS_OCTAL;
121bb0dd614Schristos 			break;
122e36fdb7eSchristos 		case 'S':
123e36fdb7eSchristos 			eflags |= VIS_SHELL;
124e36fdb7eSchristos 			break;
125*2dad2d2dSchristos 		case 's':
126*2dad2d2dSchristos 			eflags |= VIS_SAFE;
127*2dad2d2dSchristos 			break;
128bb0dd614Schristos 		case 't':
129bb0dd614Schristos 			eflags |= VIS_TAB;
130bb0dd614Schristos 			break;
131bb0dd614Schristos 		case 'w':
132bb0dd614Schristos 			eflags |= VIS_WHITE;
133bb0dd614Schristos 			break;
13461f28255Scgd 		case '?':
13561f28255Scgd 		default:
136bb0dd614Schristos 			(void)fprintf(stderr,
137*2dad2d2dSchristos 			    "Usage: %s [-bcfhlMmNnoSstw] [-e extra]"
138bb0dd614Schristos 			    " [-F foldwidth] [file ...]\n", getprogname());
139bb0dd614Schristos 			return 1;
14061f28255Scgd 		}
141bb0dd614Schristos 
142cc146168Schristos 	if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) ==
143bb0dd614Schristos 	    (VIS_HTTPSTYLE|VIS_MIMESTYLE))
144bb0dd614Schristos 		errx(1, "Can't specify -m and -h at the same time");
145bb0dd614Schristos 
14661f28255Scgd 	argc -= optind;
14761f28255Scgd 	argv += optind;
14861f28255Scgd 
149be63aecfSitohy 	rval = 0;
150be63aecfSitohy 
15161f28255Scgd 	if (*argv)
15261f28255Scgd 		while (*argv) {
153be63aecfSitohy 			if ((fp = fopen(*argv, "r")) != NULL) {
154bb0dd614Schristos 				process(fp);
155bb0dd614Schristos 				(void)fclose(fp);
156be63aecfSitohy 			} else {
1574915ed99Sjtc 				warn("%s", *argv);
158be63aecfSitohy 				rval = 1;
159be63aecfSitohy 			}
16061f28255Scgd 			argv++;
16161f28255Scgd 		}
16261f28255Scgd 	else
163cc146168Schristos 		process(stdin);
164bb0dd614Schristos 	return rval;
16561f28255Scgd }
16661f28255Scgd 
167bb0dd614Schristos static void
process(FILE * fp)168bb0dd614Schristos process(FILE *fp)
16961f28255Scgd {
17061f28255Scgd 	static int col = 0;
171bb0dd614Schristos 	static char nul[] = "\0";
172bb0dd614Schristos 	char *cp = nul + 1;	/* so *(cp-1) starts out != '\n' */
173ff77c2a8Schristos 	wint_t c, c1, rachar;
174b2db3f87Schristos 	char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */
175b2db3f87Schristos 	char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */
1760439e621Schristos 	int mbilen, cerr = 0, raerr = 0;
17761f28255Scgd 
17896689e7eSchristos         /*
17996689e7eSchristos          * The input stream is considered to be multibyte characters.
18096689e7eSchristos          * The input loop will read this data inputing one character,
18196689e7eSchristos 	 * possibly multiple bytes, at a time and converting each to
18296689e7eSchristos 	 * a wide character wchar_t.
18396689e7eSchristos          *
18496689e7eSchristos 	 * The vis(3) functions, however, require single either bytes
18596689e7eSchristos 	 * or a multibyte string as their arguments.  So we convert
18696689e7eSchristos 	 * our input wchar_t and the following look-ahead wchar_t to
18796689e7eSchristos 	 * a multibyte string for processing by vis(3).
18896689e7eSchristos          */
18996689e7eSchristos 
19096689e7eSchristos 	/* Read one multibyte character, store as wchar_t */
1916db0248cSchristos 	c = getwc(fp);
1920439e621Schristos 	if (c == WEOF && errno == EILSEQ) {
19396689e7eSchristos 		/* Error in multibyte data.  Read one byte. */
194c10d89b6Schristos 		c = (wint_t)getc(fp);
1950439e621Schristos 		cerr = 1;
1960439e621Schristos 	}
197c10d89b6Schristos 	while (c != WEOF) {
19896689e7eSchristos 		/* Clear multibyte input buffer. */
19996689e7eSchristos 		memset(mbibuff, 0, sizeof(mbibuff));
20096689e7eSchristos 		/* Read-ahead next multibyte character. */
201b2db3f87Schristos 		if (!cerr)
2026db0248cSchristos 			rachar = getwc(fp);
203b2db3f87Schristos 		if (cerr || (rachar == WEOF && errno == EILSEQ)) {
20496689e7eSchristos 			/* Error in multibyte data.  Read one byte. */
205c10d89b6Schristos 			rachar = (wint_t)getc(fp);
2060439e621Schristos 			raerr = 1;
2070439e621Schristos 		}
20861f28255Scgd 		if (none) {
20996689e7eSchristos 			/* Handle -n flag. */
21061f28255Scgd 			cp = buff;
21161f28255Scgd 			*cp++ = c;
21261f28255Scgd 			if (c == '\\')
21361f28255Scgd 				*cp++ = '\\';
21461f28255Scgd 			*cp = '\0';
21561f28255Scgd 		} else if (markeol && c == '\n') {
21696689e7eSchristos 			/* Handle -l flag. */
21761f28255Scgd 			cp = buff;
21861f28255Scgd 			if ((eflags & VIS_NOSLASH) == 0)
21961f28255Scgd 				*cp++ = '\\';
22061f28255Scgd 			*cp++ = '$';
22161f28255Scgd 			*cp++ = '\n';
22261f28255Scgd 			*cp = '\0';
223ff77c2a8Schristos 		} else {
22496689e7eSchristos 			/*
22596689e7eSchristos 			 * Convert character using vis(3) library.
22696689e7eSchristos 			 * At this point we will process one character.
22796689e7eSchristos 			 * But we must pass the vis(3) library this
22896689e7eSchristos 			 * character plus the next one because the next
22996689e7eSchristos 			 * one is used as a look-ahead to decide how to
23096689e7eSchristos 			 * encode this one under certain circumstances.
23196689e7eSchristos 			 *
23296689e7eSchristos 			 * Since our characters may be multibyte, e.g.,
23396689e7eSchristos 			 * in the UTF-8 locale, we cannot use vis() and
23496689e7eSchristos 			 * svis() which require byte input, so we must
23596689e7eSchristos 			 * create a multibyte string and use strvisx().
23696689e7eSchristos 			 */
23796689e7eSchristos 			/* Treat EOF as a NUL char. */
238ff77c2a8Schristos 			c1 = rachar;
239ff77c2a8Schristos 			if (c1 == WEOF)
240ff77c2a8Schristos 				c1 = L'\0';
24196689e7eSchristos 			/*
24296689e7eSchristos 			 * If we hit a multibyte conversion error above,
24396689e7eSchristos 			 * insert byte directly into string buff because
24496689e7eSchristos 			 * wctomb() will fail.  Else convert wchar_t to
24596689e7eSchristos 			 * multibyte using wctomb().
24696689e7eSchristos 			 */
2470439e621Schristos 			if (cerr) {
24896689e7eSchristos 				*mbibuff = (char)c;
2490439e621Schristos 				mbilen = 1;
2500439e621Schristos 			} else
2510439e621Schristos 				mbilen = wctomb(mbibuff, c);
25296689e7eSchristos 			/* Same for look-ahead character. */
2530439e621Schristos 			if (raerr)
25496689e7eSchristos 				mbibuff[mbilen] = (char)c1;
2550439e621Schristos 			else
2560439e621Schristos 				wctomb(mbibuff + mbilen, c1);
25796689e7eSchristos 			/* Perform encoding on just first character. */
258b2db3f87Schristos 			(void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff,
259b2db3f87Schristos 			    1, eflags, extra, &cerr);
260ff77c2a8Schristos 		}
26161f28255Scgd 
26261f28255Scgd 		cp = buff;
26361f28255Scgd 		if (fold) {
26461f28255Scgd #ifdef DEBUG
26561f28255Scgd 			if (debug)
266bb0dd614Schristos 				(void)printf("<%02d,", col);
26761f28255Scgd #endif
268bb0dd614Schristos 			col = foldit(cp, col, foldwidth, eflags);
26961f28255Scgd #ifdef DEBUG
27061f28255Scgd 			if (debug)
271bb0dd614Schristos 				(void)printf("%02d>", col);
27261f28255Scgd #endif
27361f28255Scgd 		}
27461f28255Scgd 		do {
275bb0dd614Schristos 			(void)putchar(*cp);
27661f28255Scgd 		} while (*++cp);
27761f28255Scgd 		c = rachar;
2780439e621Schristos 		cerr = raerr;
27961f28255Scgd 	}
28061f28255Scgd 	/*
28161f28255Scgd 	 * terminate partial line with a hidden newline
28261f28255Scgd 	 */
28361f28255Scgd 	if (fold && *(cp - 1) != '\n')
284bb0dd614Schristos 		(void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n");
28561f28255Scgd }
286