/*	$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $	*/

/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
3106f01a55SThomas Cort
3206f01a55SThomas Cort #include <sys/cdefs.h>
3306f01a55SThomas Cort #ifndef lint
3406f01a55SThomas Cort __COPYRIGHT("@(#) Copyright (c) 1989, 1993\
3506f01a55SThomas Cort The Regents of the University of California. All rights reserved.");
3606f01a55SThomas Cort #endif /* not lint */
3706f01a55SThomas Cort
3806f01a55SThomas Cort #ifndef lint
3906f01a55SThomas Cort #if 0
4006f01a55SThomas Cort static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93";
4106f01a55SThomas Cort #endif
42*0a6a1f1dSLionel Sambuc __RCSID("$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $");
4306f01a55SThomas Cort #endif /* not lint */
4406f01a55SThomas Cort
4506f01a55SThomas Cort #include <stdio.h>
4606f01a55SThomas Cort #include <string.h>
4706f01a55SThomas Cort #include <stdlib.h>
4884d9c625SLionel Sambuc #include <string.h>
4984d9c625SLionel Sambuc #include <errno.h>
5084d9c625SLionel Sambuc #include <wchar.h>
5184d9c625SLionel Sambuc #include <limits.h>
5206f01a55SThomas Cort #include <unistd.h>
5306f01a55SThomas Cort #include <err.h>
5406f01a55SThomas Cort #include <vis.h>
5506f01a55SThomas Cort
5606f01a55SThomas Cort #include "extern.h"
5706f01a55SThomas Cort
/*
 * Option state.  main() fills these in while parsing the command line
 * and process() reads them for every input stream.
 */
static int eflags;		/* VIS_* flags passed on to strsenvisx(3) */
static int fold;		/* -f: fold output lines */
static int foldwidth = 80;	/* fold column; set by -F, 76 under -m */
static int none;		/* -n: copy through, only double backslashes */
static int markeol;		/* -l (and -F): mark newlines with "\$" */
#ifdef DEBUG
int debug;			/* -d: print fold column trace */
#endif
static const char *extra = "";	/* -e: extra argument for strsenvisx(3) */

static void process(FILE *);
6506f01a55SThomas Cort
6606f01a55SThomas Cort int
main(int argc,char * argv[])6706f01a55SThomas Cort main(int argc, char *argv[])
6806f01a55SThomas Cort {
6906f01a55SThomas Cort FILE *fp;
7006f01a55SThomas Cort int ch;
7106f01a55SThomas Cort int rval;
7206f01a55SThomas Cort
73*0a6a1f1dSLionel Sambuc while ((ch = getopt(argc, argv, "bcde:F:fhlMmNnoSstw")) != -1)
7406f01a55SThomas Cort switch((char)ch) {
7506f01a55SThomas Cort case 'b':
7606f01a55SThomas Cort eflags |= VIS_NOSLASH;
7706f01a55SThomas Cort break;
7806f01a55SThomas Cort case 'c':
7906f01a55SThomas Cort eflags |= VIS_CSTYLE;
8006f01a55SThomas Cort break;
8106f01a55SThomas Cort #ifdef DEBUG
8206f01a55SThomas Cort case 'd':
8306f01a55SThomas Cort debug++;
8406f01a55SThomas Cort break;
8506f01a55SThomas Cort #endif
8606f01a55SThomas Cort case 'e':
8706f01a55SThomas Cort extra = optarg;
8806f01a55SThomas Cort break;
8906f01a55SThomas Cort case 'F':
9006f01a55SThomas Cort if ((foldwidth = atoi(optarg)) < 5) {
9106f01a55SThomas Cort errx(1, "can't fold lines to less than 5 cols");
9206f01a55SThomas Cort /* NOTREACHED */
9306f01a55SThomas Cort }
9406f01a55SThomas Cort markeol++;
9506f01a55SThomas Cort break;
9606f01a55SThomas Cort case 'f':
9706f01a55SThomas Cort fold++; /* fold output lines to 80 cols */
9806f01a55SThomas Cort break; /* using hidden newline */
9906f01a55SThomas Cort case 'h':
10006f01a55SThomas Cort eflags |= VIS_HTTPSTYLE;
10106f01a55SThomas Cort break;
10206f01a55SThomas Cort case 'l':
10306f01a55SThomas Cort markeol++; /* mark end of line with \$ */
10406f01a55SThomas Cort break;
105*0a6a1f1dSLionel Sambuc case 'M':
106*0a6a1f1dSLionel Sambuc eflags |= VIS_META;
107*0a6a1f1dSLionel Sambuc break;
10806f01a55SThomas Cort case 'm':
10906f01a55SThomas Cort eflags |= VIS_MIMESTYLE;
11006f01a55SThomas Cort if (foldwidth == 80)
11106f01a55SThomas Cort foldwidth = 76;
11206f01a55SThomas Cort break;
113*0a6a1f1dSLionel Sambuc case 'N':
114*0a6a1f1dSLionel Sambuc eflags |= VIS_NOLOCALE;
115*0a6a1f1dSLionel Sambuc break;
11606f01a55SThomas Cort case 'n':
11706f01a55SThomas Cort none++;
11806f01a55SThomas Cort break;
11906f01a55SThomas Cort case 'o':
12006f01a55SThomas Cort eflags |= VIS_OCTAL;
12106f01a55SThomas Cort break;
122*0a6a1f1dSLionel Sambuc case 'S':
123*0a6a1f1dSLionel Sambuc eflags |= VIS_SHELL;
124*0a6a1f1dSLionel Sambuc break;
12506f01a55SThomas Cort case 's':
12606f01a55SThomas Cort eflags |= VIS_SAFE;
12706f01a55SThomas Cort break;
12806f01a55SThomas Cort case 't':
12906f01a55SThomas Cort eflags |= VIS_TAB;
13006f01a55SThomas Cort break;
13106f01a55SThomas Cort case 'w':
13206f01a55SThomas Cort eflags |= VIS_WHITE;
13306f01a55SThomas Cort break;
13406f01a55SThomas Cort case '?':
13506f01a55SThomas Cort default:
13606f01a55SThomas Cort (void)fprintf(stderr,
137*0a6a1f1dSLionel Sambuc "Usage: %s [-bcfhlMmNnoSstw] [-e extra]"
13806f01a55SThomas Cort " [-F foldwidth] [file ...]\n", getprogname());
13906f01a55SThomas Cort return 1;
14006f01a55SThomas Cort }
14106f01a55SThomas Cort
14206f01a55SThomas Cort if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) ==
14306f01a55SThomas Cort (VIS_HTTPSTYLE|VIS_MIMESTYLE))
14406f01a55SThomas Cort errx(1, "Can't specify -m and -h at the same time");
14506f01a55SThomas Cort
14606f01a55SThomas Cort argc -= optind;
14706f01a55SThomas Cort argv += optind;
14806f01a55SThomas Cort
14906f01a55SThomas Cort rval = 0;
15006f01a55SThomas Cort
15106f01a55SThomas Cort if (*argv)
15206f01a55SThomas Cort while (*argv) {
15306f01a55SThomas Cort if ((fp = fopen(*argv, "r")) != NULL) {
15406f01a55SThomas Cort process(fp);
15506f01a55SThomas Cort (void)fclose(fp);
15606f01a55SThomas Cort } else {
15706f01a55SThomas Cort warn("%s", *argv);
15806f01a55SThomas Cort rval = 1;
15906f01a55SThomas Cort }
16006f01a55SThomas Cort argv++;
16106f01a55SThomas Cort }
16206f01a55SThomas Cort else
16306f01a55SThomas Cort process(stdin);
16406f01a55SThomas Cort return rval;
16506f01a55SThomas Cort }
16606f01a55SThomas Cort
16706f01a55SThomas Cort static void
process(FILE * fp)16806f01a55SThomas Cort process(FILE *fp)
16906f01a55SThomas Cort {
17006f01a55SThomas Cort static int col = 0;
17106f01a55SThomas Cort static char nul[] = "\0";
17206f01a55SThomas Cort char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */
17384d9c625SLionel Sambuc wint_t c, c1, rachar;
17484d9c625SLionel Sambuc char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */
17584d9c625SLionel Sambuc char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */
17684d9c625SLionel Sambuc int mbilen, cerr = 0, raerr = 0;
17706f01a55SThomas Cort
17820a91f77SLionel Sambuc #if defined(__minix)
17920a91f77SLionel Sambuc /* triggers a 'may be used uninitialized', when compiled with gcc,
18020a91f77SLionel Sambuc * asserts off, and -Os. */
18120a91f77SLionel Sambuc rachar = 0;
18220a91f77SLionel Sambuc #endif /* defined(__minix) */
18384d9c625SLionel Sambuc /*
18484d9c625SLionel Sambuc * The input stream is considered to be multibyte characters.
18584d9c625SLionel Sambuc * The input loop will read this data inputing one character,
18684d9c625SLionel Sambuc * possibly multiple bytes, at a time and converting each to
18784d9c625SLionel Sambuc * a wide character wchar_t.
18884d9c625SLionel Sambuc *
18984d9c625SLionel Sambuc * The vis(3) functions, however, require single either bytes
19084d9c625SLionel Sambuc * or a multibyte string as their arguments. So we convert
19184d9c625SLionel Sambuc * our input wchar_t and the following look-ahead wchar_t to
19284d9c625SLionel Sambuc * a multibyte string for processing by vis(3).
19384d9c625SLionel Sambuc */
19484d9c625SLionel Sambuc
19584d9c625SLionel Sambuc /* Read one multibyte character, store as wchar_t */
19684d9c625SLionel Sambuc c = getwc(fp);
19784d9c625SLionel Sambuc if (c == WEOF && errno == EILSEQ) {
19884d9c625SLionel Sambuc /* Error in multibyte data. Read one byte. */
19984d9c625SLionel Sambuc c = (wint_t)getc(fp);
20084d9c625SLionel Sambuc cerr = 1;
20184d9c625SLionel Sambuc }
20284d9c625SLionel Sambuc while (c != WEOF) {
20384d9c625SLionel Sambuc /* Clear multibyte input buffer. */
20484d9c625SLionel Sambuc memset(mbibuff, 0, sizeof(mbibuff));
20584d9c625SLionel Sambuc /* Read-ahead next multibyte character. */
20684d9c625SLionel Sambuc if (!cerr)
20784d9c625SLionel Sambuc rachar = getwc(fp);
20884d9c625SLionel Sambuc if (cerr || (rachar == WEOF && errno == EILSEQ)) {
20984d9c625SLionel Sambuc /* Error in multibyte data. Read one byte. */
21084d9c625SLionel Sambuc rachar = (wint_t)getc(fp);
21184d9c625SLionel Sambuc raerr = 1;
21284d9c625SLionel Sambuc }
21306f01a55SThomas Cort if (none) {
21484d9c625SLionel Sambuc /* Handle -n flag. */
21506f01a55SThomas Cort cp = buff;
21606f01a55SThomas Cort *cp++ = c;
21706f01a55SThomas Cort if (c == '\\')
21806f01a55SThomas Cort *cp++ = '\\';
21906f01a55SThomas Cort *cp = '\0';
22006f01a55SThomas Cort } else if (markeol && c == '\n') {
22184d9c625SLionel Sambuc /* Handle -l flag. */
22206f01a55SThomas Cort cp = buff;
22306f01a55SThomas Cort if ((eflags & VIS_NOSLASH) == 0)
22406f01a55SThomas Cort *cp++ = '\\';
22506f01a55SThomas Cort *cp++ = '$';
22606f01a55SThomas Cort *cp++ = '\n';
22706f01a55SThomas Cort *cp = '\0';
22884d9c625SLionel Sambuc } else {
22984d9c625SLionel Sambuc /*
23084d9c625SLionel Sambuc * Convert character using vis(3) library.
23184d9c625SLionel Sambuc * At this point we will process one character.
23284d9c625SLionel Sambuc * But we must pass the vis(3) library this
23384d9c625SLionel Sambuc * character plus the next one because the next
23484d9c625SLionel Sambuc * one is used as a look-ahead to decide how to
23584d9c625SLionel Sambuc * encode this one under certain circumstances.
23684d9c625SLionel Sambuc *
23784d9c625SLionel Sambuc * Since our characters may be multibyte, e.g.,
23884d9c625SLionel Sambuc * in the UTF-8 locale, we cannot use vis() and
23984d9c625SLionel Sambuc * svis() which require byte input, so we must
24084d9c625SLionel Sambuc * create a multibyte string and use strvisx().
24184d9c625SLionel Sambuc */
24284d9c625SLionel Sambuc /* Treat EOF as a NUL char. */
24384d9c625SLionel Sambuc c1 = rachar;
24484d9c625SLionel Sambuc if (c1 == WEOF)
24584d9c625SLionel Sambuc c1 = L'\0';
24684d9c625SLionel Sambuc /*
24784d9c625SLionel Sambuc * If we hit a multibyte conversion error above,
24884d9c625SLionel Sambuc * insert byte directly into string buff because
24984d9c625SLionel Sambuc * wctomb() will fail. Else convert wchar_t to
25084d9c625SLionel Sambuc * multibyte using wctomb().
25184d9c625SLionel Sambuc */
25284d9c625SLionel Sambuc if (cerr) {
25384d9c625SLionel Sambuc *mbibuff = (char)c;
25484d9c625SLionel Sambuc mbilen = 1;
25584d9c625SLionel Sambuc } else
25684d9c625SLionel Sambuc mbilen = wctomb(mbibuff, c);
25784d9c625SLionel Sambuc /* Same for look-ahead character. */
25884d9c625SLionel Sambuc if (raerr)
25984d9c625SLionel Sambuc mbibuff[mbilen] = (char)c1;
26006f01a55SThomas Cort else
26184d9c625SLionel Sambuc wctomb(mbibuff + mbilen, c1);
26284d9c625SLionel Sambuc /* Perform encoding on just first character. */
26384d9c625SLionel Sambuc (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff,
26484d9c625SLionel Sambuc 1, eflags, extra, &cerr);
26584d9c625SLionel Sambuc }
26606f01a55SThomas Cort
26706f01a55SThomas Cort cp = buff;
26806f01a55SThomas Cort if (fold) {
26906f01a55SThomas Cort #ifdef DEBUG
27006f01a55SThomas Cort if (debug)
27106f01a55SThomas Cort (void)printf("<%02d,", col);
27206f01a55SThomas Cort #endif
27306f01a55SThomas Cort col = foldit(cp, col, foldwidth, eflags);
27406f01a55SThomas Cort #ifdef DEBUG
27506f01a55SThomas Cort if (debug)
27606f01a55SThomas Cort (void)printf("%02d>", col);
27706f01a55SThomas Cort #endif
27806f01a55SThomas Cort }
27906f01a55SThomas Cort do {
28006f01a55SThomas Cort (void)putchar(*cp);
28106f01a55SThomas Cort } while (*++cp);
28206f01a55SThomas Cort c = rachar;
28384d9c625SLionel Sambuc cerr = raerr;
28406f01a55SThomas Cort }
28506f01a55SThomas Cort /*
28606f01a55SThomas Cort * terminate partial line with a hidden newline
28706f01a55SThomas Cort */
28806f01a55SThomas Cort if (fold && *(cp - 1) != '\n')
28906f01a55SThomas Cort (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n");
29006f01a55SThomas Cort }
291