xref: /freebsd-src/contrib/libc-vis/unvis.c (revision ea2be8ed283597062853935a5a29dfd67973044b)
1*ea46e638SKyle Evans /*	$NetBSD: unvis.c,v 1.45 2022/04/19 20:32:15 rillig Exp $	*/
28ccca122SBrooks Davis 
38ccca122SBrooks Davis /*-
48ccca122SBrooks Davis  * Copyright (c) 1989, 1993
58ccca122SBrooks Davis  *	The Regents of the University of California.  All rights reserved.
68ccca122SBrooks Davis  *
78ccca122SBrooks Davis  * Redistribution and use in source and binary forms, with or without
88ccca122SBrooks Davis  * modification, are permitted provided that the following conditions
98ccca122SBrooks Davis  * are met:
108ccca122SBrooks Davis  * 1. Redistributions of source code must retain the above copyright
118ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer.
128ccca122SBrooks Davis  * 2. Redistributions in binary form must reproduce the above copyright
138ccca122SBrooks Davis  *    notice, this list of conditions and the following disclaimer in the
148ccca122SBrooks Davis  *    documentation and/or other materials provided with the distribution.
158ccca122SBrooks Davis  * 3. Neither the name of the University nor the names of its contributors
168ccca122SBrooks Davis  *    may be used to endorse or promote products derived from this software
178ccca122SBrooks Davis  *    without specific prior written permission.
188ccca122SBrooks Davis  *
198ccca122SBrooks Davis  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
208ccca122SBrooks Davis  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
218ccca122SBrooks Davis  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
228ccca122SBrooks Davis  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
238ccca122SBrooks Davis  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
248ccca122SBrooks Davis  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
258ccca122SBrooks Davis  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
268ccca122SBrooks Davis  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
278ccca122SBrooks Davis  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
288ccca122SBrooks Davis  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
298ccca122SBrooks Davis  * SUCH DAMAGE.
308ccca122SBrooks Davis  */
318ccca122SBrooks Davis 
328ccca122SBrooks Davis #include <sys/cdefs.h>
338ccca122SBrooks Davis #if defined(LIBC_SCCS) && !defined(lint)
348ccca122SBrooks Davis #if 0
358ccca122SBrooks Davis static char sccsid[] = "@(#)unvis.c	8.1 (Berkeley) 6/4/93";
368ccca122SBrooks Davis #else
37*ea46e638SKyle Evans __RCSID("$NetBSD: unvis.c,v 1.45 2022/04/19 20:32:15 rillig Exp $");
388ccca122SBrooks Davis #endif
398ccca122SBrooks Davis #endif /* LIBC_SCCS and not lint */
408ccca122SBrooks Davis 
418ccca122SBrooks Davis #include "namespace.h"
428ccca122SBrooks Davis #include <sys/types.h>
438ccca122SBrooks Davis 
448ccca122SBrooks Davis #include <assert.h>
458ccca122SBrooks Davis #include <ctype.h>
468ccca122SBrooks Davis #include <stdint.h>
478ccca122SBrooks Davis #include <stdio.h>
488ccca122SBrooks Davis #include <errno.h>
498ccca122SBrooks Davis #include <vis.h>
508ccca122SBrooks Davis 
518ccca122SBrooks Davis #define	_DIAGASSERT(x)	assert(x)
528ccca122SBrooks Davis 
538ccca122SBrooks Davis /*
548ccca122SBrooks Davis  * Return the number of elements in a statically-allocated array,
558ccca122SBrooks Davis  * __x.
568ccca122SBrooks Davis  */
578ccca122SBrooks Davis #define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))
588ccca122SBrooks Davis 
598ccca122SBrooks Davis #ifdef __weak_alias
608ccca122SBrooks Davis __weak_alias(strnunvisx,_strnunvisx)
618ccca122SBrooks Davis #endif
628ccca122SBrooks Davis 
638ccca122SBrooks Davis #if !HAVE_VIS
/*
 * The decoder is a state machine; these are its states.  The values are
 * stored in the low byte of the caller-visible state word, and S_GROUND
 * must stay 0 so that a zero-initialized state word starts in the ground
 * state.
 */
enum {
	S_GROUND	= 0,	/* haven't seen escape char */
	S_START		= 1,	/* start decoding special sequence */
	S_META		= 2,	/* metachar started (M) */
	S_META1		= 3,	/* metachar more, regular char (-) */
	S_CTRL		= 4,	/* control char started (^) */
	S_OCTAL2	= 5,	/* octal digit 2 */
	S_OCTAL3	= 6,	/* octal digit 3 */
	S_HEX		= 7,	/* mandatory hex digit */
	S_HEX1		= 8,	/* http hex digit */
	S_HEX2		= 9,	/* http hex digit 2 */
	S_MIME1		= 10,	/* mime hex digit 1 */
	S_MIME2		= 11,	/* mime hex digit 2 */
	S_EATCRNL	= 12,	/* mime eating CRNL */
	S_AMP		= 13,	/* seen & */
	S_NUMBER	= 14,	/* collecting number */
	S_STRING	= 15	/* collecting string */
};
838ccca122SBrooks Davis 
/*
 * Digit helpers.
 *
 *	isoctal(c)	true when c, viewed as an unsigned char, is '0'..'7'
 *	xtod(c)		value of the hex digit c, either letter case
 *	XTOD(c)		value of the hex digit c, upper-case letters only
 *
 * xtod/XTOD do not validate their argument; callers check isxdigit()
 * first.  Every use of the argument is parenthesized so an expression
 * argument is not split by operator precedence.  The argument is still
 * evaluated more than once, so it must have no side effects.
 */
#define	isoctal(c)	(((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
#define	xtod(c)		(isdigit(c) ? ((c) - '0') : ((tolower(c) - 'a') + 10))
#define	XTOD(c)		(isdigit(c) ? ((c) - '0') : (((c) - 'A') + 10))
878ccca122SBrooks Davis 
/*
 * RFC 1866 named character entities.
 *
 * Each entry maps an entity name (at most 6 characters plus the
 * terminating NUL, hence name[7]) to the single byte it decodes to.
 * The entries are kept in ascending strcmp() order: the S_STRING state
 * of unvis() only ever scans forward through this table while matching
 * a name one character at a time.
 */
struct nv {
	char name[7];
	uint8_t value;
};

static const struct nv nv[] = {
	{ .name = "AElig",  .value = 198 }, /* capital AE diphthong (ligature) */
	{ .name = "Aacute", .value = 193 }, /* capital A, acute accent */
	{ .name = "Acirc",  .value = 194 }, /* capital A, circumflex accent */
	{ .name = "Agrave", .value = 192 }, /* capital A, grave accent */
	{ .name = "Aring",  .value = 197 }, /* capital A, ring */
	{ .name = "Atilde", .value = 195 }, /* capital A, tilde */
	{ .name = "Auml",   .value = 196 }, /* capital A, dieresis or umlaut mark */
	{ .name = "Ccedil", .value = 199 }, /* capital C, cedilla */
	{ .name = "ETH",    .value = 208 }, /* capital Eth, Icelandic */
	{ .name = "Eacute", .value = 201 }, /* capital E, acute accent */
	{ .name = "Ecirc",  .value = 202 }, /* capital E, circumflex accent */
	{ .name = "Egrave", .value = 200 }, /* capital E, grave accent */
	{ .name = "Euml",   .value = 203 }, /* capital E, dieresis or umlaut mark */
	{ .name = "Iacute", .value = 205 }, /* capital I, acute accent */
	{ .name = "Icirc",  .value = 206 }, /* capital I, circumflex accent */
	{ .name = "Igrave", .value = 204 }, /* capital I, grave accent */
	{ .name = "Iuml",   .value = 207 }, /* capital I, dieresis or umlaut mark */
	{ .name = "Ntilde", .value = 209 }, /* capital N, tilde */
	{ .name = "Oacute", .value = 211 }, /* capital O, acute accent */
	{ .name = "Ocirc",  .value = 212 }, /* capital O, circumflex accent */
	{ .name = "Ograve", .value = 210 }, /* capital O, grave accent */
	{ .name = "Oslash", .value = 216 }, /* capital O, slash */
	{ .name = "Otilde", .value = 213 }, /* capital O, tilde */
	{ .name = "Ouml",   .value = 214 }, /* capital O, dieresis or umlaut mark */
	{ .name = "THORN",  .value = 222 }, /* capital THORN, Icelandic */
	{ .name = "Uacute", .value = 218 }, /* capital U, acute accent */
	{ .name = "Ucirc",  .value = 219 }, /* capital U, circumflex accent */
	{ .name = "Ugrave", .value = 217 }, /* capital U, grave accent */
	{ .name = "Uuml",   .value = 220 }, /* capital U, dieresis or umlaut mark */
	{ .name = "Yacute", .value = 221 }, /* capital Y, acute accent */
	{ .name = "aacute", .value = 225 }, /* small a, acute accent */
	{ .name = "acirc",  .value = 226 }, /* small a, circumflex accent */
	{ .name = "acute",  .value = 180 }, /* acute accent */
	{ .name = "aelig",  .value = 230 }, /* small ae diphthong (ligature) */
	{ .name = "agrave", .value = 224 }, /* small a, grave accent */
	{ .name = "amp",    .value =  38 }, /* ampersand */
	{ .name = "aring",  .value = 229 }, /* small a, ring */
	{ .name = "atilde", .value = 227 }, /* small a, tilde */
	{ .name = "auml",   .value = 228 }, /* small a, dieresis or umlaut mark */
	{ .name = "brvbar", .value = 166 }, /* broken (vertical) bar */
	{ .name = "ccedil", .value = 231 }, /* small c, cedilla */
	{ .name = "cedil",  .value = 184 }, /* cedilla */
	{ .name = "cent",   .value = 162 }, /* cent sign */
	{ .name = "copy",   .value = 169 }, /* copyright sign */
	{ .name = "curren", .value = 164 }, /* general currency sign */
	{ .name = "deg",    .value = 176 }, /* degree sign */
	{ .name = "divide", .value = 247 }, /* divide sign */
	{ .name = "eacute", .value = 233 }, /* small e, acute accent */
	{ .name = "ecirc",  .value = 234 }, /* small e, circumflex accent */
	{ .name = "egrave", .value = 232 }, /* small e, grave accent */
	{ .name = "eth",    .value = 240 }, /* small eth, Icelandic */
	{ .name = "euml",   .value = 235 }, /* small e, dieresis or umlaut mark */
	{ .name = "frac12", .value = 189 }, /* fraction one-half */
	{ .name = "frac14", .value = 188 }, /* fraction one-quarter */
	{ .name = "frac34", .value = 190 }, /* fraction three-quarters */
	{ .name = "gt",     .value =  62 }, /* greater than */
	{ .name = "iacute", .value = 237 }, /* small i, acute accent */
	{ .name = "icirc",  .value = 238 }, /* small i, circumflex accent */
	{ .name = "iexcl",  .value = 161 }, /* inverted exclamation mark */
	{ .name = "igrave", .value = 236 }, /* small i, grave accent */
	{ .name = "iquest", .value = 191 }, /* inverted question mark */
	{ .name = "iuml",   .value = 239 }, /* small i, dieresis or umlaut mark */
	{ .name = "laquo",  .value = 171 }, /* angle quotation mark, left */
	{ .name = "lt",     .value =  60 }, /* less than */
	{ .name = "macr",   .value = 175 }, /* macron */
	{ .name = "micro",  .value = 181 }, /* micro sign */
	{ .name = "middot", .value = 183 }, /* middle dot */
	{ .name = "nbsp",   .value = 160 }, /* no-break space */
	{ .name = "not",    .value = 172 }, /* not sign */
	{ .name = "ntilde", .value = 241 }, /* small n, tilde */
	{ .name = "oacute", .value = 243 }, /* small o, acute accent */
	{ .name = "ocirc",  .value = 244 }, /* small o, circumflex accent */
	{ .name = "ograve", .value = 242 }, /* small o, grave accent */
	{ .name = "ordf",   .value = 170 }, /* ordinal indicator, feminine */
	{ .name = "ordm",   .value = 186 }, /* ordinal indicator, masculine */
	{ .name = "oslash", .value = 248 }, /* small o, slash */
	{ .name = "otilde", .value = 245 }, /* small o, tilde */
	{ .name = "ouml",   .value = 246 }, /* small o, dieresis or umlaut mark */
	{ .name = "para",   .value = 182 }, /* pilcrow (paragraph sign) */
	{ .name = "plusmn", .value = 177 }, /* plus-or-minus sign */
	{ .name = "pound",  .value = 163 }, /* pound sterling sign */
	{ .name = "quot",   .value =  34 }, /* double quote */
	{ .name = "raquo",  .value = 187 }, /* angle quotation mark, right */
	{ .name = "reg",    .value = 174 }, /* registered sign */
	{ .name = "sect",   .value = 167 }, /* section sign */
	{ .name = "shy",    .value = 173 }, /* soft hyphen */
	{ .name = "sup1",   .value = 185 }, /* superscript one */
	{ .name = "sup2",   .value = 178 }, /* superscript two */
	{ .name = "sup3",   .value = 179 }, /* superscript three */
	{ .name = "szlig",  .value = 223 }, /* small sharp s, German (sz ligature) */
	{ .name = "thorn",  .value = 254 }, /* small thorn, Icelandic */
	{ .name = "times",  .value = 215 }, /* multiply sign */
	{ .name = "uacute", .value = 250 }, /* small u, acute accent */
	{ .name = "ucirc",  .value = 251 }, /* small u, circumflex accent */
	{ .name = "ugrave", .value = 249 }, /* small u, grave accent */
	{ .name = "uml",    .value = 168 }, /* umlaut (dieresis) */
	{ .name = "uuml",   .value = 252 }, /* small u, dieresis or umlaut mark */
	{ .name = "yacute", .value = 253 }, /* small y, acute accent */
	{ .name = "yen",    .value = 165 }, /* yen sign */
	{ .name = "yuml",   .value = 255 }, /* small y, dieresis or umlaut mark */
};
1968ccca122SBrooks Davis 
1978ccca122SBrooks Davis /*
1988ccca122SBrooks Davis  * unvis - decode characters previously encoded by vis
1998ccca122SBrooks Davis  */
2008ccca122SBrooks Davis int
unvis(char * cp,int c,int * astate,int flag)2018ccca122SBrooks Davis unvis(char *cp, int c, int *astate, int flag)
2028ccca122SBrooks Davis {
2038ccca122SBrooks Davis 	unsigned char uc = (unsigned char)c;
2048ccca122SBrooks Davis 	unsigned char st, ia, is, lc;
2058ccca122SBrooks Davis 
2068ccca122SBrooks Davis /*
2078ccca122SBrooks Davis  * Bottom 8 bits of astate hold the state machine state.
2088ccca122SBrooks Davis  * Top 8 bits hold the current character in the http 1866 nv string decoding
2098ccca122SBrooks Davis  */
2108ccca122SBrooks Davis #define GS(a)		((a) & 0xff)
2118ccca122SBrooks Davis #define SS(a, b)	(((uint32_t)(a) << 24) | (b))
2128ccca122SBrooks Davis #define GI(a)		((uint32_t)(a) >> 24)
2138ccca122SBrooks Davis 
2148ccca122SBrooks Davis 	_DIAGASSERT(cp != NULL);
2158ccca122SBrooks Davis 	_DIAGASSERT(astate != NULL);
2168ccca122SBrooks Davis 	st = GS(*astate);
2178ccca122SBrooks Davis 
2188ccca122SBrooks Davis 	if (flag & UNVIS_END) {
2198ccca122SBrooks Davis 		switch (st) {
2208ccca122SBrooks Davis 		case S_OCTAL2:
2218ccca122SBrooks Davis 		case S_OCTAL3:
2228ccca122SBrooks Davis 		case S_HEX2:
2238ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2248ccca122SBrooks Davis 			return UNVIS_VALID;
2258ccca122SBrooks Davis 		case S_GROUND:
2268ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2278ccca122SBrooks Davis 		default:
2288ccca122SBrooks Davis 			return UNVIS_SYNBAD;
2298ccca122SBrooks Davis 		}
2308ccca122SBrooks Davis 	}
2318ccca122SBrooks Davis 
2328ccca122SBrooks Davis 	switch (st) {
2338ccca122SBrooks Davis 
2348ccca122SBrooks Davis 	case S_GROUND:
2358ccca122SBrooks Davis 		*cp = 0;
2368ccca122SBrooks Davis 		if ((flag & VIS_NOESCAPE) == 0 && c == '\\') {
2378ccca122SBrooks Davis 			*astate = SS(0, S_START);
2388ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2398ccca122SBrooks Davis 		}
2408ccca122SBrooks Davis 		if ((flag & VIS_HTTP1808) && c == '%') {
2418ccca122SBrooks Davis 			*astate = SS(0, S_HEX1);
2428ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2438ccca122SBrooks Davis 		}
2448ccca122SBrooks Davis 		if ((flag & VIS_HTTP1866) && c == '&') {
2458ccca122SBrooks Davis 			*astate = SS(0, S_AMP);
2468ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2478ccca122SBrooks Davis 		}
2488ccca122SBrooks Davis 		if ((flag & VIS_MIMESTYLE) && c == '=') {
2498ccca122SBrooks Davis 			*astate = SS(0, S_MIME1);
2508ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2518ccca122SBrooks Davis 		}
2528ccca122SBrooks Davis 		*cp = c;
2538ccca122SBrooks Davis 		return UNVIS_VALID;
2548ccca122SBrooks Davis 
2558ccca122SBrooks Davis 	case S_START:
2568ccca122SBrooks Davis 		switch(c) {
2578ccca122SBrooks Davis 		case '\\':
2588ccca122SBrooks Davis 			*cp = c;
2598ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2608ccca122SBrooks Davis 			return UNVIS_VALID;
2618ccca122SBrooks Davis 		case '0': case '1': case '2': case '3':
2628ccca122SBrooks Davis 		case '4': case '5': case '6': case '7':
2638ccca122SBrooks Davis 			*cp = (c - '0');
2648ccca122SBrooks Davis 			*astate = SS(0, S_OCTAL2);
2658ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2668ccca122SBrooks Davis 		case 'M':
2678ccca122SBrooks Davis 			*cp = (char)0200;
2688ccca122SBrooks Davis 			*astate = SS(0, S_META);
2698ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2708ccca122SBrooks Davis 		case '^':
2718ccca122SBrooks Davis 			*astate = SS(0, S_CTRL);
2728ccca122SBrooks Davis 			return UNVIS_NOCHAR;
2738ccca122SBrooks Davis 		case 'n':
2748ccca122SBrooks Davis 			*cp = '\n';
2758ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2768ccca122SBrooks Davis 			return UNVIS_VALID;
2778ccca122SBrooks Davis 		case 'r':
2788ccca122SBrooks Davis 			*cp = '\r';
2798ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2808ccca122SBrooks Davis 			return UNVIS_VALID;
2818ccca122SBrooks Davis 		case 'b':
2828ccca122SBrooks Davis 			*cp = '\b';
2838ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2848ccca122SBrooks Davis 			return UNVIS_VALID;
2858ccca122SBrooks Davis 		case 'a':
2868ccca122SBrooks Davis 			*cp = '\007';
2878ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2888ccca122SBrooks Davis 			return UNVIS_VALID;
2898ccca122SBrooks Davis 		case 'v':
2908ccca122SBrooks Davis 			*cp = '\v';
2918ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2928ccca122SBrooks Davis 			return UNVIS_VALID;
2938ccca122SBrooks Davis 		case 't':
2948ccca122SBrooks Davis 			*cp = '\t';
2958ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
2968ccca122SBrooks Davis 			return UNVIS_VALID;
2978ccca122SBrooks Davis 		case 'f':
2988ccca122SBrooks Davis 			*cp = '\f';
2998ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
3008ccca122SBrooks Davis 			return UNVIS_VALID;
3018ccca122SBrooks Davis 		case 's':
3028ccca122SBrooks Davis 			*cp = ' ';
3038ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
3048ccca122SBrooks Davis 			return UNVIS_VALID;
3058ccca122SBrooks Davis 		case 'E':
3068ccca122SBrooks Davis 			*cp = '\033';
3078ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
3088ccca122SBrooks Davis 			return UNVIS_VALID;
3098ccca122SBrooks Davis 		case 'x':
3108ccca122SBrooks Davis 			*astate = SS(0, S_HEX);
3118ccca122SBrooks Davis 			return UNVIS_NOCHAR;
3128ccca122SBrooks Davis 		case '\n':
3138ccca122SBrooks Davis 			/*
3148ccca122SBrooks Davis 			 * hidden newline
3158ccca122SBrooks Davis 			 */
3168ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
3178ccca122SBrooks Davis 			return UNVIS_NOCHAR;
3188ccca122SBrooks Davis 		case '$':
3198ccca122SBrooks Davis 			/*
3208ccca122SBrooks Davis 			 * hidden marker
3218ccca122SBrooks Davis 			 */
3228ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
3238ccca122SBrooks Davis 			return UNVIS_NOCHAR;
3248dfeba04SBrooks Davis 		default:
3258dfeba04SBrooks Davis 			if (isgraph(c)) {
3268dfeba04SBrooks Davis 				*cp = c;
3278dfeba04SBrooks Davis 				*astate = SS(0, S_GROUND);
3288dfeba04SBrooks Davis 				return UNVIS_VALID;
3298dfeba04SBrooks Davis 			}
3308ccca122SBrooks Davis 		}
3318ccca122SBrooks Davis 		goto bad;
3328ccca122SBrooks Davis 
3338ccca122SBrooks Davis 	case S_META:
3348ccca122SBrooks Davis 		if (c == '-')
3358ccca122SBrooks Davis 			*astate = SS(0, S_META1);
3368ccca122SBrooks Davis 		else if (c == '^')
3378ccca122SBrooks Davis 			*astate = SS(0, S_CTRL);
3388ccca122SBrooks Davis 		else
3398ccca122SBrooks Davis 			goto bad;
3408ccca122SBrooks Davis 		return UNVIS_NOCHAR;
3418ccca122SBrooks Davis 
3428ccca122SBrooks Davis 	case S_META1:
3438ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
3448ccca122SBrooks Davis 		*cp |= c;
3458ccca122SBrooks Davis 		return UNVIS_VALID;
3468ccca122SBrooks Davis 
3478ccca122SBrooks Davis 	case S_CTRL:
3488ccca122SBrooks Davis 		if (c == '?')
3498ccca122SBrooks Davis 			*cp |= 0177;
3508ccca122SBrooks Davis 		else
3518ccca122SBrooks Davis 			*cp |= c & 037;
3528ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
3538ccca122SBrooks Davis 		return UNVIS_VALID;
3548ccca122SBrooks Davis 
3558ccca122SBrooks Davis 	case S_OCTAL2:	/* second possible octal digit */
3568ccca122SBrooks Davis 		if (isoctal(uc)) {
3578ccca122SBrooks Davis 			/*
3588ccca122SBrooks Davis 			 * yes - and maybe a third
3598ccca122SBrooks Davis 			 */
3608ccca122SBrooks Davis 			*cp = (*cp << 3) + (c - '0');
3618ccca122SBrooks Davis 			*astate = SS(0, S_OCTAL3);
3628ccca122SBrooks Davis 			return UNVIS_NOCHAR;
3638ccca122SBrooks Davis 		}
3648ccca122SBrooks Davis 		/*
3658ccca122SBrooks Davis 		 * no - done with current sequence, push back passed char
3668ccca122SBrooks Davis 		 */
3678ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
3688ccca122SBrooks Davis 		return UNVIS_VALIDPUSH;
3698ccca122SBrooks Davis 
3708ccca122SBrooks Davis 	case S_OCTAL3:	/* third possible octal digit */
3718ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
3728ccca122SBrooks Davis 		if (isoctal(uc)) {
3738ccca122SBrooks Davis 			*cp = (*cp << 3) + (c - '0');
3748ccca122SBrooks Davis 			return UNVIS_VALID;
3758ccca122SBrooks Davis 		}
3768ccca122SBrooks Davis 		/*
3778ccca122SBrooks Davis 		 * we were done, push back passed char
3788ccca122SBrooks Davis 		 */
3798ccca122SBrooks Davis 		return UNVIS_VALIDPUSH;
3808ccca122SBrooks Davis 
3818ccca122SBrooks Davis 	case S_HEX:
3828ccca122SBrooks Davis 		if (!isxdigit(uc))
3838ccca122SBrooks Davis 			goto bad;
3848ccca122SBrooks Davis 		/*FALLTHROUGH*/
3858ccca122SBrooks Davis 	case S_HEX1:
3868ccca122SBrooks Davis 		if (isxdigit(uc)) {
3878ccca122SBrooks Davis 			*cp = xtod(uc);
3888ccca122SBrooks Davis 			*astate = SS(0, S_HEX2);
3898ccca122SBrooks Davis 			return UNVIS_NOCHAR;
3908ccca122SBrooks Davis 		}
3918ccca122SBrooks Davis 		/*
3928ccca122SBrooks Davis 		 * no - done with current sequence, push back passed char
3938ccca122SBrooks Davis 		 */
3948ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
3958ccca122SBrooks Davis 		return UNVIS_VALIDPUSH;
3968ccca122SBrooks Davis 
3978ccca122SBrooks Davis 	case S_HEX2:
3988ccca122SBrooks Davis 		*astate = S_GROUND;
3998ccca122SBrooks Davis 		if (isxdigit(uc)) {
4008ccca122SBrooks Davis 			*cp = xtod(uc) | (*cp << 4);
4018ccca122SBrooks Davis 			return UNVIS_VALID;
4028ccca122SBrooks Davis 		}
4038ccca122SBrooks Davis 		return UNVIS_VALIDPUSH;
4048ccca122SBrooks Davis 
4058ccca122SBrooks Davis 	case S_MIME1:
4068ccca122SBrooks Davis 		if (uc == '\n' || uc == '\r') {
4078ccca122SBrooks Davis 			*astate = SS(0, S_EATCRNL);
4088ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4098ccca122SBrooks Davis 		}
4108ccca122SBrooks Davis 		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
4118ccca122SBrooks Davis 			*cp = XTOD(uc);
4128ccca122SBrooks Davis 			*astate = SS(0, S_MIME2);
4138ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4148ccca122SBrooks Davis 		}
4158ccca122SBrooks Davis 		goto bad;
4168ccca122SBrooks Davis 
4178ccca122SBrooks Davis 	case S_MIME2:
4188ccca122SBrooks Davis 		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
4198ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
4208ccca122SBrooks Davis 			*cp = XTOD(uc) | (*cp << 4);
4218ccca122SBrooks Davis 			return UNVIS_VALID;
4228ccca122SBrooks Davis 		}
4238ccca122SBrooks Davis 		goto bad;
4248ccca122SBrooks Davis 
4258ccca122SBrooks Davis 	case S_EATCRNL:
4268ccca122SBrooks Davis 		switch (uc) {
4278ccca122SBrooks Davis 		case '\r':
4288ccca122SBrooks Davis 		case '\n':
4298ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4308ccca122SBrooks Davis 		case '=':
4318ccca122SBrooks Davis 			*astate = SS(0, S_MIME1);
4328ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4338ccca122SBrooks Davis 		default:
4348ccca122SBrooks Davis 			*cp = uc;
4358ccca122SBrooks Davis 			*astate = SS(0, S_GROUND);
4368ccca122SBrooks Davis 			return UNVIS_VALID;
4378ccca122SBrooks Davis 		}
4388ccca122SBrooks Davis 
4398ccca122SBrooks Davis 	case S_AMP:
4408ccca122SBrooks Davis 		*cp = 0;
4418ccca122SBrooks Davis 		if (uc == '#') {
4428ccca122SBrooks Davis 			*astate = SS(0, S_NUMBER);
4438ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4448ccca122SBrooks Davis 		}
4458ccca122SBrooks Davis 		*astate = SS(0, S_STRING);
4468ccca122SBrooks Davis 		/*FALLTHROUGH*/
4478ccca122SBrooks Davis 
4488ccca122SBrooks Davis 	case S_STRING:
4498ccca122SBrooks Davis 		ia = *cp;		/* index in the array */
4508ccca122SBrooks Davis 		is = GI(*astate);	/* index in the string */
4518ccca122SBrooks Davis 		lc = is == 0 ? 0 : nv[ia].name[is - 1];	/* last character */
4528ccca122SBrooks Davis 
4538ccca122SBrooks Davis 		if (uc == ';')
4548ccca122SBrooks Davis 			uc = '\0';
4558ccca122SBrooks Davis 
4568ccca122SBrooks Davis 		for (; ia < __arraycount(nv); ia++) {
4578ccca122SBrooks Davis 			if (is != 0 && nv[ia].name[is - 1] != lc)
4588ccca122SBrooks Davis 				goto bad;
4598ccca122SBrooks Davis 			if (nv[ia].name[is] == uc)
4608ccca122SBrooks Davis 				break;
4618ccca122SBrooks Davis 		}
4628ccca122SBrooks Davis 
4638ccca122SBrooks Davis 		if (ia == __arraycount(nv))
4648ccca122SBrooks Davis 			goto bad;
4658ccca122SBrooks Davis 
4668ccca122SBrooks Davis 		if (uc != 0) {
4678ccca122SBrooks Davis 			*cp = ia;
4688ccca122SBrooks Davis 			*astate = SS(is + 1, S_STRING);
4698ccca122SBrooks Davis 			return UNVIS_NOCHAR;
4708ccca122SBrooks Davis 		}
4718ccca122SBrooks Davis 
4728ccca122SBrooks Davis 		*cp = nv[ia].value;
4738ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
4748ccca122SBrooks Davis 		return UNVIS_VALID;
4758ccca122SBrooks Davis 
4768ccca122SBrooks Davis 	case S_NUMBER:
4778ccca122SBrooks Davis 		if (uc == ';')
4788ccca122SBrooks Davis 			return UNVIS_VALID;
4798ccca122SBrooks Davis 		if (!isdigit(uc))
4808ccca122SBrooks Davis 			goto bad;
4818ccca122SBrooks Davis 		*cp += (*cp * 10) + uc - '0';
4828ccca122SBrooks Davis 		return UNVIS_NOCHAR;
4838ccca122SBrooks Davis 
4848ccca122SBrooks Davis 	default:
4858ccca122SBrooks Davis 	bad:
4868ccca122SBrooks Davis 		/*
4878ccca122SBrooks Davis 		 * decoder in unknown state - (probably uninitialized)
4888ccca122SBrooks Davis 		 */
4898ccca122SBrooks Davis 		*astate = SS(0, S_GROUND);
4908ccca122SBrooks Davis 		return UNVIS_SYNBAD;
4918ccca122SBrooks Davis 	}
4928ccca122SBrooks Davis }
4938ccca122SBrooks Davis 
4948ccca122SBrooks Davis /*
4958ccca122SBrooks Davis  * strnunvisx - decode src into dst
4968ccca122SBrooks Davis  *
4978ccca122SBrooks Davis  *	Number of chars decoded into dst is returned, -1 on error.
4988ccca122SBrooks Davis  *	Dst is null terminated.
4998ccca122SBrooks Davis  */
5008ccca122SBrooks Davis 
5018ccca122SBrooks Davis int
strnunvisx(char * dst,size_t dlen,const char * src,int flag)5028ccca122SBrooks Davis strnunvisx(char *dst, size_t dlen, const char *src, int flag)
5038ccca122SBrooks Davis {
5048ccca122SBrooks Davis 	char c;
5058ccca122SBrooks Davis 	char t = '\0', *start = dst;
5068ccca122SBrooks Davis 	int state = 0;
5078ccca122SBrooks Davis 
5088ccca122SBrooks Davis 	_DIAGASSERT(src != NULL);
5098ccca122SBrooks Davis 	_DIAGASSERT(dst != NULL);
5108ccca122SBrooks Davis #define CHECKSPACE() \
5118ccca122SBrooks Davis 	do { \
5128ccca122SBrooks Davis 		if (dlen-- == 0) { \
5138ccca122SBrooks Davis 			errno = ENOSPC; \
5148ccca122SBrooks Davis 			return -1; \
5158ccca122SBrooks Davis 		} \
516*ea46e638SKyle Evans 	} while (0)
5178ccca122SBrooks Davis 
5188ccca122SBrooks Davis 	while ((c = *src++) != '\0') {
5198ccca122SBrooks Davis  again:
5208ccca122SBrooks Davis 		switch (unvis(&t, c, &state, flag)) {
5218ccca122SBrooks Davis 		case UNVIS_VALID:
5228ccca122SBrooks Davis 			CHECKSPACE();
5238ccca122SBrooks Davis 			*dst++ = t;
5248ccca122SBrooks Davis 			break;
5258ccca122SBrooks Davis 		case UNVIS_VALIDPUSH:
5268ccca122SBrooks Davis 			CHECKSPACE();
5278ccca122SBrooks Davis 			*dst++ = t;
5288ccca122SBrooks Davis 			goto again;
5298ccca122SBrooks Davis 		case 0:
5308ccca122SBrooks Davis 		case UNVIS_NOCHAR:
5318ccca122SBrooks Davis 			break;
5328ccca122SBrooks Davis 		case UNVIS_SYNBAD:
5338ccca122SBrooks Davis 			errno = EINVAL;
5348ccca122SBrooks Davis 			return -1;
5358ccca122SBrooks Davis 		default:
5368ccca122SBrooks Davis 			_DIAGASSERT(/*CONSTCOND*/0);
5378ccca122SBrooks Davis 			errno = EINVAL;
5388ccca122SBrooks Davis 			return -1;
5398ccca122SBrooks Davis 		}
5408ccca122SBrooks Davis 	}
5418ccca122SBrooks Davis 	if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) {
5428ccca122SBrooks Davis 		CHECKSPACE();
5438ccca122SBrooks Davis 		*dst++ = t;
5448ccca122SBrooks Davis 	}
5458ccca122SBrooks Davis 	CHECKSPACE();
5468ccca122SBrooks Davis 	*dst = '\0';
5478ccca122SBrooks Davis 	return (int)(dst - start);
5488ccca122SBrooks Davis }
5498ccca122SBrooks Davis 
/*
 * strunvisx - decode src into dst with the given decoding flag
 *
 *	Same as strnunvisx() but with the largest possible size_t passed
 *	as the destination size, i.e. effectively without a bound.
 *	Returns whatever strnunvisx() returns.
 */
int
strunvisx(char *dst, const char *src, int flag)
{
	const size_t unbounded = ~(size_t)0;

	return strnunvisx(dst, unbounded, src, flag);
}
5558ccca122SBrooks Davis 
/*
 * strunvis - decode src into dst
 *
 *	Same as strnunvisx() with no decoding flags and the largest
 *	possible size_t passed as the destination size.
 *	Returns whatever strnunvisx() returns.
 */
int
strunvis(char *dst, const char *src)
{
	const size_t unbounded = ~(size_t)0;
	const int no_flags = 0;

	return strnunvisx(dst, unbounded, src, no_flags);
}
5618ccca122SBrooks Davis 
/*
 * strnunvis - decode src into dst, which holds at most dlen bytes
 *
 *	Same as strnunvisx() with no decoding flags.
 *	Returns whatever strnunvisx() returns.
 */
int
strnunvis(char *dst, size_t dlen, const char *src)
{
	const int no_flags = 0;

	return strnunvisx(dst, dlen, src, no_flags);
}
5678ccca122SBrooks Davis #endif
568