1*ea46e638SKyle Evans /* $NetBSD: unvis.c,v 1.45 2022/04/19 20:32:15 rillig Exp $ */
28ccca122SBrooks Davis
38ccca122SBrooks Davis /*-
48ccca122SBrooks Davis * Copyright (c) 1989, 1993
58ccca122SBrooks Davis * The Regents of the University of California. All rights reserved.
68ccca122SBrooks Davis *
78ccca122SBrooks Davis * Redistribution and use in source and binary forms, with or without
88ccca122SBrooks Davis * modification, are permitted provided that the following conditions
98ccca122SBrooks Davis * are met:
108ccca122SBrooks Davis * 1. Redistributions of source code must retain the above copyright
118ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer.
128ccca122SBrooks Davis * 2. Redistributions in binary form must reproduce the above copyright
138ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer in the
148ccca122SBrooks Davis * documentation and/or other materials provided with the distribution.
158ccca122SBrooks Davis * 3. Neither the name of the University nor the names of its contributors
168ccca122SBrooks Davis * may be used to endorse or promote products derived from this software
178ccca122SBrooks Davis * without specific prior written permission.
188ccca122SBrooks Davis *
198ccca122SBrooks Davis * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
208ccca122SBrooks Davis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
218ccca122SBrooks Davis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
228ccca122SBrooks Davis * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
238ccca122SBrooks Davis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
248ccca122SBrooks Davis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
258ccca122SBrooks Davis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
268ccca122SBrooks Davis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
278ccca122SBrooks Davis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
288ccca122SBrooks Davis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
298ccca122SBrooks Davis * SUCH DAMAGE.
308ccca122SBrooks Davis */
318ccca122SBrooks Davis
328ccca122SBrooks Davis #include <sys/cdefs.h>
338ccca122SBrooks Davis #if defined(LIBC_SCCS) && !defined(lint)
348ccca122SBrooks Davis #if 0
358ccca122SBrooks Davis static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93";
368ccca122SBrooks Davis #else
37*ea46e638SKyle Evans __RCSID("$NetBSD: unvis.c,v 1.45 2022/04/19 20:32:15 rillig Exp $");
388ccca122SBrooks Davis #endif
398ccca122SBrooks Davis #endif /* LIBC_SCCS and not lint */
408ccca122SBrooks Davis
418ccca122SBrooks Davis #include "namespace.h"
428ccca122SBrooks Davis #include <sys/types.h>
438ccca122SBrooks Davis
448ccca122SBrooks Davis #include <assert.h>
458ccca122SBrooks Davis #include <ctype.h>
468ccca122SBrooks Davis #include <stdint.h>
478ccca122SBrooks Davis #include <stdio.h>
488ccca122SBrooks Davis #include <errno.h>
498ccca122SBrooks Davis #include <vis.h>
508ccca122SBrooks Davis
/* NetBSD-style diagnostic assertion, mapped onto the standard assert(). */
#define	_DIAGASSERT(x)	assert(x)

/*
 * Return the number of elements in a statically-allocated array,
 * __x.  The argument must be a real array, not a pointer.  It is
 * parenthesized so that an expression argument indexes as intended.
 * The definition is skipped if a system header already provides one.
 */
#ifndef __arraycount
#define	__arraycount(__x)	(sizeof(__x) / sizeof((__x)[0]))
#endif

/* Export strnunvisx as a weak alias where the platform supports it. */
#ifdef __weak_alias
__weak_alias(strnunvisx,_strnunvisx)
#endif
628ccca122SBrooks Davis
638ccca122SBrooks Davis #if !HAVE_VIS
/*
 * decode driven by state machine
 */
#define	S_GROUND	0	/* haven't seen escape char */
#define	S_START		1	/* start decoding special sequence */
#define	S_META		2	/* metachar started (M) */
#define	S_META1		3	/* metachar more, regular char (-) */
#define	S_CTRL		4	/* control char started (^) */
#define	S_OCTAL2	5	/* octal digit 2 */
#define	S_OCTAL3	6	/* octal digit 3 */
#define	S_HEX		7	/* mandatory hex digit */
#define	S_HEX1		8	/* http hex digit */
#define	S_HEX2		9	/* http hex digit 2 */
#define	S_MIME1		10	/* mime hex digit 1 */
#define	S_MIME2		11	/* mime hex digit 2 */
#define	S_EATCRNL	12	/* mime eating CRNL */
#define	S_AMP		13	/* seen & */
#define	S_NUMBER	14	/* collecting number */
#define	S_STRING	15	/* collecting string */

/*
 * Character helpers.  Every use of the argument is parenthesized, and the
 * <ctype.h> calls always receive an unsigned char value, so the macros are
 * safe with expression arguments and with plain (possibly negative) chars.
 *
 * isoctal(c)	true if c is one of '0'..'7'
 * xtod(c)	value of hex digit c, either letter case accepted
 * XTOD(c)	value of hex digit c, upper-case letters only
 *
 * xtod() and XTOD() do not validate their argument; the caller must have
 * checked that it is a hex digit of the expected case.
 */
#define	isoctal(c)	\
	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
#define	xtod(c)		\
	(isdigit((unsigned char)(c)) ? ((c) - '0') : \
	    ((tolower((unsigned char)(c)) - 'a') + 10))
#define	XTOD(c)		\
	(isdigit((unsigned char)(c)) ? ((c) - '0') : (((c) - 'A') + 10))
878ccca122SBrooks Davis
/*
 * RFC 1866
 *
 * Named character entities and the byte each one decodes to.  A name is
 * at most six characters, so it fits name[] together with its NUL.  The
 * entries must stay sorted by name in byte order: the S_STRING matcher
 * in unvis() only ever scans forward through this table.
 */
struct nv {
	char	name[7];
	uint8_t	value;
};

static const struct nv nv[] = {
	{ .name = "AElig",  .value = 198 }, /* capital AE ligature */
	{ .name = "Aacute", .value = 193 }, /* capital A, acute */
	{ .name = "Acirc",  .value = 194 }, /* capital A, circumflex */
	{ .name = "Agrave", .value = 192 }, /* capital A, grave */
	{ .name = "Aring",  .value = 197 }, /* capital A, ring */
	{ .name = "Atilde", .value = 195 }, /* capital A, tilde */
	{ .name = "Auml",   .value = 196 }, /* capital A, dieresis/umlaut */
	{ .name = "Ccedil", .value = 199 }, /* capital C, cedilla */
	{ .name = "ETH",    .value = 208 }, /* capital Eth, Icelandic */
	{ .name = "Eacute", .value = 201 }, /* capital E, acute */
	{ .name = "Ecirc",  .value = 202 }, /* capital E, circumflex */
	{ .name = "Egrave", .value = 200 }, /* capital E, grave */
	{ .name = "Euml",   .value = 203 }, /* capital E, dieresis/umlaut */
	{ .name = "Iacute", .value = 205 }, /* capital I, acute */
	{ .name = "Icirc",  .value = 206 }, /* capital I, circumflex */
	{ .name = "Igrave", .value = 204 }, /* capital I, grave */
	{ .name = "Iuml",   .value = 207 }, /* capital I, dieresis/umlaut */
	{ .name = "Ntilde", .value = 209 }, /* capital N, tilde */
	{ .name = "Oacute", .value = 211 }, /* capital O, acute */
	{ .name = "Ocirc",  .value = 212 }, /* capital O, circumflex */
	{ .name = "Ograve", .value = 210 }, /* capital O, grave */
	{ .name = "Oslash", .value = 216 }, /* capital O, slash */
	{ .name = "Otilde", .value = 213 }, /* capital O, tilde */
	{ .name = "Ouml",   .value = 214 }, /* capital O, dieresis/umlaut */
	{ .name = "THORN",  .value = 222 }, /* capital THORN, Icelandic */
	{ .name = "Uacute", .value = 218 }, /* capital U, acute */
	{ .name = "Ucirc",  .value = 219 }, /* capital U, circumflex */
	{ .name = "Ugrave", .value = 217 }, /* capital U, grave */
	{ .name = "Uuml",   .value = 220 }, /* capital U, dieresis/umlaut */
	{ .name = "Yacute", .value = 221 }, /* capital Y, acute */
	{ .name = "aacute", .value = 225 }, /* small a, acute */
	{ .name = "acirc",  .value = 226 }, /* small a, circumflex */
	{ .name = "acute",  .value = 180 }, /* acute accent */
	{ .name = "aelig",  .value = 230 }, /* small ae ligature */
	{ .name = "agrave", .value = 224 }, /* small a, grave */
	{ .name = "amp",    .value = 38 },  /* ampersand */
	{ .name = "aring",  .value = 229 }, /* small a, ring */
	{ .name = "atilde", .value = 227 }, /* small a, tilde */
	{ .name = "auml",   .value = 228 }, /* small a, dieresis/umlaut */
	{ .name = "brvbar", .value = 166 }, /* broken (vertical) bar */
	{ .name = "ccedil", .value = 231 }, /* small c, cedilla */
	{ .name = "cedil",  .value = 184 }, /* cedilla */
	{ .name = "cent",   .value = 162 }, /* cent sign */
	{ .name = "copy",   .value = 169 }, /* copyright sign */
	{ .name = "curren", .value = 164 }, /* general currency sign */
	{ .name = "deg",    .value = 176 }, /* degree sign */
	{ .name = "divide", .value = 247 }, /* divide sign */
	{ .name = "eacute", .value = 233 }, /* small e, acute */
	{ .name = "ecirc",  .value = 234 }, /* small e, circumflex */
	{ .name = "egrave", .value = 232 }, /* small e, grave */
	{ .name = "eth",    .value = 240 }, /* small eth, Icelandic */
	{ .name = "euml",   .value = 235 }, /* small e, dieresis/umlaut */
	{ .name = "frac12", .value = 189 }, /* fraction one-half */
	{ .name = "frac14", .value = 188 }, /* fraction one-quarter */
	{ .name = "frac34", .value = 190 }, /* fraction three-quarters */
	{ .name = "gt",     .value = 62 },  /* greater than */
	{ .name = "iacute", .value = 237 }, /* small i, acute */
	{ .name = "icirc",  .value = 238 }, /* small i, circumflex */
	{ .name = "iexcl",  .value = 161 }, /* inverted exclamation mark */
	{ .name = "igrave", .value = 236 }, /* small i, grave */
	{ .name = "iquest", .value = 191 }, /* inverted question mark */
	{ .name = "iuml",   .value = 239 }, /* small i, dieresis/umlaut */
	{ .name = "laquo",  .value = 171 }, /* angle quotation mark, left */
	{ .name = "lt",     .value = 60 },  /* less than */
	{ .name = "macr",   .value = 175 }, /* macron */
	{ .name = "micro",  .value = 181 }, /* micro sign */
	{ .name = "middot", .value = 183 }, /* middle dot */
	{ .name = "nbsp",   .value = 160 }, /* no-break space */
	{ .name = "not",    .value = 172 }, /* not sign */
	{ .name = "ntilde", .value = 241 }, /* small n, tilde */
	{ .name = "oacute", .value = 243 }, /* small o, acute */
	{ .name = "ocirc",  .value = 244 }, /* small o, circumflex */
	{ .name = "ograve", .value = 242 }, /* small o, grave */
	{ .name = "ordf",   .value = 170 }, /* ordinal indicator, feminine */
	{ .name = "ordm",   .value = 186 }, /* ordinal indicator, masculine */
	{ .name = "oslash", .value = 248 }, /* small o, slash */
	{ .name = "otilde", .value = 245 }, /* small o, tilde */
	{ .name = "ouml",   .value = 246 }, /* small o, dieresis/umlaut */
	{ .name = "para",   .value = 182 }, /* pilcrow (paragraph sign) */
	{ .name = "plusmn", .value = 177 }, /* plus-or-minus sign */
	{ .name = "pound",  .value = 163 }, /* pound sterling sign */
	{ .name = "quot",   .value = 34 },  /* double quote */
	{ .name = "raquo",  .value = 187 }, /* angle quotation mark, right */
	{ .name = "reg",    .value = 174 }, /* registered sign */
	{ .name = "sect",   .value = 167 }, /* section sign */
	{ .name = "shy",    .value = 173 }, /* soft hyphen */
	{ .name = "sup1",   .value = 185 }, /* superscript one */
	{ .name = "sup2",   .value = 178 }, /* superscript two */
	{ .name = "sup3",   .value = 179 }, /* superscript three */
	{ .name = "szlig",  .value = 223 }, /* small sharp s (sz ligature) */
	{ .name = "thorn",  .value = 254 }, /* small thorn, Icelandic */
	{ .name = "times",  .value = 215 }, /* multiply sign */
	{ .name = "uacute", .value = 250 }, /* small u, acute */
	{ .name = "ucirc",  .value = 251 }, /* small u, circumflex */
	{ .name = "ugrave", .value = 249 }, /* small u, grave */
	{ .name = "uml",    .value = 168 }, /* umlaut (dieresis) */
	{ .name = "uuml",   .value = 252 }, /* small u, dieresis/umlaut */
	{ .name = "yacute", .value = 253 }, /* small y, acute */
	{ .name = "yen",    .value = 165 }, /* yen sign */
	{ .name = "yuml",   .value = 255 }, /* small y, dieresis/umlaut */
};
1968ccca122SBrooks Davis
1978ccca122SBrooks Davis /*
1988ccca122SBrooks Davis * unvis - decode characters previously encoded by vis
1998ccca122SBrooks Davis */
2008ccca122SBrooks Davis int
unvis(char * cp,int c,int * astate,int flag)2018ccca122SBrooks Davis unvis(char *cp, int c, int *astate, int flag)
2028ccca122SBrooks Davis {
2038ccca122SBrooks Davis unsigned char uc = (unsigned char)c;
2048ccca122SBrooks Davis unsigned char st, ia, is, lc;
2058ccca122SBrooks Davis
2068ccca122SBrooks Davis /*
2078ccca122SBrooks Davis * Bottom 8 bits of astate hold the state machine state.
2088ccca122SBrooks Davis * Top 8 bits hold the current character in the http 1866 nv string decoding
2098ccca122SBrooks Davis */
2108ccca122SBrooks Davis #define GS(a) ((a) & 0xff)
2118ccca122SBrooks Davis #define SS(a, b) (((uint32_t)(a) << 24) | (b))
2128ccca122SBrooks Davis #define GI(a) ((uint32_t)(a) >> 24)
2138ccca122SBrooks Davis
2148ccca122SBrooks Davis _DIAGASSERT(cp != NULL);
2158ccca122SBrooks Davis _DIAGASSERT(astate != NULL);
2168ccca122SBrooks Davis st = GS(*astate);
2178ccca122SBrooks Davis
2188ccca122SBrooks Davis if (flag & UNVIS_END) {
2198ccca122SBrooks Davis switch (st) {
2208ccca122SBrooks Davis case S_OCTAL2:
2218ccca122SBrooks Davis case S_OCTAL3:
2228ccca122SBrooks Davis case S_HEX2:
2238ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2248ccca122SBrooks Davis return UNVIS_VALID;
2258ccca122SBrooks Davis case S_GROUND:
2268ccca122SBrooks Davis return UNVIS_NOCHAR;
2278ccca122SBrooks Davis default:
2288ccca122SBrooks Davis return UNVIS_SYNBAD;
2298ccca122SBrooks Davis }
2308ccca122SBrooks Davis }
2318ccca122SBrooks Davis
2328ccca122SBrooks Davis switch (st) {
2338ccca122SBrooks Davis
2348ccca122SBrooks Davis case S_GROUND:
2358ccca122SBrooks Davis *cp = 0;
2368ccca122SBrooks Davis if ((flag & VIS_NOESCAPE) == 0 && c == '\\') {
2378ccca122SBrooks Davis *astate = SS(0, S_START);
2388ccca122SBrooks Davis return UNVIS_NOCHAR;
2398ccca122SBrooks Davis }
2408ccca122SBrooks Davis if ((flag & VIS_HTTP1808) && c == '%') {
2418ccca122SBrooks Davis *astate = SS(0, S_HEX1);
2428ccca122SBrooks Davis return UNVIS_NOCHAR;
2438ccca122SBrooks Davis }
2448ccca122SBrooks Davis if ((flag & VIS_HTTP1866) && c == '&') {
2458ccca122SBrooks Davis *astate = SS(0, S_AMP);
2468ccca122SBrooks Davis return UNVIS_NOCHAR;
2478ccca122SBrooks Davis }
2488ccca122SBrooks Davis if ((flag & VIS_MIMESTYLE) && c == '=') {
2498ccca122SBrooks Davis *astate = SS(0, S_MIME1);
2508ccca122SBrooks Davis return UNVIS_NOCHAR;
2518ccca122SBrooks Davis }
2528ccca122SBrooks Davis *cp = c;
2538ccca122SBrooks Davis return UNVIS_VALID;
2548ccca122SBrooks Davis
2558ccca122SBrooks Davis case S_START:
2568ccca122SBrooks Davis switch(c) {
2578ccca122SBrooks Davis case '\\':
2588ccca122SBrooks Davis *cp = c;
2598ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2608ccca122SBrooks Davis return UNVIS_VALID;
2618ccca122SBrooks Davis case '0': case '1': case '2': case '3':
2628ccca122SBrooks Davis case '4': case '5': case '6': case '7':
2638ccca122SBrooks Davis *cp = (c - '0');
2648ccca122SBrooks Davis *astate = SS(0, S_OCTAL2);
2658ccca122SBrooks Davis return UNVIS_NOCHAR;
2668ccca122SBrooks Davis case 'M':
2678ccca122SBrooks Davis *cp = (char)0200;
2688ccca122SBrooks Davis *astate = SS(0, S_META);
2698ccca122SBrooks Davis return UNVIS_NOCHAR;
2708ccca122SBrooks Davis case '^':
2718ccca122SBrooks Davis *astate = SS(0, S_CTRL);
2728ccca122SBrooks Davis return UNVIS_NOCHAR;
2738ccca122SBrooks Davis case 'n':
2748ccca122SBrooks Davis *cp = '\n';
2758ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2768ccca122SBrooks Davis return UNVIS_VALID;
2778ccca122SBrooks Davis case 'r':
2788ccca122SBrooks Davis *cp = '\r';
2798ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2808ccca122SBrooks Davis return UNVIS_VALID;
2818ccca122SBrooks Davis case 'b':
2828ccca122SBrooks Davis *cp = '\b';
2838ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2848ccca122SBrooks Davis return UNVIS_VALID;
2858ccca122SBrooks Davis case 'a':
2868ccca122SBrooks Davis *cp = '\007';
2878ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2888ccca122SBrooks Davis return UNVIS_VALID;
2898ccca122SBrooks Davis case 'v':
2908ccca122SBrooks Davis *cp = '\v';
2918ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2928ccca122SBrooks Davis return UNVIS_VALID;
2938ccca122SBrooks Davis case 't':
2948ccca122SBrooks Davis *cp = '\t';
2958ccca122SBrooks Davis *astate = SS(0, S_GROUND);
2968ccca122SBrooks Davis return UNVIS_VALID;
2978ccca122SBrooks Davis case 'f':
2988ccca122SBrooks Davis *cp = '\f';
2998ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3008ccca122SBrooks Davis return UNVIS_VALID;
3018ccca122SBrooks Davis case 's':
3028ccca122SBrooks Davis *cp = ' ';
3038ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3048ccca122SBrooks Davis return UNVIS_VALID;
3058ccca122SBrooks Davis case 'E':
3068ccca122SBrooks Davis *cp = '\033';
3078ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3088ccca122SBrooks Davis return UNVIS_VALID;
3098ccca122SBrooks Davis case 'x':
3108ccca122SBrooks Davis *astate = SS(0, S_HEX);
3118ccca122SBrooks Davis return UNVIS_NOCHAR;
3128ccca122SBrooks Davis case '\n':
3138ccca122SBrooks Davis /*
3148ccca122SBrooks Davis * hidden newline
3158ccca122SBrooks Davis */
3168ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3178ccca122SBrooks Davis return UNVIS_NOCHAR;
3188ccca122SBrooks Davis case '$':
3198ccca122SBrooks Davis /*
3208ccca122SBrooks Davis * hidden marker
3218ccca122SBrooks Davis */
3228ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3238ccca122SBrooks Davis return UNVIS_NOCHAR;
3248dfeba04SBrooks Davis default:
3258dfeba04SBrooks Davis if (isgraph(c)) {
3268dfeba04SBrooks Davis *cp = c;
3278dfeba04SBrooks Davis *astate = SS(0, S_GROUND);
3288dfeba04SBrooks Davis return UNVIS_VALID;
3298dfeba04SBrooks Davis }
3308ccca122SBrooks Davis }
3318ccca122SBrooks Davis goto bad;
3328ccca122SBrooks Davis
3338ccca122SBrooks Davis case S_META:
3348ccca122SBrooks Davis if (c == '-')
3358ccca122SBrooks Davis *astate = SS(0, S_META1);
3368ccca122SBrooks Davis else if (c == '^')
3378ccca122SBrooks Davis *astate = SS(0, S_CTRL);
3388ccca122SBrooks Davis else
3398ccca122SBrooks Davis goto bad;
3408ccca122SBrooks Davis return UNVIS_NOCHAR;
3418ccca122SBrooks Davis
3428ccca122SBrooks Davis case S_META1:
3438ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3448ccca122SBrooks Davis *cp |= c;
3458ccca122SBrooks Davis return UNVIS_VALID;
3468ccca122SBrooks Davis
3478ccca122SBrooks Davis case S_CTRL:
3488ccca122SBrooks Davis if (c == '?')
3498ccca122SBrooks Davis *cp |= 0177;
3508ccca122SBrooks Davis else
3518ccca122SBrooks Davis *cp |= c & 037;
3528ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3538ccca122SBrooks Davis return UNVIS_VALID;
3548ccca122SBrooks Davis
3558ccca122SBrooks Davis case S_OCTAL2: /* second possible octal digit */
3568ccca122SBrooks Davis if (isoctal(uc)) {
3578ccca122SBrooks Davis /*
3588ccca122SBrooks Davis * yes - and maybe a third
3598ccca122SBrooks Davis */
3608ccca122SBrooks Davis *cp = (*cp << 3) + (c - '0');
3618ccca122SBrooks Davis *astate = SS(0, S_OCTAL3);
3628ccca122SBrooks Davis return UNVIS_NOCHAR;
3638ccca122SBrooks Davis }
3648ccca122SBrooks Davis /*
3658ccca122SBrooks Davis * no - done with current sequence, push back passed char
3668ccca122SBrooks Davis */
3678ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3688ccca122SBrooks Davis return UNVIS_VALIDPUSH;
3698ccca122SBrooks Davis
3708ccca122SBrooks Davis case S_OCTAL3: /* third possible octal digit */
3718ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3728ccca122SBrooks Davis if (isoctal(uc)) {
3738ccca122SBrooks Davis *cp = (*cp << 3) + (c - '0');
3748ccca122SBrooks Davis return UNVIS_VALID;
3758ccca122SBrooks Davis }
3768ccca122SBrooks Davis /*
3778ccca122SBrooks Davis * we were done, push back passed char
3788ccca122SBrooks Davis */
3798ccca122SBrooks Davis return UNVIS_VALIDPUSH;
3808ccca122SBrooks Davis
3818ccca122SBrooks Davis case S_HEX:
3828ccca122SBrooks Davis if (!isxdigit(uc))
3838ccca122SBrooks Davis goto bad;
3848ccca122SBrooks Davis /*FALLTHROUGH*/
3858ccca122SBrooks Davis case S_HEX1:
3868ccca122SBrooks Davis if (isxdigit(uc)) {
3878ccca122SBrooks Davis *cp = xtod(uc);
3888ccca122SBrooks Davis *astate = SS(0, S_HEX2);
3898ccca122SBrooks Davis return UNVIS_NOCHAR;
3908ccca122SBrooks Davis }
3918ccca122SBrooks Davis /*
3928ccca122SBrooks Davis * no - done with current sequence, push back passed char
3938ccca122SBrooks Davis */
3948ccca122SBrooks Davis *astate = SS(0, S_GROUND);
3958ccca122SBrooks Davis return UNVIS_VALIDPUSH;
3968ccca122SBrooks Davis
3978ccca122SBrooks Davis case S_HEX2:
3988ccca122SBrooks Davis *astate = S_GROUND;
3998ccca122SBrooks Davis if (isxdigit(uc)) {
4008ccca122SBrooks Davis *cp = xtod(uc) | (*cp << 4);
4018ccca122SBrooks Davis return UNVIS_VALID;
4028ccca122SBrooks Davis }
4038ccca122SBrooks Davis return UNVIS_VALIDPUSH;
4048ccca122SBrooks Davis
4058ccca122SBrooks Davis case S_MIME1:
4068ccca122SBrooks Davis if (uc == '\n' || uc == '\r') {
4078ccca122SBrooks Davis *astate = SS(0, S_EATCRNL);
4088ccca122SBrooks Davis return UNVIS_NOCHAR;
4098ccca122SBrooks Davis }
4108ccca122SBrooks Davis if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
4118ccca122SBrooks Davis *cp = XTOD(uc);
4128ccca122SBrooks Davis *astate = SS(0, S_MIME2);
4138ccca122SBrooks Davis return UNVIS_NOCHAR;
4148ccca122SBrooks Davis }
4158ccca122SBrooks Davis goto bad;
4168ccca122SBrooks Davis
4178ccca122SBrooks Davis case S_MIME2:
4188ccca122SBrooks Davis if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
4198ccca122SBrooks Davis *astate = SS(0, S_GROUND);
4208ccca122SBrooks Davis *cp = XTOD(uc) | (*cp << 4);
4218ccca122SBrooks Davis return UNVIS_VALID;
4228ccca122SBrooks Davis }
4238ccca122SBrooks Davis goto bad;
4248ccca122SBrooks Davis
4258ccca122SBrooks Davis case S_EATCRNL:
4268ccca122SBrooks Davis switch (uc) {
4278ccca122SBrooks Davis case '\r':
4288ccca122SBrooks Davis case '\n':
4298ccca122SBrooks Davis return UNVIS_NOCHAR;
4308ccca122SBrooks Davis case '=':
4318ccca122SBrooks Davis *astate = SS(0, S_MIME1);
4328ccca122SBrooks Davis return UNVIS_NOCHAR;
4338ccca122SBrooks Davis default:
4348ccca122SBrooks Davis *cp = uc;
4358ccca122SBrooks Davis *astate = SS(0, S_GROUND);
4368ccca122SBrooks Davis return UNVIS_VALID;
4378ccca122SBrooks Davis }
4388ccca122SBrooks Davis
4398ccca122SBrooks Davis case S_AMP:
4408ccca122SBrooks Davis *cp = 0;
4418ccca122SBrooks Davis if (uc == '#') {
4428ccca122SBrooks Davis *astate = SS(0, S_NUMBER);
4438ccca122SBrooks Davis return UNVIS_NOCHAR;
4448ccca122SBrooks Davis }
4458ccca122SBrooks Davis *astate = SS(0, S_STRING);
4468ccca122SBrooks Davis /*FALLTHROUGH*/
4478ccca122SBrooks Davis
4488ccca122SBrooks Davis case S_STRING:
4498ccca122SBrooks Davis ia = *cp; /* index in the array */
4508ccca122SBrooks Davis is = GI(*astate); /* index in the string */
4518ccca122SBrooks Davis lc = is == 0 ? 0 : nv[ia].name[is - 1]; /* last character */
4528ccca122SBrooks Davis
4538ccca122SBrooks Davis if (uc == ';')
4548ccca122SBrooks Davis uc = '\0';
4558ccca122SBrooks Davis
4568ccca122SBrooks Davis for (; ia < __arraycount(nv); ia++) {
4578ccca122SBrooks Davis if (is != 0 && nv[ia].name[is - 1] != lc)
4588ccca122SBrooks Davis goto bad;
4598ccca122SBrooks Davis if (nv[ia].name[is] == uc)
4608ccca122SBrooks Davis break;
4618ccca122SBrooks Davis }
4628ccca122SBrooks Davis
4638ccca122SBrooks Davis if (ia == __arraycount(nv))
4648ccca122SBrooks Davis goto bad;
4658ccca122SBrooks Davis
4668ccca122SBrooks Davis if (uc != 0) {
4678ccca122SBrooks Davis *cp = ia;
4688ccca122SBrooks Davis *astate = SS(is + 1, S_STRING);
4698ccca122SBrooks Davis return UNVIS_NOCHAR;
4708ccca122SBrooks Davis }
4718ccca122SBrooks Davis
4728ccca122SBrooks Davis *cp = nv[ia].value;
4738ccca122SBrooks Davis *astate = SS(0, S_GROUND);
4748ccca122SBrooks Davis return UNVIS_VALID;
4758ccca122SBrooks Davis
4768ccca122SBrooks Davis case S_NUMBER:
4778ccca122SBrooks Davis if (uc == ';')
4788ccca122SBrooks Davis return UNVIS_VALID;
4798ccca122SBrooks Davis if (!isdigit(uc))
4808ccca122SBrooks Davis goto bad;
4818ccca122SBrooks Davis *cp += (*cp * 10) + uc - '0';
4828ccca122SBrooks Davis return UNVIS_NOCHAR;
4838ccca122SBrooks Davis
4848ccca122SBrooks Davis default:
4858ccca122SBrooks Davis bad:
4868ccca122SBrooks Davis /*
4878ccca122SBrooks Davis * decoder in unknown state - (probably uninitialized)
4888ccca122SBrooks Davis */
4898ccca122SBrooks Davis *astate = SS(0, S_GROUND);
4908ccca122SBrooks Davis return UNVIS_SYNBAD;
4918ccca122SBrooks Davis }
4928ccca122SBrooks Davis }
4938ccca122SBrooks Davis
4948ccca122SBrooks Davis /*
4958ccca122SBrooks Davis * strnunvisx - decode src into dst
4968ccca122SBrooks Davis *
4978ccca122SBrooks Davis * Number of chars decoded into dst is returned, -1 on error.
4988ccca122SBrooks Davis * Dst is null terminated.
4998ccca122SBrooks Davis */
5008ccca122SBrooks Davis
5018ccca122SBrooks Davis int
strnunvisx(char * dst,size_t dlen,const char * src,int flag)5028ccca122SBrooks Davis strnunvisx(char *dst, size_t dlen, const char *src, int flag)
5038ccca122SBrooks Davis {
5048ccca122SBrooks Davis char c;
5058ccca122SBrooks Davis char t = '\0', *start = dst;
5068ccca122SBrooks Davis int state = 0;
5078ccca122SBrooks Davis
5088ccca122SBrooks Davis _DIAGASSERT(src != NULL);
5098ccca122SBrooks Davis _DIAGASSERT(dst != NULL);
5108ccca122SBrooks Davis #define CHECKSPACE() \
5118ccca122SBrooks Davis do { \
5128ccca122SBrooks Davis if (dlen-- == 0) { \
5138ccca122SBrooks Davis errno = ENOSPC; \
5148ccca122SBrooks Davis return -1; \
5158ccca122SBrooks Davis } \
516*ea46e638SKyle Evans } while (0)
5178ccca122SBrooks Davis
5188ccca122SBrooks Davis while ((c = *src++) != '\0') {
5198ccca122SBrooks Davis again:
5208ccca122SBrooks Davis switch (unvis(&t, c, &state, flag)) {
5218ccca122SBrooks Davis case UNVIS_VALID:
5228ccca122SBrooks Davis CHECKSPACE();
5238ccca122SBrooks Davis *dst++ = t;
5248ccca122SBrooks Davis break;
5258ccca122SBrooks Davis case UNVIS_VALIDPUSH:
5268ccca122SBrooks Davis CHECKSPACE();
5278ccca122SBrooks Davis *dst++ = t;
5288ccca122SBrooks Davis goto again;
5298ccca122SBrooks Davis case 0:
5308ccca122SBrooks Davis case UNVIS_NOCHAR:
5318ccca122SBrooks Davis break;
5328ccca122SBrooks Davis case UNVIS_SYNBAD:
5338ccca122SBrooks Davis errno = EINVAL;
5348ccca122SBrooks Davis return -1;
5358ccca122SBrooks Davis default:
5368ccca122SBrooks Davis _DIAGASSERT(/*CONSTCOND*/0);
5378ccca122SBrooks Davis errno = EINVAL;
5388ccca122SBrooks Davis return -1;
5398ccca122SBrooks Davis }
5408ccca122SBrooks Davis }
5418ccca122SBrooks Davis if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) {
5428ccca122SBrooks Davis CHECKSPACE();
5438ccca122SBrooks Davis *dst++ = t;
5448ccca122SBrooks Davis }
5458ccca122SBrooks Davis CHECKSPACE();
5468ccca122SBrooks Davis *dst = '\0';
5478ccca122SBrooks Davis return (int)(dst - start);
5488ccca122SBrooks Davis }
5498ccca122SBrooks Davis
/*
 * strunvisx - decode src into dst using the syntax selected by flag
 *
 * Same as strnunvisx() with the largest possible size_t as the buffer
 * size, so no effective bound is placed on dst; the caller must supply a
 * buffer that is large enough.  Returns what strnunvisx() returns.
 */
int
strunvisx(char *dst, const char *src, int flag)
{
	const size_t unbounded = ~(size_t)0;

	return strnunvisx(dst, unbounded, src, flag);
}
5558ccca122SBrooks Davis
/*
 * strunvis - decode src into dst with no flags
 *
 * Same as strnunvisx() with a flag of 0 and the largest possible size_t
 * as the buffer size, so no effective bound is placed on dst; the caller
 * must supply a buffer that is large enough.  Returns what strnunvisx()
 * returns.
 */
int
strunvis(char *dst, const char *src)
{
	const size_t unbounded = ~(size_t)0;

	return strnunvisx(dst, unbounded, src, 0);
}
5618ccca122SBrooks Davis
/*
 * strnunvis - decode src into dst, writing at most dlen bytes
 *
 * Same as strnunvisx() with a flag of 0.  Returns what strnunvisx()
 * returns.
 */
int
strnunvis(char *dst, size_t dlen, const char *src)
{
	const int no_flags = 0;

	return strnunvisx(dst, dlen, src, no_flags);
}
5678ccca122SBrooks Davis #endif
568