1*8dfeba04SBrooks Davis /* $NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $ */ 28ccca122SBrooks Davis 38ccca122SBrooks Davis /*- 48ccca122SBrooks Davis * Copyright (c) 1989, 1993 58ccca122SBrooks Davis * The Regents of the University of California. All rights reserved. 68ccca122SBrooks Davis * 78ccca122SBrooks Davis * Redistribution and use in source and binary forms, with or without 88ccca122SBrooks Davis * modification, are permitted provided that the following conditions 98ccca122SBrooks Davis * are met: 108ccca122SBrooks Davis * 1. Redistributions of source code must retain the above copyright 118ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer. 128ccca122SBrooks Davis * 2. Redistributions in binary form must reproduce the above copyright 138ccca122SBrooks Davis * notice, this list of conditions and the following disclaimer in the 148ccca122SBrooks Davis * documentation and/or other materials provided with the distribution. 158ccca122SBrooks Davis * 3. Neither the name of the University nor the names of its contributors 168ccca122SBrooks Davis * may be used to endorse or promote products derived from this software 178ccca122SBrooks Davis * without specific prior written permission. 188ccca122SBrooks Davis * 198ccca122SBrooks Davis * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 208ccca122SBrooks Davis * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 218ccca122SBrooks Davis * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 228ccca122SBrooks Davis * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 238ccca122SBrooks Davis * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 248ccca122SBrooks Davis * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 258ccca122SBrooks Davis * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 268ccca122SBrooks Davis * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 278ccca122SBrooks Davis * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 288ccca122SBrooks Davis * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 298ccca122SBrooks Davis * SUCH DAMAGE. 308ccca122SBrooks Davis */ 318ccca122SBrooks Davis 328ccca122SBrooks Davis #include <sys/cdefs.h> 338ccca122SBrooks Davis #if defined(LIBC_SCCS) && !defined(lint) 348ccca122SBrooks Davis #if 0 358ccca122SBrooks Davis static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; 368ccca122SBrooks Davis #else 37*8dfeba04SBrooks Davis __RCSID("$NetBSD: unvis.c,v 1.44 2014/09/26 15:43:36 roy Exp $"); 388ccca122SBrooks Davis #endif 398ccca122SBrooks Davis #endif /* LIBC_SCCS and not lint */ 408ccca122SBrooks Davis __FBSDID("$FreeBSD$"); 418ccca122SBrooks Davis 428ccca122SBrooks Davis #include "namespace.h" 438ccca122SBrooks Davis #include <sys/types.h> 448ccca122SBrooks Davis 458ccca122SBrooks Davis #include <assert.h> 468ccca122SBrooks Davis #include <ctype.h> 478ccca122SBrooks Davis #include <stdint.h> 488ccca122SBrooks Davis #include <stdio.h> 498ccca122SBrooks Davis #include <errno.h> 508ccca122SBrooks Davis #include <vis.h> 518ccca122SBrooks Davis 528ccca122SBrooks Davis #define _DIAGASSERT(x) assert(x) 538ccca122SBrooks Davis 548ccca122SBrooks Davis /* 558ccca122SBrooks Davis * Return the number of elements in a statically-allocated array, 568ccca122SBrooks Davis * __x. 578ccca122SBrooks Davis */ 588ccca122SBrooks Davis #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 598ccca122SBrooks Davis 608ccca122SBrooks Davis #ifdef __weak_alias 618ccca122SBrooks Davis __weak_alias(strnunvisx,_strnunvisx) 628ccca122SBrooks Davis #endif 638ccca122SBrooks Davis 648ccca122SBrooks Davis #if !HAVE_VIS 658ccca122SBrooks Davis /* 668ccca122SBrooks Davis * decode driven by state machine 678ccca122SBrooks Davis */ 688ccca122SBrooks Davis #define S_GROUND 0 /* haven't seen escape char */ 698ccca122SBrooks Davis #define S_START 1 /* start decoding special sequence */ 708ccca122SBrooks Davis #define S_META 2 /* metachar started (M) */ 718ccca122SBrooks Davis #define S_META1 3 /* metachar more, regular char (-) */ 728ccca122SBrooks Davis #define S_CTRL 4 /* control char started (^) */ 738ccca122SBrooks Davis #define S_OCTAL2 5 /* octal digit 2 */ 748ccca122SBrooks Davis #define S_OCTAL3 6 /* octal digit 3 */ 758ccca122SBrooks Davis #define S_HEX 7 /* mandatory hex digit */ 768ccca122SBrooks Davis #define S_HEX1 8 /* http hex digit */ 778ccca122SBrooks Davis #define S_HEX2 9 /* http hex digit 2 */ 788ccca122SBrooks Davis #define S_MIME1 10 /* mime hex digit 1 */ 798ccca122SBrooks Davis #define S_MIME2 11 /* mime hex digit 2 */ 808ccca122SBrooks Davis #define S_EATCRNL 12 /* mime eating CRNL */ 818ccca122SBrooks Davis #define S_AMP 13 /* seen & */ 828ccca122SBrooks Davis #define S_NUMBER 14 /* collecting number */ 838ccca122SBrooks Davis #define S_STRING 15 /* collecting string */ 848ccca122SBrooks Davis 858ccca122SBrooks Davis #define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') 868ccca122SBrooks Davis #define xtod(c) (isdigit(c) ? (c - '0') : ((tolower(c) - 'a') + 10)) 878ccca122SBrooks Davis #define XTOD(c) (isdigit(c) ? (c - '0') : ((c - 'A') + 10)) 888ccca122SBrooks Davis 898ccca122SBrooks Davis /* 908ccca122SBrooks Davis * RFC 1866 918ccca122SBrooks Davis */ 928ccca122SBrooks Davis static const struct nv { 93778c12a6SBrooks Davis char name[7]; 948ccca122SBrooks Davis uint8_t value; 958ccca122SBrooks Davis } nv[] = { 968ccca122SBrooks Davis { "AElig", 198 }, /* capital AE diphthong (ligature) */ 978ccca122SBrooks Davis { "Aacute", 193 }, /* capital A, acute accent */ 988ccca122SBrooks Davis { "Acirc", 194 }, /* capital A, circumflex accent */ 998ccca122SBrooks Davis { "Agrave", 192 }, /* capital A, grave accent */ 1008ccca122SBrooks Davis { "Aring", 197 }, /* capital A, ring */ 1018ccca122SBrooks Davis { "Atilde", 195 }, /* capital A, tilde */ 1028ccca122SBrooks Davis { "Auml", 196 }, /* capital A, dieresis or umlaut mark */ 1038ccca122SBrooks Davis { "Ccedil", 199 }, /* capital C, cedilla */ 1048ccca122SBrooks Davis { "ETH", 208 }, /* capital Eth, Icelandic */ 1058ccca122SBrooks Davis { "Eacute", 201 }, /* capital E, acute accent */ 1068ccca122SBrooks Davis { "Ecirc", 202 }, /* capital E, circumflex accent */ 1078ccca122SBrooks Davis { "Egrave", 200 }, /* capital E, grave accent */ 1088ccca122SBrooks Davis { "Euml", 203 }, /* capital E, dieresis or umlaut mark */ 1098ccca122SBrooks Davis { "Iacute", 205 }, /* capital I, acute accent */ 1108ccca122SBrooks Davis { "Icirc", 206 }, /* capital I, circumflex accent */ 1118ccca122SBrooks Davis { "Igrave", 204 }, /* capital I, grave accent */ 1128ccca122SBrooks Davis { "Iuml", 207 }, /* capital I, dieresis or umlaut mark */ 1138ccca122SBrooks Davis { "Ntilde", 209 }, /* capital N, tilde */ 1148ccca122SBrooks Davis { "Oacute", 211 }, /* capital O, acute accent */ 1158ccca122SBrooks Davis { "Ocirc", 212 }, /* capital O, circumflex accent */ 1168ccca122SBrooks Davis { "Ograve", 210 }, /* capital O, grave accent */ 1178ccca122SBrooks Davis { "Oslash", 216 }, /* capital O, slash */ 1188ccca122SBrooks Davis { "Otilde", 213 }, /* capital O, tilde */ 1198ccca122SBrooks Davis { "Ouml", 214 }, /* capital O, dieresis or umlaut mark */ 1208ccca122SBrooks Davis { "THORN", 222 }, /* capital THORN, Icelandic */ 1218ccca122SBrooks Davis { "Uacute", 218 }, /* capital U, acute accent */ 1228ccca122SBrooks Davis { "Ucirc", 219 }, /* capital U, circumflex accent */ 1238ccca122SBrooks Davis { "Ugrave", 217 }, /* capital U, grave accent */ 1248ccca122SBrooks Davis { "Uuml", 220 }, /* capital U, dieresis or umlaut mark */ 1258ccca122SBrooks Davis { "Yacute", 221 }, /* capital Y, acute accent */ 1268ccca122SBrooks Davis { "aacute", 225 }, /* small a, acute accent */ 1278ccca122SBrooks Davis { "acirc", 226 }, /* small a, circumflex accent */ 1288ccca122SBrooks Davis { "acute", 180 }, /* acute accent */ 1298ccca122SBrooks Davis { "aelig", 230 }, /* small ae diphthong (ligature) */ 1308ccca122SBrooks Davis { "agrave", 224 }, /* small a, grave accent */ 1318ccca122SBrooks Davis { "amp", 38 }, /* ampersand */ 1328ccca122SBrooks Davis { "aring", 229 }, /* small a, ring */ 1338ccca122SBrooks Davis { "atilde", 227 }, /* small a, tilde */ 1348ccca122SBrooks Davis { "auml", 228 }, /* small a, dieresis or umlaut mark */ 1358ccca122SBrooks Davis { "brvbar", 166 }, /* broken (vertical) bar */ 1368ccca122SBrooks Davis { "ccedil", 231 }, /* small c, cedilla */ 1378ccca122SBrooks Davis { "cedil", 184 }, /* cedilla */ 1388ccca122SBrooks Davis { "cent", 162 }, /* cent sign */ 1398ccca122SBrooks Davis { "copy", 169 }, /* copyright sign */ 1408ccca122SBrooks Davis { "curren", 164 }, /* general currency sign */ 1418ccca122SBrooks Davis { "deg", 176 }, /* degree sign */ 1428ccca122SBrooks Davis { "divide", 247 }, /* divide sign */ 1438ccca122SBrooks Davis { "eacute", 233 }, /* small e, acute accent */ 1448ccca122SBrooks Davis { "ecirc", 234 }, /* small e, circumflex accent */ 1458ccca122SBrooks Davis { "egrave", 232 }, /* small e, grave accent */ 1468ccca122SBrooks Davis { "eth", 240 }, /* small eth, Icelandic */ 1478ccca122SBrooks Davis { "euml", 235 }, /* small e, dieresis or umlaut mark */ 1488ccca122SBrooks Davis { "frac12", 189 }, /* fraction one-half */ 1498ccca122SBrooks Davis { "frac14", 188 }, /* fraction one-quarter */ 1508ccca122SBrooks Davis { "frac34", 190 }, /* fraction three-quarters */ 1518ccca122SBrooks Davis { "gt", 62 }, /* greater than */ 1528ccca122SBrooks Davis { "iacute", 237 }, /* small i, acute accent */ 1538ccca122SBrooks Davis { "icirc", 238 }, /* small i, circumflex accent */ 1548ccca122SBrooks Davis { "iexcl", 161 }, /* inverted exclamation mark */ 1558ccca122SBrooks Davis { "igrave", 236 }, /* small i, grave accent */ 1568ccca122SBrooks Davis { "iquest", 191 }, /* inverted question mark */ 1578ccca122SBrooks Davis { "iuml", 239 }, /* small i, dieresis or umlaut mark */ 1588ccca122SBrooks Davis { "laquo", 171 }, /* angle quotation mark, left */ 1598ccca122SBrooks Davis { "lt", 60 }, /* less than */ 1608ccca122SBrooks Davis { "macr", 175 }, /* macron */ 1618ccca122SBrooks Davis { "micro", 181 }, /* micro sign */ 1628ccca122SBrooks Davis { "middot", 183 }, /* middle dot */ 1638ccca122SBrooks Davis { "nbsp", 160 }, /* no-break space */ 1648ccca122SBrooks Davis { "not", 172 }, /* not sign */ 1658ccca122SBrooks Davis { "ntilde", 241 }, /* small n, tilde */ 1668ccca122SBrooks Davis { "oacute", 243 }, /* small o, acute accent */ 1678ccca122SBrooks Davis { "ocirc", 244 }, /* small o, circumflex accent */ 1688ccca122SBrooks Davis { "ograve", 242 }, /* small o, grave accent */ 1698ccca122SBrooks Davis { "ordf", 170 }, /* ordinal indicator, feminine */ 1708ccca122SBrooks Davis { "ordm", 186 }, /* ordinal indicator, masculine */ 1718ccca122SBrooks Davis { "oslash", 248 }, /* small o, slash */ 1728ccca122SBrooks Davis { "otilde", 245 }, /* small o, tilde */ 1738ccca122SBrooks Davis { "ouml", 246 }, /* small o, dieresis or umlaut mark */ 1748ccca122SBrooks Davis { "para", 182 }, /* pilcrow (paragraph sign) */ 1758ccca122SBrooks Davis { "plusmn", 177 }, /* plus-or-minus sign */ 1768ccca122SBrooks Davis { "pound", 163 }, /* pound sterling sign */ 1778ccca122SBrooks Davis { "quot", 34 }, /* double quote */ 1788ccca122SBrooks Davis { "raquo", 187 }, /* angle quotation mark, right */ 1798ccca122SBrooks Davis { "reg", 174 }, /* registered sign */ 1808ccca122SBrooks Davis { "sect", 167 }, /* section sign */ 1818ccca122SBrooks Davis { "shy", 173 }, /* soft hyphen */ 1828ccca122SBrooks Davis { "sup1", 185 }, /* superscript one */ 1838ccca122SBrooks Davis { "sup2", 178 }, /* superscript two */ 1848ccca122SBrooks Davis { "sup3", 179 }, /* superscript three */ 1858ccca122SBrooks Davis { "szlig", 223 }, /* small sharp s, German (sz ligature) */ 1868ccca122SBrooks Davis { "thorn", 254 }, /* small thorn, Icelandic */ 1878ccca122SBrooks Davis { "times", 215 }, /* multiply sign */ 1888ccca122SBrooks Davis { "uacute", 250 }, /* small u, acute accent */ 1898ccca122SBrooks Davis { "ucirc", 251 }, /* small u, circumflex accent */ 1908ccca122SBrooks Davis { "ugrave", 249 }, /* small u, grave accent */ 1918ccca122SBrooks Davis { "uml", 168 }, /* umlaut (dieresis) */ 1928ccca122SBrooks Davis { "uuml", 252 }, /* small u, dieresis or umlaut mark */ 1938ccca122SBrooks Davis { "yacute", 253 }, /* small y, acute accent */ 1948ccca122SBrooks Davis { "yen", 165 }, /* yen sign */ 1958ccca122SBrooks Davis { "yuml", 255 }, /* small y, dieresis or umlaut mark */ 1968ccca122SBrooks Davis }; 1978ccca122SBrooks Davis 1988ccca122SBrooks Davis /* 1998ccca122SBrooks Davis * unvis - decode characters previously encoded by vis 2008ccca122SBrooks Davis */ 2018ccca122SBrooks Davis int 2028ccca122SBrooks Davis unvis(char *cp, int c, int *astate, int flag) 2038ccca122SBrooks Davis { 2048ccca122SBrooks Davis unsigned char uc = (unsigned char)c; 2058ccca122SBrooks Davis unsigned char st, ia, is, lc; 2068ccca122SBrooks Davis 2078ccca122SBrooks Davis /* 2088ccca122SBrooks Davis * Bottom 8 bits of astate hold the state machine state. 2098ccca122SBrooks Davis * Top 8 bits hold the current character in the http 1866 nv string decoding 2108ccca122SBrooks Davis */ 2118ccca122SBrooks Davis #define GS(a) ((a) & 0xff) 2128ccca122SBrooks Davis #define SS(a, b) (((uint32_t)(a) << 24) | (b)) 2138ccca122SBrooks Davis #define GI(a) ((uint32_t)(a) >> 24) 2148ccca122SBrooks Davis 2158ccca122SBrooks Davis _DIAGASSERT(cp != NULL); 2168ccca122SBrooks Davis _DIAGASSERT(astate != NULL); 2178ccca122SBrooks Davis st = GS(*astate); 2188ccca122SBrooks Davis 2198ccca122SBrooks Davis if (flag & UNVIS_END) { 2208ccca122SBrooks Davis switch (st) { 2218ccca122SBrooks Davis case S_OCTAL2: 2228ccca122SBrooks Davis case S_OCTAL3: 2238ccca122SBrooks Davis case S_HEX2: 2248ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2258ccca122SBrooks Davis return UNVIS_VALID; 2268ccca122SBrooks Davis case S_GROUND: 2278ccca122SBrooks Davis return UNVIS_NOCHAR; 2288ccca122SBrooks Davis default: 2298ccca122SBrooks Davis return UNVIS_SYNBAD; 2308ccca122SBrooks Davis } 2318ccca122SBrooks Davis } 2328ccca122SBrooks Davis 2338ccca122SBrooks Davis switch (st) { 2348ccca122SBrooks Davis 2358ccca122SBrooks Davis case S_GROUND: 2368ccca122SBrooks Davis *cp = 0; 2378ccca122SBrooks Davis if ((flag & VIS_NOESCAPE) == 0 && c == '\\') { 2388ccca122SBrooks Davis *astate = SS(0, S_START); 2398ccca122SBrooks Davis return UNVIS_NOCHAR; 2408ccca122SBrooks Davis } 2418ccca122SBrooks Davis if ((flag & VIS_HTTP1808) && c == '%') { 2428ccca122SBrooks Davis *astate = SS(0, S_HEX1); 2438ccca122SBrooks Davis return UNVIS_NOCHAR; 2448ccca122SBrooks Davis } 2458ccca122SBrooks Davis if ((flag & VIS_HTTP1866) && c == '&') { 2468ccca122SBrooks Davis *astate = SS(0, S_AMP); 2478ccca122SBrooks Davis return UNVIS_NOCHAR; 2488ccca122SBrooks Davis } 2498ccca122SBrooks Davis if ((flag & VIS_MIMESTYLE) && c == '=') { 2508ccca122SBrooks Davis *astate = SS(0, S_MIME1); 2518ccca122SBrooks Davis return UNVIS_NOCHAR; 2528ccca122SBrooks Davis } 2538ccca122SBrooks Davis *cp = c; 2548ccca122SBrooks Davis return UNVIS_VALID; 2558ccca122SBrooks Davis 2568ccca122SBrooks Davis case S_START: 2578ccca122SBrooks Davis switch(c) { 2588ccca122SBrooks Davis case '\\': 2598ccca122SBrooks Davis *cp = c; 2608ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2618ccca122SBrooks Davis return UNVIS_VALID; 2628ccca122SBrooks Davis case '0': case '1': case '2': case '3': 2638ccca122SBrooks Davis case '4': case '5': case '6': case '7': 2648ccca122SBrooks Davis *cp = (c - '0'); 2658ccca122SBrooks Davis *astate = SS(0, S_OCTAL2); 2668ccca122SBrooks Davis return UNVIS_NOCHAR; 2678ccca122SBrooks Davis case 'M': 2688ccca122SBrooks Davis *cp = (char)0200; 2698ccca122SBrooks Davis *astate = SS(0, S_META); 2708ccca122SBrooks Davis return UNVIS_NOCHAR; 2718ccca122SBrooks Davis case '^': 2728ccca122SBrooks Davis *astate = SS(0, S_CTRL); 2738ccca122SBrooks Davis return UNVIS_NOCHAR; 2748ccca122SBrooks Davis case 'n': 2758ccca122SBrooks Davis *cp = '\n'; 2768ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2778ccca122SBrooks Davis return UNVIS_VALID; 2788ccca122SBrooks Davis case 'r': 2798ccca122SBrooks Davis *cp = '\r'; 2808ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2818ccca122SBrooks Davis return UNVIS_VALID; 2828ccca122SBrooks Davis case 'b': 2838ccca122SBrooks Davis *cp = '\b'; 2848ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2858ccca122SBrooks Davis return UNVIS_VALID; 2868ccca122SBrooks Davis case 'a': 2878ccca122SBrooks Davis *cp = '\007'; 2888ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2898ccca122SBrooks Davis return UNVIS_VALID; 2908ccca122SBrooks Davis case 'v': 2918ccca122SBrooks Davis *cp = '\v'; 2928ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2938ccca122SBrooks Davis return UNVIS_VALID; 2948ccca122SBrooks Davis case 't': 2958ccca122SBrooks Davis *cp = '\t'; 2968ccca122SBrooks Davis *astate = SS(0, S_GROUND); 2978ccca122SBrooks Davis return UNVIS_VALID; 2988ccca122SBrooks Davis case 'f': 2998ccca122SBrooks Davis *cp = '\f'; 3008ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3018ccca122SBrooks Davis return UNVIS_VALID; 3028ccca122SBrooks Davis case 's': 3038ccca122SBrooks Davis *cp = ' '; 3048ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3058ccca122SBrooks Davis return UNVIS_VALID; 3068ccca122SBrooks Davis case 'E': 3078ccca122SBrooks Davis *cp = '\033'; 3088ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3098ccca122SBrooks Davis return UNVIS_VALID; 3108ccca122SBrooks Davis case 'x': 3118ccca122SBrooks Davis *astate = SS(0, S_HEX); 3128ccca122SBrooks Davis return UNVIS_NOCHAR; 3138ccca122SBrooks Davis case '\n': 3148ccca122SBrooks Davis /* 3158ccca122SBrooks Davis * hidden newline 3168ccca122SBrooks Davis */ 3178ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3188ccca122SBrooks Davis return UNVIS_NOCHAR; 3198ccca122SBrooks Davis case '$': 3208ccca122SBrooks Davis /* 3218ccca122SBrooks Davis * hidden marker 3228ccca122SBrooks Davis */ 3238ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3248ccca122SBrooks Davis return UNVIS_NOCHAR; 325*8dfeba04SBrooks Davis default: 326*8dfeba04SBrooks Davis if (isgraph(c)) { 327*8dfeba04SBrooks Davis *cp = c; 328*8dfeba04SBrooks Davis *astate = SS(0, S_GROUND); 329*8dfeba04SBrooks Davis return UNVIS_VALID; 330*8dfeba04SBrooks Davis } 3318ccca122SBrooks Davis } 3328ccca122SBrooks Davis goto bad; 3338ccca122SBrooks Davis 3348ccca122SBrooks Davis case S_META: 3358ccca122SBrooks Davis if (c == '-') 3368ccca122SBrooks Davis *astate = SS(0, S_META1); 3378ccca122SBrooks Davis else if (c == '^') 3388ccca122SBrooks Davis *astate = SS(0, S_CTRL); 3398ccca122SBrooks Davis else 3408ccca122SBrooks Davis goto bad; 3418ccca122SBrooks Davis return UNVIS_NOCHAR; 3428ccca122SBrooks Davis 3438ccca122SBrooks Davis case S_META1: 3448ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3458ccca122SBrooks Davis *cp |= c; 3468ccca122SBrooks Davis return UNVIS_VALID; 3478ccca122SBrooks Davis 3488ccca122SBrooks Davis case S_CTRL: 3498ccca122SBrooks Davis if (c == '?') 3508ccca122SBrooks Davis *cp |= 0177; 3518ccca122SBrooks Davis else 3528ccca122SBrooks Davis *cp |= c & 037; 3538ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3548ccca122SBrooks Davis return UNVIS_VALID; 3558ccca122SBrooks Davis 3568ccca122SBrooks Davis case S_OCTAL2: /* second possible octal digit */ 3578ccca122SBrooks Davis if (isoctal(uc)) { 3588ccca122SBrooks Davis /* 3598ccca122SBrooks Davis * yes - and maybe a third 3608ccca122SBrooks Davis */ 3618ccca122SBrooks Davis *cp = (*cp << 3) + (c - '0'); 3628ccca122SBrooks Davis *astate = SS(0, S_OCTAL3); 3638ccca122SBrooks Davis return UNVIS_NOCHAR; 3648ccca122SBrooks Davis } 3658ccca122SBrooks Davis /* 3668ccca122SBrooks Davis * no - done with current sequence, push back passed char 3678ccca122SBrooks Davis */ 3688ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3698ccca122SBrooks Davis return UNVIS_VALIDPUSH; 3708ccca122SBrooks Davis 3718ccca122SBrooks Davis case S_OCTAL3: /* third possible octal digit */ 3728ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3738ccca122SBrooks Davis if (isoctal(uc)) { 3748ccca122SBrooks Davis *cp = (*cp << 3) + (c - '0'); 3758ccca122SBrooks Davis return UNVIS_VALID; 3768ccca122SBrooks Davis } 3778ccca122SBrooks Davis /* 3788ccca122SBrooks Davis * we were done, push back passed char 3798ccca122SBrooks Davis */ 3808ccca122SBrooks Davis return UNVIS_VALIDPUSH; 3818ccca122SBrooks Davis 3828ccca122SBrooks Davis case S_HEX: 3838ccca122SBrooks Davis if (!isxdigit(uc)) 3848ccca122SBrooks Davis goto bad; 3858ccca122SBrooks Davis /*FALLTHROUGH*/ 3868ccca122SBrooks Davis case S_HEX1: 3878ccca122SBrooks Davis if (isxdigit(uc)) { 3888ccca122SBrooks Davis *cp = xtod(uc); 3898ccca122SBrooks Davis *astate = SS(0, S_HEX2); 3908ccca122SBrooks Davis return UNVIS_NOCHAR; 3918ccca122SBrooks Davis } 3928ccca122SBrooks Davis /* 3938ccca122SBrooks Davis * no - done with current sequence, push back passed char 3948ccca122SBrooks Davis */ 3958ccca122SBrooks Davis *astate = SS(0, S_GROUND); 3968ccca122SBrooks Davis return UNVIS_VALIDPUSH; 3978ccca122SBrooks Davis 3988ccca122SBrooks Davis case S_HEX2: 3998ccca122SBrooks Davis *astate = S_GROUND; 4008ccca122SBrooks Davis if (isxdigit(uc)) { 4018ccca122SBrooks Davis *cp = xtod(uc) | (*cp << 4); 4028ccca122SBrooks Davis return UNVIS_VALID; 4038ccca122SBrooks Davis } 4048ccca122SBrooks Davis return UNVIS_VALIDPUSH; 4058ccca122SBrooks Davis 4068ccca122SBrooks Davis case S_MIME1: 4078ccca122SBrooks Davis if (uc == '\n' || uc == '\r') { 4088ccca122SBrooks Davis *astate = SS(0, S_EATCRNL); 4098ccca122SBrooks Davis return UNVIS_NOCHAR; 4108ccca122SBrooks Davis } 4118ccca122SBrooks Davis if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) { 4128ccca122SBrooks Davis *cp = XTOD(uc); 4138ccca122SBrooks Davis *astate = SS(0, S_MIME2); 4148ccca122SBrooks Davis return UNVIS_NOCHAR; 4158ccca122SBrooks Davis } 4168ccca122SBrooks Davis goto bad; 4178ccca122SBrooks Davis 4188ccca122SBrooks Davis case S_MIME2: 4198ccca122SBrooks Davis if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) { 4208ccca122SBrooks Davis *astate = SS(0, S_GROUND); 4218ccca122SBrooks Davis *cp = XTOD(uc) | (*cp << 4); 4228ccca122SBrooks Davis return UNVIS_VALID; 4238ccca122SBrooks Davis } 4248ccca122SBrooks Davis goto bad; 4258ccca122SBrooks Davis 4268ccca122SBrooks Davis case S_EATCRNL: 4278ccca122SBrooks Davis switch (uc) { 4288ccca122SBrooks Davis case '\r': 4298ccca122SBrooks Davis case '\n': 4308ccca122SBrooks Davis return UNVIS_NOCHAR; 4318ccca122SBrooks Davis case '=': 4328ccca122SBrooks Davis *astate = SS(0, S_MIME1); 4338ccca122SBrooks Davis return UNVIS_NOCHAR; 4348ccca122SBrooks Davis default: 4358ccca122SBrooks Davis *cp = uc; 4368ccca122SBrooks Davis *astate = SS(0, S_GROUND); 4378ccca122SBrooks Davis return UNVIS_VALID; 4388ccca122SBrooks Davis } 4398ccca122SBrooks Davis 4408ccca122SBrooks Davis case S_AMP: 4418ccca122SBrooks Davis *cp = 0; 4428ccca122SBrooks Davis if (uc == '#') { 4438ccca122SBrooks Davis *astate = SS(0, S_NUMBER); 4448ccca122SBrooks Davis return UNVIS_NOCHAR; 4458ccca122SBrooks Davis } 4468ccca122SBrooks Davis *astate = SS(0, S_STRING); 4478ccca122SBrooks Davis /*FALLTHROUGH*/ 4488ccca122SBrooks Davis 4498ccca122SBrooks Davis case S_STRING: 4508ccca122SBrooks Davis ia = *cp; /* index in the array */ 4518ccca122SBrooks Davis is = GI(*astate); /* index in the string */ 4528ccca122SBrooks Davis lc = is == 0 ? 0 : nv[ia].name[is - 1]; /* last character */ 4538ccca122SBrooks Davis 4548ccca122SBrooks Davis if (uc == ';') 4558ccca122SBrooks Davis uc = '\0'; 4568ccca122SBrooks Davis 4578ccca122SBrooks Davis for (; ia < __arraycount(nv); ia++) { 4588ccca122SBrooks Davis if (is != 0 && nv[ia].name[is - 1] != lc) 4598ccca122SBrooks Davis goto bad; 4608ccca122SBrooks Davis if (nv[ia].name[is] == uc) 4618ccca122SBrooks Davis break; 4628ccca122SBrooks Davis } 4638ccca122SBrooks Davis 4648ccca122SBrooks Davis if (ia == __arraycount(nv)) 4658ccca122SBrooks Davis goto bad; 4668ccca122SBrooks Davis 4678ccca122SBrooks Davis if (uc != 0) { 4688ccca122SBrooks Davis *cp = ia; 4698ccca122SBrooks Davis *astate = SS(is + 1, S_STRING); 4708ccca122SBrooks Davis return UNVIS_NOCHAR; 4718ccca122SBrooks Davis } 4728ccca122SBrooks Davis 4738ccca122SBrooks Davis *cp = nv[ia].value; 4748ccca122SBrooks Davis *astate = SS(0, S_GROUND); 4758ccca122SBrooks Davis return UNVIS_VALID; 4768ccca122SBrooks Davis 4778ccca122SBrooks Davis case S_NUMBER: 4788ccca122SBrooks Davis if (uc == ';') 4798ccca122SBrooks Davis return UNVIS_VALID; 4808ccca122SBrooks Davis if (!isdigit(uc)) 4818ccca122SBrooks Davis goto bad; 4828ccca122SBrooks Davis *cp += (*cp * 10) + uc - '0'; 4838ccca122SBrooks Davis return UNVIS_NOCHAR; 4848ccca122SBrooks Davis 4858ccca122SBrooks Davis default: 4868ccca122SBrooks Davis bad: 4878ccca122SBrooks Davis /* 4888ccca122SBrooks Davis * decoder in unknown state - (probably uninitialized) 4898ccca122SBrooks Davis */ 4908ccca122SBrooks Davis *astate = SS(0, S_GROUND); 4918ccca122SBrooks Davis return UNVIS_SYNBAD; 4928ccca122SBrooks Davis } 4938ccca122SBrooks Davis } 4948ccca122SBrooks Davis 4958ccca122SBrooks Davis /* 4968ccca122SBrooks Davis * strnunvisx - decode src into dst 4978ccca122SBrooks Davis * 4988ccca122SBrooks Davis * Number of chars decoded into dst is returned, -1 on error. 4998ccca122SBrooks Davis * Dst is null terminated. 5008ccca122SBrooks Davis */ 5018ccca122SBrooks Davis 5028ccca122SBrooks Davis int 5038ccca122SBrooks Davis strnunvisx(char *dst, size_t dlen, const char *src, int flag) 5048ccca122SBrooks Davis { 5058ccca122SBrooks Davis char c; 5068ccca122SBrooks Davis char t = '\0', *start = dst; 5078ccca122SBrooks Davis int state = 0; 5088ccca122SBrooks Davis 5098ccca122SBrooks Davis _DIAGASSERT(src != NULL); 5108ccca122SBrooks Davis _DIAGASSERT(dst != NULL); 5118ccca122SBrooks Davis #define CHECKSPACE() \ 5128ccca122SBrooks Davis do { \ 5138ccca122SBrooks Davis if (dlen-- == 0) { \ 5148ccca122SBrooks Davis errno = ENOSPC; \ 5158ccca122SBrooks Davis return -1; \ 5168ccca122SBrooks Davis } \ 5178ccca122SBrooks Davis } while (/*CONSTCOND*/0) 5188ccca122SBrooks Davis 5198ccca122SBrooks Davis while ((c = *src++) != '\0') { 5208ccca122SBrooks Davis again: 5218ccca122SBrooks Davis switch (unvis(&t, c, &state, flag)) { 5228ccca122SBrooks Davis case UNVIS_VALID: 5238ccca122SBrooks Davis CHECKSPACE(); 5248ccca122SBrooks Davis *dst++ = t; 5258ccca122SBrooks Davis break; 5268ccca122SBrooks Davis case UNVIS_VALIDPUSH: 5278ccca122SBrooks Davis CHECKSPACE(); 5288ccca122SBrooks Davis *dst++ = t; 5298ccca122SBrooks Davis goto again; 5308ccca122SBrooks Davis case 0: 5318ccca122SBrooks Davis case UNVIS_NOCHAR: 5328ccca122SBrooks Davis break; 5338ccca122SBrooks Davis case UNVIS_SYNBAD: 5348ccca122SBrooks Davis errno = EINVAL; 5358ccca122SBrooks Davis return -1; 5368ccca122SBrooks Davis default: 5378ccca122SBrooks Davis _DIAGASSERT(/*CONSTCOND*/0); 5388ccca122SBrooks Davis errno = EINVAL; 5398ccca122SBrooks Davis return -1; 5408ccca122SBrooks Davis } 5418ccca122SBrooks Davis } 5428ccca122SBrooks Davis if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) { 5438ccca122SBrooks Davis CHECKSPACE(); 5448ccca122SBrooks Davis *dst++ = t; 5458ccca122SBrooks Davis } 5468ccca122SBrooks Davis CHECKSPACE(); 5478ccca122SBrooks Davis *dst = '\0'; 5488ccca122SBrooks Davis return (int)(dst - start); 5498ccca122SBrooks Davis } 5508ccca122SBrooks Davis 5518ccca122SBrooks Davis int 5528ccca122SBrooks Davis strunvisx(char *dst, const char *src, int flag) 5538ccca122SBrooks Davis { 5548ccca122SBrooks Davis return strnunvisx(dst, (size_t)~0, src, flag); 5558ccca122SBrooks Davis } 5568ccca122SBrooks Davis 5578ccca122SBrooks Davis int 5588ccca122SBrooks Davis strunvis(char *dst, const char *src) 5598ccca122SBrooks Davis { 5608ccca122SBrooks Davis return strnunvisx(dst, (size_t)~0, src, 0); 5618ccca122SBrooks Davis } 5628ccca122SBrooks Davis 5638ccca122SBrooks Davis int 5648ccca122SBrooks Davis strnunvis(char *dst, size_t dlen, const char *src) 5658ccca122SBrooks Davis { 5668ccca122SBrooks Davis return strnunvisx(dst, dlen, src, 0); 5678ccca122SBrooks Davis } 5688ccca122SBrooks Davis #endif 569