1 /* $NetBSD: uxtext.c,v 1.2 2017/02/14 01:16:45 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* uxtext 3 6 /* SUMMARY 7 /* quote/unquote text, xtext style. 8 /* SYNOPSIS 9 /* #include <uxtext.h> 10 /* 11 /* VSTRING *uxtext_quote(quoted, unquoted, special) 12 /* VSTRING *quoted; 13 /* const char *unquoted; 14 /* const char *special; 15 /* 16 /* VSTRING *uxtext_quote_append(unquoted, quoted, special) 17 /* VSTRING *unquoted; 18 /* const char *quoted; 19 /* const char *special; 20 /* 21 /* VSTRING *uxtext_unquote(unquoted, quoted) 22 /* VSTRING *unquoted; 23 /* const char *quoted; 24 /* 25 /* VSTRING *uxtext_unquote_append(unquoted, quoted) 26 /* VSTRING *unquoted; 27 /* const char *quoted; 28 /* DESCRIPTION 29 /* uxtext_quote() takes a null-terminated UTF8 string and 30 /* replaces characters \, <33(10) and >126(10), as well as 31 /* characters specified with "special" with \x{XX}, XX being 32 /* a 2-6-digit uppercase hexadecimal equivalent. 33 /* 34 /* uxtext_quote_append() is like uxtext_quote(), but appends 35 /* the conversion result to the result buffer. 36 /* 37 /* uxtext_unquote() performs the opposite transformation. This 38 /* function understands lowercase, uppercase, and mixed case 39 /* \x{XX...} sequences. The result value is the unquoted 40 /* argument in case of success, a null pointer otherwise. 41 /* 42 /* uxtext_unquote_append() is like uxtext_unquote(), but appends 43 /* the conversion result to the result buffer. 44 /* BUGS 45 /* This module cannot process null characters in data. 46 /* LICENSE 47 /* .ad 48 /* .fi 49 /* The Secure Mailer license must be distributed with this software. 50 /* AUTHOR(S) 51 /* Arnt Gulbrandsen 52 /* 53 /* Wietse Venema 54 /* IBM T.J. Watson Research 55 /* P.O. Box 704 56 /* Yorktown Heights, NY 10598, USA 57 /*--*/ 58 59 /* System library. */ 60 61 #include <sys_defs.h> 62 #include <string.h> 63 #include <ctype.h> 64 65 /* Utility library. */ 66 67 #include "msg.h" 68 #include "vstring.h" 69 #include "uxtext.h" 70 71 /* Application-specific. */ 72 73 #define STR(x) vstring_str(x) 74 #define LEN(x) VSTRING_LEN(x) 75 76 /* uxtext_quote_append - append unquoted data to quoted data */ 77 78 VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted, 79 const char *special) 80 { 81 unsigned const char *cp; 82 int ch; 83 84 for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) { 85 /* Fix 20140709: the '\' character must always be quoted. */ 86 if (ch != '\\' && ch > 32 && ch < 127 87 && (*special == 0 || strchr(special, ch) == 0)) { 88 VSTRING_ADDCH(quoted, ch); 89 } else { 90 91 /* 92 * had RFC6533 been written like 6531 and 6532, this else clause 93 * would be one line long. 94 */ 95 int unicode = 0; 96 int pick = 0; 97 98 if (ch < 0x80) { 99 //0000 0000 - 0000 007 F 0x xxxxxx 100 unicode = ch; 101 } else if ((ch & 0xe0) == 0xc0) { 102 //0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx 103 unicode = (ch & 0x1f); 104 pick = 1; 105 } else if ((ch & 0xf0) == 0xe0) { 106 //0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx 107 unicode = (ch & 0x0f); 108 pick = 2; 109 } else if ((ch & 0xf8) == 0xf0) { 110 //0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx 111 unicode = (ch & 0x07); 112 pick = 3; 113 } else if ((ch & 0xfc) == 0xf8) { 114 //0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx 115 unicode = (ch & 0x03); 116 pick = 4; 117 } else if ((ch & 0xfe) == 0xfc) { 118 //0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx 119 unicode = (ch & 0x01); 120 pick = 5; 121 } else { 122 return (0); 123 } 124 while (pick > 0) { 125 ch = *++cp; 126 if ((ch & 0xc0) != 0x80) 127 return (0); 128 unicode = unicode << 6 | (ch & 0x3f); 129 pick--; 130 } 131 vstring_sprintf_append(quoted, "\\x{%02X}", unicode); 132 } 133 } 134 VSTRING_TERMINATE(quoted); 135 return (quoted); 136 } 137 138 /* uxtext_quote - unquoted data to quoted */ 139 140 VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special) 141 { 142 VSTRING_RESET(quoted); 143 uxtext_quote_append(quoted, unquoted, special); 144 return (quoted); 145 } 146 147 /* uxtext_unquote_append - quoted data to unquoted */ 148 149 VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted) 150 { 151 const unsigned char *cp; 152 int ch; 153 154 for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) { 155 if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') { 156 int unicode = 0; 157 158 cp += 2; 159 while ((ch = *++cp) != '}') { 160 if (ISDIGIT(ch)) 161 unicode = (unicode << 4) + (ch - '0'); 162 else if (ch >= 'a' && ch <= 'f') 163 unicode = (unicode << 4) + (ch - 'a' + 10); 164 else if (ch >= 'A' && ch <= 'F') 165 unicode = (unicode << 4) + (ch - 'A' + 10); 166 else 167 return (0); /* also covers the null 168 * terminator */ 169 if (unicode > 0x10ffff) 170 return (0); 171 } 172 173 /* 174 * the following block is from 175 * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with 176 * permission by the authors. 177 */ 178 if (unicode < 0x80) { 179 VSTRING_ADDCH(unquoted, (char) unicode); 180 } else if (unicode < 0x800) { 181 VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6))); 182 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 183 } else if (unicode < 0x10000) { 184 VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12))); 185 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 186 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 187 } else if (unicode < 0x200000) { 188 VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18))); 189 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 190 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 191 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 192 } else if (unicode < 0x4000000) { 193 VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24))); 194 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); 195 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 196 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 197 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 198 } else { 199 VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30))); 200 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f)); 201 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); 202 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 203 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 204 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 205 } 206 } else { 207 VSTRING_ADDCH(unquoted, ch); 208 } 209 } 210 VSTRING_TERMINATE(unquoted); 211 return (unquoted); 212 } 213 214 /* uxtext_unquote - quoted data to unquoted */ 215 216 VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted) 217 { 218 VSTRING_RESET(unquoted); 219 return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0); 220 } 221 222 #ifdef TEST 223 224 /* 225 * Proof-of-concept test program: convert to quoted and back. 226 */ 227 #include <vstream.h> 228 229 #define BUFLEN 1024 230 231 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf) 232 { 233 ssize_t len; 234 235 VSTRING_RESET(buf); 236 len = vstream_fread(fp, STR(buf), vstring_avail(buf)); 237 VSTRING_AT_OFFSET(buf, len); /* XXX */ 238 VSTRING_TERMINATE(buf); 239 return (len); 240 } 241 242 int main(int unused_argc, char **unused_argv) 243 { 244 VSTRING *unquoted = vstring_alloc(BUFLEN); 245 VSTRING *quoted = vstring_alloc(100); 246 ssize_t len; 247 248 /* 249 * Negative tests. 250 */ 251 if (uxtext_unquote(unquoted, "\\x{x1}") != 0) 252 msg_warn("undetected error pattern 1"); 253 if (uxtext_unquote(unquoted, "\\x{2x}") != 0) 254 msg_warn("undetected error pattern 2"); 255 if (uxtext_unquote(unquoted, "\\x{33") != 0) 256 msg_warn("undetected error pattern 3"); 257 258 /* 259 * Positive tests. 260 */ 261 while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) { 262 uxtext_quote(quoted, STR(unquoted), "+="); 263 if (uxtext_unquote(unquoted, STR(quoted)) == 0) 264 msg_fatal("bad input: %.100s", STR(quoted)); 265 if (LEN(unquoted) != len) 266 msg_fatal("len %ld != unquoted len %ld", 267 (long) len, (long) LEN(unquoted)); 268 if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted)) 269 msg_fatal("write error: %m"); 270 } 271 vstream_fflush(VSTREAM_OUT); 272 vstring_free(unquoted); 273 vstring_free(quoted); 274 return (0); 275 } 276 277 #endif 278