1 /* $NetBSD: uxtext.c,v 1.3 2020/03/18 19:05:16 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* uxtext 3 6 /* SUMMARY 7 /* quote/unquote text, xtext style. 8 /* SYNOPSIS 9 /* #include <uxtext.h> 10 /* 11 /* VSTRING *uxtext_quote(quoted, unquoted, special) 12 /* VSTRING *quoted; 13 /* const char *unquoted; 14 /* const char *special; 15 /* 16 /* VSTRING *uxtext_quote_append(unquoted, quoted, special) 17 /* VSTRING *unquoted; 18 /* const char *quoted; 19 /* const char *special; 20 /* 21 /* VSTRING *uxtext_unquote(unquoted, quoted) 22 /* VSTRING *unquoted; 23 /* const char *quoted; 24 /* 25 /* VSTRING *uxtext_unquote_append(unquoted, quoted) 26 /* VSTRING *unquoted; 27 /* const char *quoted; 28 /* DESCRIPTION 29 /* uxtext_quote() takes a null-terminated UTF8 string and 30 /* replaces characters \, <33(10) and >126(10), as well as 31 /* characters specified with "special" with \x{XX}, XX being 32 /* a 2-6-digit uppercase hexadecimal equivalent. 33 /* 34 /* uxtext_quote_append() is like uxtext_quote(), but appends 35 /* the conversion result to the result buffer. 36 /* 37 /* uxtext_unquote() performs the opposite transformation. This 38 /* function understands lowercase, uppercase, and mixed case 39 /* \x{XX...} sequences. The result value is the unquoted 40 /* argument in case of success, a null pointer otherwise. 41 /* 42 /* uxtext_unquote_append() is like uxtext_unquote(), but appends 43 /* the conversion result to the result buffer. 44 /* BUGS 45 /* This module cannot process null characters in data. 46 /* LICENSE 47 /* .ad 48 /* .fi 49 /* The Secure Mailer license must be distributed with this software. 50 /* AUTHOR(S) 51 /* Arnt Gulbrandsen 52 /* 53 /* Wietse Venema 54 /* IBM T.J. Watson Research 55 /* P.O. Box 704 56 /* Yorktown Heights, NY 10598, USA 57 /* 58 /* Wietse Venema 59 /* Google, Inc. 60 /* 111 8th Avenue 61 /* New York, NY 10011, USA 62 /*--*/ 63 64 /* System library. */ 65 66 #include <sys_defs.h> 67 #include <string.h> 68 #include <ctype.h> 69 70 /* Utility library. */ 71 72 #include "msg.h" 73 #include "vstring.h" 74 #include "uxtext.h" 75 76 /* Application-specific. */ 77 78 #define STR(x) vstring_str(x) 79 #define LEN(x) VSTRING_LEN(x) 80 81 /* uxtext_quote_append - append unquoted data to quoted data */ 82 83 VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted, 84 const char *special) 85 { 86 unsigned const char *cp; 87 int ch; 88 89 for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) { 90 /* Fix 20140709: the '\' character must always be quoted. */ 91 if (ch != '\\' && ch > 32 && ch < 127 92 && (*special == 0 || strchr(special, ch) == 0)) { 93 VSTRING_ADDCH(quoted, ch); 94 } else { 95 96 /* 97 * had RFC6533 been written like 6531 and 6532, this else clause 98 * would be one line long. 99 */ 100 int unicode = 0; 101 int pick = 0; 102 103 if (ch < 0x80) { 104 //0000 0000 - 0000 007 F 0x xxxxxx 105 unicode = ch; 106 } else if ((ch & 0xe0) == 0xc0) { 107 //0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx 108 unicode = (ch & 0x1f); 109 pick = 1; 110 } else if ((ch & 0xf0) == 0xe0) { 111 //0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx 112 unicode = (ch & 0x0f); 113 pick = 2; 114 } else if ((ch & 0xf8) == 0xf0) { 115 //0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx 116 unicode = (ch & 0x07); 117 pick = 3; 118 } else if ((ch & 0xfc) == 0xf8) { 119 //0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx 120 unicode = (ch & 0x03); 121 pick = 4; 122 } else if ((ch & 0xfe) == 0xfc) { 123 //0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx 124 unicode = (ch & 0x01); 125 pick = 5; 126 } else { 127 return (0); 128 } 129 while (pick > 0) { 130 ch = *++cp; 131 if ((ch & 0xc0) != 0x80) 132 return (0); 133 unicode = unicode << 6 | (ch & 0x3f); 134 pick--; 135 } 136 vstring_sprintf_append(quoted, "\\x{%02X}", unicode); 137 } 138 } 139 VSTRING_TERMINATE(quoted); 140 return (quoted); 141 } 142 143 /* uxtext_quote - unquoted data to quoted */ 144 145 VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special) 146 { 147 VSTRING_RESET(quoted); 148 uxtext_quote_append(quoted, unquoted, special); 149 return (quoted); 150 } 151 152 /* uxtext_unquote_append - quoted data to unquoted */ 153 154 VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted) 155 { 156 const unsigned char *cp; 157 int ch; 158 159 for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) { 160 if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') { 161 int unicode = 0; 162 163 cp += 2; 164 while ((ch = *++cp) != '}') { 165 if (ISDIGIT(ch)) 166 unicode = (unicode << 4) + (ch - '0'); 167 else if (ch >= 'a' && ch <= 'f') 168 unicode = (unicode << 4) + (ch - 'a' + 10); 169 else if (ch >= 'A' && ch <= 'F') 170 unicode = (unicode << 4) + (ch - 'A' + 10); 171 else 172 return (0); /* also covers the null 173 * terminator */ 174 if (unicode > 0x10ffff) 175 return (0); 176 } 177 178 /* 179 * the following block is from 180 * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with 181 * permission by the authors. 182 */ 183 if (unicode < 0x80) { 184 VSTRING_ADDCH(unquoted, (char) unicode); 185 } else if (unicode < 0x800) { 186 VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6))); 187 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 188 } else if (unicode < 0x10000) { 189 VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12))); 190 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 191 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 192 } else if (unicode < 0x200000) { 193 VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18))); 194 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 195 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 196 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 197 } else if (unicode < 0x4000000) { 198 VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24))); 199 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); 200 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 201 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 202 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 203 } else { 204 VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30))); 205 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f)); 206 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f)); 207 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f)); 208 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f)); 209 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f))); 210 } 211 } else { 212 VSTRING_ADDCH(unquoted, ch); 213 } 214 } 215 VSTRING_TERMINATE(unquoted); 216 return (unquoted); 217 } 218 219 /* uxtext_unquote - quoted data to unquoted */ 220 221 VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted) 222 { 223 VSTRING_RESET(unquoted); 224 return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0); 225 } 226 227 #ifdef TEST 228 229 /* 230 * Proof-of-concept test program: convert to quoted and back. 231 */ 232 #include <vstream.h> 233 234 #define BUFLEN 1024 235 236 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf) 237 { 238 ssize_t len; 239 240 len = vstream_fread_buf(fp, buf, BUFLEN); 241 VSTRING_TERMINATE(buf); 242 return (len); 243 } 244 245 int main(int unused_argc, char **unused_argv) 246 { 247 VSTRING *unquoted = vstring_alloc(BUFLEN); 248 VSTRING *quoted = vstring_alloc(100); 249 ssize_t len; 250 251 /* 252 * Negative tests. 253 */ 254 if (uxtext_unquote(unquoted, "\\x{x1}") != 0) 255 msg_warn("undetected error pattern 1"); 256 if (uxtext_unquote(unquoted, "\\x{2x}") != 0) 257 msg_warn("undetected error pattern 2"); 258 if (uxtext_unquote(unquoted, "\\x{33") != 0) 259 msg_warn("undetected error pattern 3"); 260 261 /* 262 * Positive tests. 263 */ 264 while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) { 265 uxtext_quote(quoted, STR(unquoted), "+="); 266 if (uxtext_unquote(unquoted, STR(quoted)) == 0) 267 msg_fatal("bad input: %.100s", STR(quoted)); 268 if (LEN(unquoted) != len) 269 msg_fatal("len %ld != unquoted len %ld", 270 (long) len, (long) LEN(unquoted)); 271 if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted)) 272 msg_fatal("write error: %m"); 273 } 274 vstream_fflush(VSTREAM_OUT); 275 vstring_free(unquoted); 276 vstring_free(quoted); 277 return (0); 278 } 279 280 #endif 281