1 /* $NetBSD: uxtext.c,v 1.3 2020/03/18 19:05:16 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* uxtext 3
6 /* SUMMARY
7 /* quote/unquote text, xtext style.
8 /* SYNOPSIS
9 /* #include <uxtext.h>
10 /*
11 /* VSTRING *uxtext_quote(quoted, unquoted, special)
12 /* VSTRING *quoted;
13 /* const char *unquoted;
14 /* const char *special;
15 /*
/* VSTRING *uxtext_quote_append(quoted, unquoted, special)
/* VSTRING *quoted;
/* const char *unquoted;
19 /* const char *special;
20 /*
21 /* VSTRING *uxtext_unquote(unquoted, quoted)
22 /* VSTRING *unquoted;
23 /* const char *quoted;
24 /*
25 /* VSTRING *uxtext_unquote_append(unquoted, quoted)
26 /* VSTRING *unquoted;
27 /* const char *quoted;
28 /* DESCRIPTION
29 /* uxtext_quote() takes a null-terminated UTF8 string and
30 /* replaces characters \, <33(10) and >126(10), as well as
31 /* characters specified with "special" with \x{XX}, XX being
32 /* a 2-6-digit uppercase hexadecimal equivalent.
33 /*
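/* uxtext_quote_append() is like uxtext_quote(), but appends
/* the conversion result to the result buffer. The result value
/* is a null pointer when the input contains an invalid UTF-8
/* lead byte or a missing UTF-8 continuation byte.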
36 /*
37 /* uxtext_unquote() performs the opposite transformation. This
38 /* function understands lowercase, uppercase, and mixed case
39 /* \x{XX...} sequences. The result value is the unquoted
40 /* argument in case of success, a null pointer otherwise.
41 /*
42 /* uxtext_unquote_append() is like uxtext_unquote(), but appends
43 /* the conversion result to the result buffer.
44 /* BUGS
45 /* This module cannot process null characters in data.
46 /* LICENSE
47 /* .ad
48 /* .fi
49 /* The Secure Mailer license must be distributed with this software.
50 /* AUTHOR(S)
51 /* Arnt Gulbrandsen
52 /*
53 /* Wietse Venema
54 /* IBM T.J. Watson Research
55 /* P.O. Box 704
56 /* Yorktown Heights, NY 10598, USA
57 /*
58 /* Wietse Venema
59 /* Google, Inc.
60 /* 111 8th Avenue
61 /* New York, NY 10011, USA
62 /*--*/
63
64 /* System library. */
65
66 #include <sys_defs.h>
67 #include <string.h>
68 #include <ctype.h>
69
70 /* Utility library. */
71
72 #include "msg.h"
73 #include "vstring.h"
74 #include "uxtext.h"
75
76 /* Application-specific. */
77
78 #define STR(x) vstring_str(x)
79 #define LEN(x) VSTRING_LEN(x)
80
81 /* uxtext_quote_append - append unquoted data to quoted data */
82
uxtext_quote_append(VSTRING * quoted,const char * unquoted,const char * special)83 VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted,
84 const char *special)
85 {
86 unsigned const char *cp;
87 int ch;
88
89 for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) {
90 /* Fix 20140709: the '\' character must always be quoted. */
91 if (ch != '\\' && ch > 32 && ch < 127
92 && (*special == 0 || strchr(special, ch) == 0)) {
93 VSTRING_ADDCH(quoted, ch);
94 } else {
95
96 /*
97 * had RFC6533 been written like 6531 and 6532, this else clause
98 * would be one line long.
99 */
100 int unicode = 0;
101 int pick = 0;
102
103 if (ch < 0x80) {
104 //0000 0000 - 0000 007 F 0x xxxxxx
105 unicode = ch;
106 } else if ((ch & 0xe0) == 0xc0) {
107 //0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx
108 unicode = (ch & 0x1f);
109 pick = 1;
110 } else if ((ch & 0xf0) == 0xe0) {
111 //0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
112 unicode = (ch & 0x0f);
113 pick = 2;
114 } else if ((ch & 0xf8) == 0xf0) {
115 //0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx
116 unicode = (ch & 0x07);
117 pick = 3;
118 } else if ((ch & 0xfc) == 0xf8) {
119 //0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx
120 unicode = (ch & 0x03);
121 pick = 4;
122 } else if ((ch & 0xfe) == 0xfc) {
123 //0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx
124 unicode = (ch & 0x01);
125 pick = 5;
126 } else {
127 return (0);
128 }
129 while (pick > 0) {
130 ch = *++cp;
131 if ((ch & 0xc0) != 0x80)
132 return (0);
133 unicode = unicode << 6 | (ch & 0x3f);
134 pick--;
135 }
136 vstring_sprintf_append(quoted, "\\x{%02X}", unicode);
137 }
138 }
139 VSTRING_TERMINATE(quoted);
140 return (quoted);
141 }
142
143 /* uxtext_quote - unquoted data to quoted */
144
uxtext_quote(VSTRING * quoted,const char * unquoted,const char * special)145 VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
146 {
147 VSTRING_RESET(quoted);
148 uxtext_quote_append(quoted, unquoted, special);
149 return (quoted);
150 }
151
152 /* uxtext_unquote_append - quoted data to unquoted */
153
uxtext_unquote_append(VSTRING * unquoted,const char * quoted)154 VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
155 {
156 const unsigned char *cp;
157 int ch;
158
159 for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) {
160 if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') {
161 int unicode = 0;
162
163 cp += 2;
164 while ((ch = *++cp) != '}') {
165 if (ISDIGIT(ch))
166 unicode = (unicode << 4) + (ch - '0');
167 else if (ch >= 'a' && ch <= 'f')
168 unicode = (unicode << 4) + (ch - 'a' + 10);
169 else if (ch >= 'A' && ch <= 'F')
170 unicode = (unicode << 4) + (ch - 'A' + 10);
171 else
172 return (0); /* also covers the null
173 * terminator */
174 if (unicode > 0x10ffff)
175 return (0);
176 }
177
178 /*
179 * the following block is from
180 * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with
181 * permission by the authors.
182 */
183 if (unicode < 0x80) {
184 VSTRING_ADDCH(unquoted, (char) unicode);
185 } else if (unicode < 0x800) {
186 VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6)));
187 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
188 } else if (unicode < 0x10000) {
189 VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12)));
190 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
191 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
192 } else if (unicode < 0x200000) {
193 VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18)));
194 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
195 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
196 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
197 } else if (unicode < 0x4000000) {
198 VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24)));
199 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
200 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
201 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
202 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
203 } else {
204 VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30)));
205 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f));
206 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
207 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
208 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
209 VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
210 }
211 } else {
212 VSTRING_ADDCH(unquoted, ch);
213 }
214 }
215 VSTRING_TERMINATE(unquoted);
216 return (unquoted);
217 }
218
219 /* uxtext_unquote - quoted data to unquoted */
220
uxtext_unquote(VSTRING * unquoted,const char * quoted)221 VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted)
222 {
223 VSTRING_RESET(unquoted);
224 return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0);
225 }
226
227 #ifdef TEST
228
229 /*
230 * Proof-of-concept test program: convert to quoted and back.
231 */
232 #include <vstream.h>
233
234 #define BUFLEN 1024
235
/* read_buf - read up to BUFLEN bytes into buf and null-terminate it */

static ssize_t read_buf(VSTREAM *fp, VSTRING *buf)
{
    const ssize_t count = vstream_fread_buf(fp, buf, BUFLEN);

    /* Terminate regardless of the read result, then pass that result on. */
    VSTRING_TERMINATE(buf);
    return (count);
}
244
main(int unused_argc,char ** unused_argv)245 int main(int unused_argc, char **unused_argv)
246 {
247 VSTRING *unquoted = vstring_alloc(BUFLEN);
248 VSTRING *quoted = vstring_alloc(100);
249 ssize_t len;
250
251 /*
252 * Negative tests.
253 */
254 if (uxtext_unquote(unquoted, "\\x{x1}") != 0)
255 msg_warn("undetected error pattern 1");
256 if (uxtext_unquote(unquoted, "\\x{2x}") != 0)
257 msg_warn("undetected error pattern 2");
258 if (uxtext_unquote(unquoted, "\\x{33") != 0)
259 msg_warn("undetected error pattern 3");
260
261 /*
262 * Positive tests.
263 */
264 while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) {
265 uxtext_quote(quoted, STR(unquoted), "+=");
266 if (uxtext_unquote(unquoted, STR(quoted)) == 0)
267 msg_fatal("bad input: %.100s", STR(quoted));
268 if (LEN(unquoted) != len)
269 msg_fatal("len %ld != unquoted len %ld",
270 (long) len, (long) LEN(unquoted));
271 if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted))
272 msg_fatal("write error: %m");
273 }
274 vstream_fflush(VSTREAM_OUT);
275 vstring_free(unquoted);
276 vstring_free(quoted);
277 return (0);
278 }
279
280 #endif
281