xref: /netbsd-src/external/ibm-public/postfix/dist/src/global/uxtext.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: uxtext.c,v 1.2 2017/02/14 01:16:45 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	uxtext 3
6 /* SUMMARY
7 /*	quote/unquote text, xtext style.
8 /* SYNOPSIS
9 /*	#include <uxtext.h>
10 /*
11 /*	VSTRING	*uxtext_quote(quoted, unquoted, special)
12 /*	VSTRING	*quoted;
13 /*	const char *unquoted;
14 /*	const char *special;
15 /*
16 /*	VSTRING	*uxtext_quote_append(quoted, unquoted, special)
17 /*	VSTRING	*quoted;
18 /*	const char *unquoted;
19 /*	const char *special;
20 /*
21 /*	VSTRING	*uxtext_unquote(unquoted, quoted)
22 /*	VSTRING	*unquoted;
23 /*	const char *quoted;
24 /*
25 /*	VSTRING	*uxtext_unquote_append(unquoted, quoted)
26 /*	VSTRING	*unquoted;
27 /*	const char *quoted;
28 /* DESCRIPTION
29 /*	uxtext_quote() takes a null-terminated UTF8 string and
30 /*	replaces characters \, <33(10) and >126(10), as well as
31 /*	characters specified with "special" with \x{XX}, XX being
32 /*	a 2-6-digit uppercase hexadecimal equivalent.
33 /*
34 /*	uxtext_quote_append() is like uxtext_quote(), but appends
35 /*	the conversion result to the result buffer.
36 /*
37 /*	uxtext_unquote() performs the opposite transformation. This
38 /*	function understands lowercase, uppercase, and mixed case
39 /*	\x{XX...} sequences.  The result value is the unquoted
40 /*	argument in case of success, a null pointer otherwise.
41 /*
42 /*	uxtext_unquote_append() is like uxtext_unquote(), but appends
43 /*	the conversion result to the result buffer.
44 /* BUGS
45 /*	This module cannot process null characters in data.
46 /* LICENSE
47 /* .ad
48 /* .fi
49 /*	The Secure Mailer license must be distributed with this software.
50 /* AUTHOR(S)
51 /*	Arnt Gulbrandsen
52 /*
53 /*	Wietse Venema
54 /*	IBM T.J. Watson Research
55 /*	P.O. Box 704
56 /*	Yorktown Heights, NY 10598, USA
57 /*--*/
58 
59 /* System library. */
60 
61 #include <sys_defs.h>
62 #include <string.h>
63 #include <ctype.h>
64 
65 /* Utility library. */
66 
67 #include "msg.h"
68 #include "vstring.h"
69 #include "uxtext.h"
70 
71 /* Application-specific. */
72 
73 #define STR(x)	vstring_str(x)
74 #define LEN(x)	VSTRING_LEN(x)
75 
76 /* uxtext_quote_append - append unquoted data to quoted data */
77 
78 VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted,
79 			             const char *special)
80 {
81     unsigned const char *cp;
82     int     ch;
83 
84     for (cp = (unsigned const char *) unquoted; (ch = *cp) != 0; cp++) {
85 	/* Fix 20140709: the '\' character must always be quoted. */
86 	if (ch != '\\' && ch > 32 && ch < 127
87 	    && (*special == 0 || strchr(special, ch) == 0)) {
88 	    VSTRING_ADDCH(quoted, ch);
89 	} else {
90 
91 	    /*
92 	     * had RFC6533 been written like 6531 and 6532, this else clause
93 	     * would be one line long.
94 	     */
95 	    int     unicode = 0;
96 	    int     pick = 0;
97 
98 	    if (ch < 0x80) {
99 		//0000 0000 - 0000 007 F 0x xxxxxx
100 		    unicode = ch;
101 	    } else if ((ch & 0xe0) == 0xc0) {
102 		//0000 0080 - 0000 07 FF 110 xxxxx 10 xxxxxx
103 		    unicode = (ch & 0x1f);
104 		pick = 1;
105 	    } else if ((ch & 0xf0) == 0xe0) {
106 		//0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
107 		    unicode = (ch & 0x0f);
108 		pick = 2;
109 	    } else if ((ch & 0xf8) == 0xf0) {
110 		//0001 0000 - 001 F FFFF 11110 xxx 10 xxxxxx 10 xxxxxx 10 xxxxxx
111 		    unicode = (ch & 0x07);
112 		pick = 3;
113 	    } else if ((ch & 0xfc) == 0xf8) {
114 		//0020 0000 - 03 FF FFFF 111110 xx 10 xxxxxx 10 xxxxxx...10 xxxxxx
115 		    unicode = (ch & 0x03);
116 		pick = 4;
117 	    } else if ((ch & 0xfe) == 0xfc) {
118 		//0400 0000 - 7 FFF FFFF 1111110 x 10 xxxxxx...10 xxxxxx
119 		    unicode = (ch & 0x01);
120 		pick = 5;
121 	    } else {
122 		return (0);
123 	    }
124 	    while (pick > 0) {
125 		ch = *++cp;
126 		if ((ch & 0xc0) != 0x80)
127 		    return (0);
128 		unicode = unicode << 6 | (ch & 0x3f);
129 		pick--;
130 	    }
131 	    vstring_sprintf_append(quoted, "\\x{%02X}", unicode);
132 	}
133     }
134     VSTRING_TERMINATE(quoted);
135     return (quoted);
136 }
137 
138 /* uxtext_quote - unquoted data to quoted */
139 
140 VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
141 {
142     VSTRING_RESET(quoted);
143     uxtext_quote_append(quoted, unquoted, special);
144     return (quoted);
145 }
146 
147 /* uxtext_unquote_append - quoted data to unquoted */
148 
149 VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
150 {
151     const unsigned char *cp;
152     int     ch;
153 
154     for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) {
155 	if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') {
156 	    int     unicode = 0;
157 
158 	    cp += 2;
159 	    while ((ch = *++cp) != '}') {
160 		if (ISDIGIT(ch))
161 		    unicode = (unicode << 4) + (ch - '0');
162 		else if (ch >= 'a' && ch <= 'f')
163 		    unicode = (unicode << 4) + (ch - 'a' + 10);
164 		else if (ch >= 'A' && ch <= 'F')
165 		    unicode = (unicode << 4) + (ch - 'A' + 10);
166 		else
167 		    return (0);			/* also covers the null
168 						 * terminator */
169 		if (unicode > 0x10ffff)
170 		    return (0);
171 	    }
172 
173 	    /*
174 	     * the following block is from
175 	     * https://github.com/aox/aox/blob/master/encodings/utf.cpp, with
176 	     * permission by the authors.
177 	     */
178 	    if (unicode < 0x80) {
179 		VSTRING_ADDCH(unquoted, (char) unicode);
180 	    } else if (unicode < 0x800) {
181 		VSTRING_ADDCH(unquoted, 0xc0 | ((char) (unicode >> 6)));
182 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
183 	    } else if (unicode < 0x10000) {
184 		VSTRING_ADDCH(unquoted, 0xe0 | ((char) (unicode >> 12)));
185 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
186 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
187 	    } else if (unicode < 0x200000) {
188 		VSTRING_ADDCH(unquoted, 0xf0 | ((char) (unicode >> 18)));
189 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
190 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
191 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
192 	    } else if (unicode < 0x4000000) {
193 		VSTRING_ADDCH(unquoted, 0xf8 | ((char) (unicode >> 24)));
194 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
195 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
196 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
197 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
198 	    } else {
199 		VSTRING_ADDCH(unquoted, 0xfc | ((char) (unicode >> 30)));
200 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 24) & 0x3f));
201 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 18) & 0x3f));
202 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 12) & 0x3f));
203 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode >> 6) & 0x3f));
204 		VSTRING_ADDCH(unquoted, 0x80 | ((char) (unicode & 0x3f)));
205 	    }
206 	} else {
207 	    VSTRING_ADDCH(unquoted, ch);
208 	}
209     }
210     VSTRING_TERMINATE(unquoted);
211     return (unquoted);
212 }
213 
214 /* uxtext_unquote - quoted data to unquoted */
215 
216 VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted)
217 {
218     VSTRING_RESET(unquoted);
219     return (uxtext_unquote_append(unquoted, quoted) ? unquoted : 0);
220 }
221 
222 #ifdef TEST
223 
224  /*
225   * Proof-of-concept test program: convert to quoted and back.
226   */
227 #include <vstream.h>
228 
229 #define BUFLEN 1024
230 
231 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf)
232 {
233     ssize_t len;
234 
235     VSTRING_RESET(buf);
236     len = vstream_fread(fp, STR(buf), vstring_avail(buf));
237     VSTRING_AT_OFFSET(buf, len);		/* XXX */
238     VSTRING_TERMINATE(buf);
239     return (len);
240 }
241 
242 int     main(int unused_argc, char **unused_argv)
243 {
244     VSTRING *unquoted = vstring_alloc(BUFLEN);
245     VSTRING *quoted = vstring_alloc(100);
246     ssize_t len;
247 
248     /*
249      * Negative tests.
250      */
251     if (uxtext_unquote(unquoted, "\\x{x1}") != 0)
252 	msg_warn("undetected error pattern 1");
253     if (uxtext_unquote(unquoted, "\\x{2x}") != 0)
254 	msg_warn("undetected error pattern 2");
255     if (uxtext_unquote(unquoted, "\\x{33") != 0)
256 	msg_warn("undetected error pattern 3");
257 
258     /*
259      * Positive tests.
260      */
261     while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) {
262 	uxtext_quote(quoted, STR(unquoted), "+=");
263 	if (uxtext_unquote(unquoted, STR(quoted)) == 0)
264 	    msg_fatal("bad input: %.100s", STR(quoted));
265 	if (LEN(unquoted) != len)
266 	    msg_fatal("len %ld != unquoted len %ld",
267 		      (long) len, (long) LEN(unquoted));
268 	if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted))
269 	    msg_fatal("write error: %m");
270     }
271     vstream_fflush(VSTREAM_OUT);
272     vstring_free(unquoted);
273     vstring_free(quoted);
274     return (0);
275 }
276 
277 #endif
278