xref: /netbsd-src/external/ibm-public/postfix/dist/src/global/uxtext.c (revision 33881f779a77dce6440bdc44610d94de75bebefe)
1 /*	$NetBSD: uxtext.c,v 1.3 2020/03/18 19:05:16 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	uxtext 3
6 /* SUMMARY
7 /*	quote/unquote text, xtext style.
8 /* SYNOPSIS
9 /*	#include <uxtext.h>
10 /*
11 /*	VSTRING	*uxtext_quote(quoted, unquoted, special)
12 /*	VSTRING	*quoted;
13 /*	const char *unquoted;
14 /*	const char *special;
15 /*
16 /*	VSTRING	*uxtext_quote_append(quoted, unquoted, special)
17 /*	VSTRING	*quoted;
18 /*	const char *unquoted;
19 /*	const char *special;
20 /*
21 /*	VSTRING	*uxtext_unquote(unquoted, quoted)
22 /*	VSTRING	*unquoted;
23 /*	const char *quoted;
24 /*
25 /*	VSTRING	*uxtext_unquote_append(unquoted, quoted)
26 /*	VSTRING	*unquoted;
27 /*	const char *quoted;
28 /* DESCRIPTION
29 /*	uxtext_quote() takes a null-terminated UTF8 string and
30 /*	replaces characters \, <33(10) and >126(10), as well as
31 /*	characters specified with "special" with \x{XX}, XX being
32 /*	a 2-6-digit uppercase hexadecimal equivalent.
33 /*
34 /*	uxtext_quote_append() is like uxtext_quote(), but appends
35 /*	the conversion result to the result buffer.
36 /*
37 /*	uxtext_unquote() performs the opposite transformation. This
38 /*	function understands lowercase, uppercase, and mixed case
39 /*	\x{XX...} sequences.  The result value is the unquoted
40 /*	argument in case of success, a null pointer otherwise.
41 /*
42 /*	uxtext_unquote_append() is like uxtext_unquote(), but appends
43 /*	the conversion result to the result buffer.
44 /* BUGS
45 /*	This module cannot process null characters in data.
46 /* LICENSE
47 /* .ad
48 /* .fi
49 /*	The Secure Mailer license must be distributed with this software.
50 /* AUTHOR(S)
51 /*	Arnt Gulbrandsen
52 /*
53 /*	Wietse Venema
54 /*	IBM T.J. Watson Research
55 /*	P.O. Box 704
56 /*	Yorktown Heights, NY 10598, USA
57 /*
58 /*	Wietse Venema
59 /*	Google, Inc.
60 /*	111 8th Avenue
61 /*	New York, NY 10011, USA
62 /*--*/
63 
64 /* System library. */
65 
66 #include <sys_defs.h>
67 #include <string.h>
68 #include <ctype.h>
69 
70 /* Utility library. */
71 
72 #include "msg.h"
73 #include "vstring.h"
74 #include "uxtext.h"
75 
76 /* Application-specific. */
77 
78 #define STR(x)	vstring_str(x)
79 #define LEN(x)	VSTRING_LEN(x)
80 
81 /* uxtext_quote_append - append unquoted data to quoted data */
82 
/*
 * Append the uxtext-quoted form of a null-terminated UTF-8 string to the
 * result buffer.
 *
 * quoted	result buffer; the conversion result is appended to it.
 * unquoted	null-terminated input string.
 * special	additional characters that must be quoted; may be empty.
 *
 * Characters in the range 33..126, other than '\' and those listed in
 * "special", are copied as is. Everything else is decoded to a code point
 * and written as \x{HEX}, using uppercase hexadecimal with at least two
 * digits.
 *
 * The result is the "quoted" argument, or a null pointer when the input
 * contains a byte that cannot start a UTF-8 sequence, or a sequence with a
 * bad or missing continuation byte. In both cases the buffer is
 * null-terminated on return; after a failure it holds whatever was
 * converted before the offending byte.
 */
VSTRING *uxtext_quote_append(VSTRING *quoted, const char *unquoted,
			             const char *special)
{
    const unsigned char *cp;
    int     ch;

    for (cp = (const unsigned char *) unquoted; (ch = *cp) != 0; cp++) {
	int     unicode;
	int     pick;			/* continuation bytes to consume */

	/* Fix 20140709: the '\' character must always be quoted. */
	if (ch != '\\' && ch > 32 && ch < 127
	    && (*special == 0 || strchr(special, ch) == 0)) {
	    VSTRING_ADDCH(quoted, ch);
	    continue;
	}

	/*
	 * Had RFC 6533 been written like 6531 and 6532, the code below
	 * would be one line long. Instead we have to decode the UTF-8
	 * sequence into a code point. The lead byte determines the number
	 * of continuation bytes and contributes the high-order bits. The
	 * legacy 5-byte and 6-byte forms are still accepted here.
	 */
	if (ch < 0x80) {
	    /* 0xxxxxxx */
	    unicode = ch;
	    pick = 0;
	} else if ((ch & 0xe0) == 0xc0) {
	    /* 110xxxxx 10xxxxxx */
	    unicode = (ch & 0x1f);
	    pick = 1;
	} else if ((ch & 0xf0) == 0xe0) {
	    /* 1110xxxx 10xxxxxx 10xxxxxx */
	    unicode = (ch & 0x0f);
	    pick = 2;
	} else if ((ch & 0xf8) == 0xf0) {
	    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	    unicode = (ch & 0x07);
	    pick = 3;
	} else if ((ch & 0xfc) == 0xf8) {
	    /* 111110xx followed by four 10xxxxxx bytes */
	    unicode = (ch & 0x03);
	    pick = 4;
	} else if ((ch & 0xfe) == 0xfc) {
	    /* 1111110x followed by five 10xxxxxx bytes */
	    unicode = (ch & 0x01);
	    pick = 5;
	} else {
	    /* Stray continuation byte, or 0xfe/0xff. */
	    VSTRING_TERMINATE(quoted);
	    return (0);
	}

	/*
	 * Each continuation byte supplies six more bits. The test below
	 * also fails on the null terminator, so a truncated sequence never
	 * makes us read past the end of the input.
	 */
	while (pick-- > 0) {
	    ch = *++cp;
	    if ((ch & 0xc0) != 0x80) {
		VSTRING_TERMINATE(quoted);
		return (0);
	    }
	    unicode = (unicode << 6) | (ch & 0x3f);
	}
	vstring_sprintf_append(quoted, "\\x{%02X}", unicode);
    }
    VSTRING_TERMINATE(quoted);
    return (quoted);
}
142 
143 /* uxtext_quote - unquoted data to quoted */
144 
/*
 * Replace the content of the result buffer with the uxtext-quoted form of
 * a null-terminated UTF-8 string.
 *
 * quoted	result buffer; its previous content is discarded.
 * unquoted	null-terminated input string.
 * special	additional characters that must be quoted; may be empty.
 *
 * The result is always the "quoted" argument, and it is always
 * null-terminated. When the input cannot be converted completely, the
 * buffer holds only the part that was converted.
 */
VSTRING *uxtext_quote(VSTRING *quoted, const char *unquoted, const char *special)
{
    VSTRING_RESET(quoted);

    /*
     * uxtext_quote_append() reports malformed input with a null result. Do
     * not rely on it having terminated the buffer in that case: after the
     * reset above, an unterminated buffer would expose leftover bytes from
     * an earlier use to anyone reading the result as a C string.
     */
    if (uxtext_quote_append(quoted, unquoted, special) == 0)
	VSTRING_TERMINATE(quoted);
    return (quoted);
}
151 
152 /* uxtext_unquote_append - quoted data to unquoted */
153 
/*
 * Append the unquoted form of a null-terminated uxtext string to the
 * result buffer.
 *
 * unquoted	result buffer; the conversion result is appended to it.
 * quoted	null-terminated input string.
 *
 * Each \x{HEX} sequence (lowercase, uppercase or mixed case) is replaced
 * with the UTF-8 encoding of that code point; all other input is copied
 * as is.
 *
 * The result is the "unquoted" argument, or a null pointer when a sequence
 * contains a non-hexadecimal character, has no closing brace, names a code
 * point above 0x10ffff, or names code point zero (this includes the empty
 * sequence "\x{}"). The buffer content is not meaningful after a failure.
 */
VSTRING *uxtext_unquote_append(VSTRING *unquoted, const char *quoted)
{
    const unsigned char *cp;
    int     ch;

    for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) {
	int     unicode = 0;
	int     digit;

	/* Anything that does not start with "\x{" is literal text. */
	if (ch != '\\' || cp[1] != 'x' || cp[2] != '{') {
	    VSTRING_ADDCH(unquoted, ch);
	    continue;
	}

	/*
	 * Skip to the opening brace, then accumulate hexadecimal digits up
	 * to the closing brace. The range check runs after every digit, so
	 * the accumulator cannot overflow no matter how long the input is.
	 */
	cp += 2;
	while ((ch = *++cp) != '}') {
	    if (ch >= '0' && ch <= '9')
		digit = ch - '0';
	    else if (ch >= 'a' && ch <= 'f')
		digit = ch - 'a' + 10;
	    else if (ch >= 'A' && ch <= 'F')
		digit = ch - 'A' + 10;
	    else
		return (0);		/* also covers the null
					 * terminator */
	    unicode = (unicode << 4) + digit;
	    if (unicode > 0x10ffff)
		return (0);
	}

	/*
	 * Code point zero would put a null byte into the result, silently
	 * truncating it for everyone who treats it as a C string. The same
	 * test rejects a sequence without any digits.
	 */
	if (unicode == 0)
	    return (0);

	/*
	 * Encode the code point as UTF-8. The value cannot exceed 0x10ffff
	 * at this point, therefore four bytes are always sufficient.
	 */
	if (unicode < 0x80) {
	    VSTRING_ADDCH(unquoted, unicode);
	} else if (unicode < 0x800) {
	    VSTRING_ADDCH(unquoted, 0xc0 | (unicode >> 6));
	    VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
	} else if (unicode < 0x10000) {
	    VSTRING_ADDCH(unquoted, 0xe0 | (unicode >> 12));
	    VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 6) & 0x3f));
	    VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
	} else {
	    VSTRING_ADDCH(unquoted, 0xf0 | (unicode >> 18));
	    VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 12) & 0x3f));
	    VSTRING_ADDCH(unquoted, 0x80 | ((unicode >> 6) & 0x3f));
	    VSTRING_ADDCH(unquoted, 0x80 | (unicode & 0x3f));
	}
    }
    VSTRING_TERMINATE(unquoted);
    return (unquoted);
}
218 
219 /* uxtext_unquote - quoted data to unquoted */
220 
/*
 * Replace the content of the result buffer with the unquoted form of a
 * null-terminated uxtext string.
 *
 * unquoted	result buffer; its previous content is discarded.
 * quoted	null-terminated input string.
 *
 * The result is the "unquoted" argument, or a null pointer when
 * uxtext_unquote_append() rejects the input.
 */
VSTRING *uxtext_unquote(VSTRING *unquoted, const char *quoted)
{
    VSTRING_RESET(unquoted);
    if (uxtext_unquote_append(unquoted, quoted) == 0)
	return (0);
    return (unquoted);
}
226 
227 #ifdef TEST
228 
229  /*
230   * Proof-of-concept test program: convert to quoted and back.
231   */
232 #include <vstream.h>
233 
234 #define BUFLEN 1024
235 
read_buf(VSTREAM * fp,VSTRING * buf)236 static ssize_t read_buf(VSTREAM *fp, VSTRING *buf)
237 {
238     ssize_t len;
239 
240     len = vstream_fread_buf(fp, buf, BUFLEN);
241     VSTRING_TERMINATE(buf);
242     return (len);
243 }
244 
main(int unused_argc,char ** unused_argv)245 int     main(int unused_argc, char **unused_argv)
246 {
247     VSTRING *unquoted = vstring_alloc(BUFLEN);
248     VSTRING *quoted = vstring_alloc(100);
249     ssize_t len;
250 
251     /*
252      * Negative tests.
253      */
254     if (uxtext_unquote(unquoted, "\\x{x1}") != 0)
255 	msg_warn("undetected error pattern 1");
256     if (uxtext_unquote(unquoted, "\\x{2x}") != 0)
257 	msg_warn("undetected error pattern 2");
258     if (uxtext_unquote(unquoted, "\\x{33") != 0)
259 	msg_warn("undetected error pattern 3");
260 
261     /*
262      * Positive tests.
263      */
264     while ((len = read_buf(VSTREAM_IN, unquoted)) > 0) {
265 	uxtext_quote(quoted, STR(unquoted), "+=");
266 	if (uxtext_unquote(unquoted, STR(quoted)) == 0)
267 	    msg_fatal("bad input: %.100s", STR(quoted));
268 	if (LEN(unquoted) != len)
269 	    msg_fatal("len %ld != unquoted len %ld",
270 		      (long) len, (long) LEN(unquoted));
271 	if (vstream_fwrite(VSTREAM_OUT, STR(unquoted), LEN(unquoted)) != LEN(unquoted))
272 	    msg_fatal("write error: %m");
273     }
274     vstream_fflush(VSTREAM_OUT);
275     vstring_free(unquoted);
276     vstring_free(quoted);
277     return (0);
278 }
279 
280 #endif
281