xref: /minix3/external/bsd/bind/dist/contrib/idn/idnkit-1.0-src/lib/race.c (revision 00b67f09dd46474d133c95011a48590a8e8f94c7)
1 /*	$NetBSD: race.c,v 1.4 2014/12/10 04:37:55 christos Exp $	*/
2 
3 #ifndef lint
4 static char *rcsid = "Id: race.c,v 1.1 2003/06/04 00:26:07 marka Exp ";
5 #endif
6 
7 /*
8  * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9  * All rights reserved.
10  *
11  * By using this file, you agree to the terms and conditions set forth bellow.
12  *
13  * 			LICENSE TERMS AND CONDITIONS
14  *
15  * The following License Terms and Conditions apply, unless a different
16  * license is obtained from Japan Network Information Center ("JPNIC"),
17  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18  * Chiyoda-ku, Tokyo 101-0047, Japan.
19  *
20  * 1. Use, Modification and Redistribution (including distribution of any
21  *    modified or derived work) in source and/or binary forms is permitted
22  *    under this License Terms and Conditions.
23  *
24  * 2. Redistribution of source code must retain the copyright notices as they
25  *    appear in each source code file, this License Terms and Conditions.
26  *
27  * 3. Redistribution in binary form must reproduce the Copyright Notice,
28  *    this License Terms and Conditions, in the documentation and/or other
29  *    materials provided with the distribution.  For the purposes of binary
30  *    distribution the "Copyright Notice" refers to the following language:
31  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
32  *
33  * 4. The name of JPNIC may not be used to endorse or promote products
34  *    derived from this Software without specific prior written approval of
35  *    JPNIC.
36  *
37  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
41  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
48  */
49 
50 #include <config.h>
51 
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
55 
56 #include <idn/result.h>
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/converter.h>
60 #include <idn/ucs4.h>
61 #include <idn/debug.h>
62 #include <idn/race.h>
63 #include <idn/util.h>
64 
/*
 * ACE prefix that identifies a RACE-encoded label.  A build may supply
 * its own value by defining IDN_RACE_PREFIX beforehand.
 */
#ifndef IDN_RACE_PREFIX
#define IDN_RACE_PREFIX		"bq--"
#endif

/*
 * Octets with a special meaning in the compressed octet stream.
 *
 * RACE_2OCTET_MODE  leading octet announcing that every character follows
 *                   as two octets (no compression).
 * RACE_ESCAPE       escape octet used inside compressed data.
 * RACE_ESCAPE_2ND   octet that, right after RACE_ESCAPE, stands for a
 *                   lower octet of 0xff in the compressed row.
 */
#define RACE_2OCTET_MODE	0xd8
#define RACE_ESCAPE		0xff
#define RACE_ESCAPE_2ND		0x99

#define RACE_BUF_SIZE		128		/* more than enough */

/*
 * UTF-16 surrogate pair arithmetic.
 *
 * The three constants come first so that the helper macros below read
 * top-down; macro expansion is lazy, so the order has no effect on the
 * generated code.
 */
#define SURROGATE_BASE		0x10000
#define SURROGATE_H_OFF		0xd800
#define SURROGATE_L_OFF		0xdc00

/* Range tests for a single 16-bit unit (argument is evaluated twice). */
#define IS_SURROGATE_HIGH(v)	(0xd800 <= (v) && (v) <= 0xdbff)
#define IS_SURROGATE_LOW(v)	(0xdc00 <= (v) && (v) <= 0xdfff)

/* Split a code point at or above U+10000 into its two surrogates. */
#define SURROGATE_HIGH(v)	(SURROGATE_H_OFF + (((v) - 0x10000) >> 10))
#define SURROGATE_LOW(v)	(SURROGATE_L_OFF + ((v) & 0x3ff))

/* Join a high/low surrogate pair back into one code point. */
#define COMBINE_SURROGATE(h, l) \
	(SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF))
86 
/*
 * Compression modes chosen by get_compress_mode() and consumed by
 * race_compress_encode().
 */
enum {
	/* every character shares one upper octet (row) */
	compress_one = 0,
	/* characters come from row 0 plus exactly one other row */
	compress_two = 1,
	/* more than one non-zero row: emit two octets per character */
	compress_none = 2
};
95 
96 static idn_result_t	race_decode_decompress(const char *from,
97 					       unsigned short *buf,
98 					       size_t buflen);
99 static idn_result_t	race_compress_encode(const unsigned short *p,
100 					     int compress_mode,
101 					     char *to, size_t tolen);
102 static int		get_compress_mode(unsigned short *p);
103 
104 idn_result_t
idn__race_decode(idn_converter_t ctx,void * privdata,const char * from,unsigned long * to,size_t tolen)105 idn__race_decode(idn_converter_t ctx, void *privdata,
106 		 const char *from, unsigned long *to, size_t tolen) {
107 	unsigned short *buf = NULL;
108 	size_t prefixlen = strlen(IDN_RACE_PREFIX);
109 	size_t fromlen;
110 	size_t buflen;
111 	idn_result_t r;
112 
113 	assert(ctx != NULL);
114 
115 	TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
116 	       idn__debug_xstring(from, 50), (int)tolen));
117 
118 	if (!idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) {
119 		if (*from == '\0') {
120 			r = idn_ucs4_utf8toucs4(from, to, tolen);
121 			goto ret;
122 		}
123 		r = idn_invalid_encoding;
124 		goto ret;
125 	}
126 	from += prefixlen;
127 	fromlen = strlen(from);
128 
129 	/*
130 	 * Allocate sufficient buffer.
131 	 */
132 	buflen = fromlen + 1;
133 	buf = malloc(sizeof(*buf) * buflen);
134 	if (buf == NULL) {
135 		r = idn_nomemory;
136 		goto ret;
137 	}
138 
139 	/*
140 	 * Decode base32 and decompress.
141 	 */
142 	r = race_decode_decompress(from, buf, buflen);
143 	if (r != idn_success)
144 		goto ret;
145 
146 	/*
147 	 * Now 'buf' points the decompressed string, which must contain
148 	 * UTF-16 characters.
149 	 */
150 
151 	/*
152 	 * Convert to UCS4.
153 	 */
154 	r = idn_ucs4_utf16toucs4(buf, to, tolen);
155 	if (r != idn_success)
156 		goto ret;
157 
158 ret:
159 	free(buf);
160 	if (r == idn_success) {
161 		TRACE(("idn__race_decode(): succcess (to=\"%s\")\n",
162 		       idn__debug_ucs4xstring(to, 50)));
163 	} else {
164 		TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r)));
165 	}
166 	return (r);
167 }
168 
169 static idn_result_t
race_decode_decompress(const char * from,unsigned short * buf,size_t buflen)170 race_decode_decompress(const char *from, unsigned short *buf, size_t buflen)
171 {
172 	unsigned short *p = buf;
173 	unsigned int bitbuf = 0;
174 	int bitlen = 0;
175 	int i, j;
176 	size_t len;
177 
178 	while (*from != '\0') {
179 		int c = *from++;
180 		int x;
181 
182 		if ('a' <= c && c <= 'z')
183 			x = c - 'a';
184 		else if ('A' <= c && c <= 'Z')
185 			x = c - 'A';
186 		else if ('2' <= c && c <= '7')
187 			x = c - '2' + 26;
188 		else
189 			return (idn_invalid_encoding);
190 
191 		bitbuf = (bitbuf << 5) + x;
192 		bitlen += 5;
193 		if (bitlen >= 8) {
194 			*p++ = (bitbuf >> (bitlen - 8)) & 0xff;
195 			bitlen -= 8;
196 		}
197 	}
198 	len = p - buf;
199 
200 	/*
201 	 * Now 'buf' holds the decoded string.
202 	 */
203 
204 	/*
205 	 * Decompress.
206 	 */
207 	if (buf[0] == RACE_2OCTET_MODE) {
208 		if ((len - 1) % 2 != 0)
209 			return (idn_invalid_encoding);
210 		for (i = 1, j = 0; i < len; i += 2, j++)
211 			buf[j] = (buf[i] << 8) + buf[i + 1];
212 		len = j;
213 	} else {
214 		unsigned short c = buf[0] << 8;	/* higher octet */
215 
216 		for (i = 1, j = 0; i < len; j++) {
217 			if (buf[i] == RACE_ESCAPE) {
218 				if (i + 1 >= len)
219 					return (idn_invalid_encoding);
220 				else if (buf[i + 1] == RACE_ESCAPE_2ND)
221 					buf[j] = c | 0xff;
222 				else
223 					buf[j] = buf[i + 1];
224 				i += 2;
225 
226 			} else if (buf[i] == 0x99 && c == 0x00) {
227 				/*
228 				 * The RACE specification says this is error.
229 				 */
230 				return (idn_invalid_encoding);
231 
232 			} else {
233 				buf[j] = c | buf[i++];
234 			}
235 		}
236 		len = j;
237 	}
238 	buf[len] = '\0';
239 
240 	return (idn_success);
241 }
242 
243 idn_result_t
idn__race_encode(idn_converter_t ctx,void * privdata,const unsigned long * from,char * to,size_t tolen)244 idn__race_encode(idn_converter_t ctx, void *privdata,
245 		 const unsigned long *from, char *to, size_t tolen) {
246 	char *to_org = to;
247 	unsigned short *p, *buf = NULL;
248 	size_t prefixlen = strlen(IDN_RACE_PREFIX);
249 	size_t buflen;
250 	size_t fromlen;
251 	idn_result_t r;
252 	int compress_mode;
253 
254 	assert(ctx != NULL);
255 
256 	TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n",
257 	       idn__debug_ucs4xstring(from, 50), (int)tolen));
258 
259 	if (*from == '\0') {
260 		r = idn_ucs4_ucs4toutf8(from, to, tolen);
261 		goto ret;
262 	} else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) {
263 		r = idn_prohibited;
264 		goto ret;
265 	}
266 
267 	if (tolen < prefixlen) {
268 		r  = idn_buffer_overflow;
269 		goto ret;
270 	}
271 	memcpy(to, IDN_RACE_PREFIX, prefixlen);
272 	to += prefixlen;
273 	tolen -= prefixlen;
274 
275 	fromlen = idn_ucs4_strlen(from);
276 	buflen = fromlen * 2 + 2;
277 
278 	/*
279 	 * Convert to UTF-16.
280 	 * Preserve space for a character at the top of the buffer.
281 	 */
282 	for (;;) {
283 		unsigned short *new_buf;
284 
285 		new_buf = realloc(buf, sizeof(*buf) * buflen);
286 		if (new_buf == NULL) {
287 			r = idn_nomemory;
288 			goto ret;
289 		}
290 		buf = new_buf;
291 
292 		r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1);
293 		if (r == idn_success)
294 			break;
295 		else if (r != idn_buffer_overflow)
296 			goto ret;
297 
298 		buflen = fromlen * 2 + 2;
299 	}
300 	p = buf + 1;
301 
302 	/*
303 	 * Now 'p' contains UTF-16 encoded string.
304 	 */
305 
306 	/*
307 	 * Check U+0099.
308 	 * RACE doesn't permit U+0099 in an input string.
309 	 */
310 	for (p = buf + 1; *p != '\0'; p++) {
311 		if (*p == 0x0099) {
312 			r = idn_invalid_encoding;
313 			goto ret;
314 		}
315 	}
316 
317 	/*
318 	 * Compress, encode in base-32 and output.
319 	 */
320 	compress_mode = get_compress_mode(buf + 1);
321 	r = race_compress_encode(buf, compress_mode, to, tolen);
322 
323 ret:
324 	free(buf);
325 	if (r == idn_success) {
326 		TRACE(("idn__race_encode(): succcess (to=\"%s\")\n",
327 		       idn__debug_xstring(to_org, 50)));
328 	} else {
329 		TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r)));
330 	}
331 	return (r);
332 }
333 
334 static idn_result_t
race_compress_encode(const unsigned short * p,int compress_mode,char * to,size_t tolen)335 race_compress_encode(const unsigned short *p, int compress_mode,
336 		     char *to, size_t tolen)
337 {
338 	unsigned long bitbuf = *p++;	/* bit stream buffer */
339 	int bitlen = 8;			/* # of bits in 'bitbuf' */
340 
341 	while (*p != '\0' || bitlen > 0) {
342 		unsigned int c = *p;
343 
344 		if (c == '\0') {
345 			/* End of data.  Flush. */
346 			bitbuf <<= (5 - bitlen);
347 			bitlen = 5;
348 		} else if (compress_mode == compress_none) {
349 			/* Push 16 bit data. */
350 			bitbuf = (bitbuf << 16) | c;
351 			bitlen += 16;
352 			p++;
353 		} else {/* compress_mode == compress_one/compress_two */
354 			/* Push 8 or 16 bit data. */
355 			if (compress_mode == compress_two &&
356 			    (c & 0xff00) == 0) {
357 				/* Upper octet is zero (and not U1). */
358 				bitbuf = (bitbuf << 16) | 0xff00 | c;
359 				bitlen += 16;
360 			} else if ((c & 0xff) == 0xff) {
361 				/* Lower octet is 0xff. */
362 				bitbuf = (bitbuf << 16) |
363 					(RACE_ESCAPE << 8) | RACE_ESCAPE_2ND;
364 				bitlen += 16;
365 			} else {
366 				/* Just output lower octet. */
367 				bitbuf = (bitbuf << 8) | (c & 0xff);
368 				bitlen += 8;
369 			}
370 			p++;
371 		}
372 
373 		/*
374 		 * Output bits in 'bitbuf' in 5-bit unit.
375 		 */
376 		while (bitlen >= 5) {
377 			int x;
378 
379 			/* Get top 5 bits. */
380 			x = (bitbuf >> (bitlen - 5)) & 0x1f;
381 			bitlen -= 5;
382 
383 			/* Encode. */
384 			if (x < 26)
385 				x += 'a';
386 			else
387 				x = (x - 26) + '2';
388 
389 			if (tolen < 1)
390 				return (idn_buffer_overflow);
391 
392 			*to++ = x;
393 			tolen--;
394 		}
395 	}
396 
397 	if (tolen <= 0)
398 		return (idn_buffer_overflow);
399 
400 	*to = '\0';
401 	return (idn_success);
402 }
403 
404 static int
get_compress_mode(unsigned short * p)405 get_compress_mode(unsigned short *p) {
406 	int zero = 0;
407 	unsigned int upper = 0;
408 	unsigned short *modepos = p - 1;
409 
410 	while (*p != '\0') {
411 		unsigned int hi = *p++ & 0xff00;
412 
413 		if (hi == 0) {
414 			zero++;
415 		} else if (hi == upper) {
416 			;
417 		} else if (upper == 0) {
418 			upper = hi;
419 		} else {
420 			*modepos = RACE_2OCTET_MODE;
421 			return (compress_none);
422 		}
423 	}
424 	*modepos = upper >> 8;
425 	if (upper > 0 && zero > 0)
426 		return (compress_two);
427 	else
428 		return (compress_one);
429 }
430