xref: /minix3/external/bsd/bind/dist/contrib/idn/idnkit-1.0-src/lib/delimitermap.c (revision 00b67f09dd46474d133c95011a48590a8e8f94c7)
1*00b67f09SDavid van Moolenbroek /*	$NetBSD: delimitermap.c,v 1.4 2014/12/10 04:37:55 christos Exp $	*/
2*00b67f09SDavid van Moolenbroek 
3*00b67f09SDavid van Moolenbroek #ifndef lint
4*00b67f09SDavid van Moolenbroek static char *rcsid = "Id: delimitermap.c,v 1.1 2003/06/04 00:25:52 marka Exp ";
5*00b67f09SDavid van Moolenbroek #endif
6*00b67f09SDavid van Moolenbroek 
7*00b67f09SDavid van Moolenbroek /*
8*00b67f09SDavid van Moolenbroek  * Copyright (c) 2001,2002 Japan Network Information Center.
9*00b67f09SDavid van Moolenbroek  * All rights reserved.
10*00b67f09SDavid van Moolenbroek  *
11*00b67f09SDavid van Moolenbroek  * By using this file, you agree to the terms and conditions set forth bellow.
12*00b67f09SDavid van Moolenbroek  *
13*00b67f09SDavid van Moolenbroek  * 			LICENSE TERMS AND CONDITIONS
14*00b67f09SDavid van Moolenbroek  *
15*00b67f09SDavid van Moolenbroek  * The following License Terms and Conditions apply, unless a different
16*00b67f09SDavid van Moolenbroek  * license is obtained from Japan Network Information Center ("JPNIC"),
17*00b67f09SDavid van Moolenbroek  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18*00b67f09SDavid van Moolenbroek  * Chiyoda-ku, Tokyo 101-0047, Japan.
19*00b67f09SDavid van Moolenbroek  *
20*00b67f09SDavid van Moolenbroek  * 1. Use, Modification and Redistribution (including distribution of any
21*00b67f09SDavid van Moolenbroek  *    modified or derived work) in source and/or binary forms is permitted
22*00b67f09SDavid van Moolenbroek  *    under this License Terms and Conditions.
23*00b67f09SDavid van Moolenbroek  *
24*00b67f09SDavid van Moolenbroek  * 2. Redistribution of source code must retain the copyright notices as they
25*00b67f09SDavid van Moolenbroek  *    appear in each source code file, this License Terms and Conditions.
26*00b67f09SDavid van Moolenbroek  *
27*00b67f09SDavid van Moolenbroek  * 3. Redistribution in binary form must reproduce the Copyright Notice,
28*00b67f09SDavid van Moolenbroek  *    this License Terms and Conditions, in the documentation and/or other
29*00b67f09SDavid van Moolenbroek  *    materials provided with the distribution.  For the purposes of binary
30*00b67f09SDavid van Moolenbroek  *    distribution the "Copyright Notice" refers to the following language:
31*00b67f09SDavid van Moolenbroek  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
32*00b67f09SDavid van Moolenbroek  *
33*00b67f09SDavid van Moolenbroek  * 4. The name of JPNIC may not be used to endorse or promote products
34*00b67f09SDavid van Moolenbroek  *    derived from this Software without specific prior written approval of
35*00b67f09SDavid van Moolenbroek  *    JPNIC.
36*00b67f09SDavid van Moolenbroek  *
37*00b67f09SDavid van Moolenbroek  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38*00b67f09SDavid van Moolenbroek  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39*00b67f09SDavid van Moolenbroek  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40*00b67f09SDavid van Moolenbroek  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
41*00b67f09SDavid van Moolenbroek  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42*00b67f09SDavid van Moolenbroek  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43*00b67f09SDavid van Moolenbroek  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44*00b67f09SDavid van Moolenbroek  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45*00b67f09SDavid van Moolenbroek  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46*00b67f09SDavid van Moolenbroek  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47*00b67f09SDavid van Moolenbroek  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
48*00b67f09SDavid van Moolenbroek  */
49*00b67f09SDavid van Moolenbroek 
50*00b67f09SDavid van Moolenbroek #include <config.h>
51*00b67f09SDavid van Moolenbroek 
52*00b67f09SDavid van Moolenbroek #include <stddef.h>
53*00b67f09SDavid van Moolenbroek #include <stdlib.h>
54*00b67f09SDavid van Moolenbroek #include <string.h>
55*00b67f09SDavid van Moolenbroek 
56*00b67f09SDavid van Moolenbroek #include <idn/result.h>
57*00b67f09SDavid van Moolenbroek #include <idn/assert.h>
58*00b67f09SDavid van Moolenbroek #include <idn/logmacro.h>
59*00b67f09SDavid van Moolenbroek #include <idn/delimitermap.h>
60*00b67f09SDavid van Moolenbroek #include <idn/util.h>
61*00b67f09SDavid van Moolenbroek #include <idn/debug.h>
62*00b67f09SDavid van Moolenbroek #include <idn/ucs4.h>
63*00b67f09SDavid van Moolenbroek 
/*
 * Delimiter map object.
 *
 * Holds a growable array of user-registered delimiter code points
 * together with a reference count that controls the object's lifetime.
 */
struct idn_delimitermap {
	int ndelimiters;		/* number of entries in use */
	int delimiter_size;		/* allocated capacity, in entries */
	unsigned long *delimiters;	/* heap array of delimiter code points */
	int reference_count;		/* object is freed when this drops to 0 */
};
73*00b67f09SDavid van Moolenbroek 
/* Number of delimiter slots allocated when a map is created. */
#define DELIMITERMAP_INITIAL_DELIMITER_SIZE	4
/* Largest code point accepted as a delimiter. */
#define UNICODE_MAX		0x10ffff
/* Surrogate range tests; note that 'v' is evaluated twice. */
#define IS_SURROGATE_HIGH(v)	((v) >= 0xd800 && (v) <= 0xdbff)
#define IS_SURROGATE_LOW(v)	((v) >= 0xdc00 && (v) <= 0xdfff)
78*00b67f09SDavid van Moolenbroek 
79*00b67f09SDavid van Moolenbroek idn_result_t
idn_delimitermap_create(idn_delimitermap_t * ctxp)80*00b67f09SDavid van Moolenbroek idn_delimitermap_create(idn_delimitermap_t *ctxp) {
81*00b67f09SDavid van Moolenbroek 	idn_delimitermap_t ctx = NULL;
82*00b67f09SDavid van Moolenbroek 	idn_result_t r;
83*00b67f09SDavid van Moolenbroek 
84*00b67f09SDavid van Moolenbroek 	assert(ctxp != NULL);
85*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_create()\n"));
86*00b67f09SDavid van Moolenbroek 
87*00b67f09SDavid van Moolenbroek 	ctx = (idn_delimitermap_t) malloc(sizeof(struct idn_delimitermap));
88*00b67f09SDavid van Moolenbroek 	if (ctx == NULL) {
89*00b67f09SDavid van Moolenbroek 		WARNING(("idn_mapper_create: malloc failed\n"));
90*00b67f09SDavid van Moolenbroek 		r = idn_nomemory;
91*00b67f09SDavid van Moolenbroek 		goto ret;
92*00b67f09SDavid van Moolenbroek 	}
93*00b67f09SDavid van Moolenbroek 
94*00b67f09SDavid van Moolenbroek 	ctx->delimiters = (unsigned long *) malloc(sizeof(unsigned long)
95*00b67f09SDavid van Moolenbroek 		* DELIMITERMAP_INITIAL_DELIMITER_SIZE);
96*00b67f09SDavid van Moolenbroek 	if (ctx->delimiters == NULL) {
97*00b67f09SDavid van Moolenbroek 		r = idn_nomemory;
98*00b67f09SDavid van Moolenbroek 		goto ret;
99*00b67f09SDavid van Moolenbroek 	}
100*00b67f09SDavid van Moolenbroek 	ctx->ndelimiters = 0;
101*00b67f09SDavid van Moolenbroek 	ctx->delimiter_size = DELIMITERMAP_INITIAL_DELIMITER_SIZE;
102*00b67f09SDavid van Moolenbroek 	ctx->reference_count = 1;
103*00b67f09SDavid van Moolenbroek 	*ctxp = ctx;
104*00b67f09SDavid van Moolenbroek 	r = idn_success;
105*00b67f09SDavid van Moolenbroek 
106*00b67f09SDavid van Moolenbroek ret:
107*00b67f09SDavid van Moolenbroek 	if (r != idn_success)
108*00b67f09SDavid van Moolenbroek 		free(ctx);
109*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_create(): %s\n", idn_result_tostring(r)));
110*00b67f09SDavid van Moolenbroek 	return (r);
111*00b67f09SDavid van Moolenbroek }
112*00b67f09SDavid van Moolenbroek 
113*00b67f09SDavid van Moolenbroek void
idn_delimitermap_destroy(idn_delimitermap_t ctx)114*00b67f09SDavid van Moolenbroek idn_delimitermap_destroy(idn_delimitermap_t ctx) {
115*00b67f09SDavid van Moolenbroek 	assert(ctx != NULL);
116*00b67f09SDavid van Moolenbroek 
117*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_destroy()\n"));
118*00b67f09SDavid van Moolenbroek 
119*00b67f09SDavid van Moolenbroek 	ctx->reference_count--;
120*00b67f09SDavid van Moolenbroek 	if (ctx->reference_count <= 0) {
121*00b67f09SDavid van Moolenbroek 		TRACE(("idn_mapper_destroy(): the object is destroyed\n"));
122*00b67f09SDavid van Moolenbroek 		free(ctx->delimiters);
123*00b67f09SDavid van Moolenbroek 		free(ctx);
124*00b67f09SDavid van Moolenbroek 	} else {
125*00b67f09SDavid van Moolenbroek 		TRACE(("idn_delimitermap_destroy(): "
126*00b67f09SDavid van Moolenbroek 		       "update reference count (%d->%d)\n",
127*00b67f09SDavid van Moolenbroek 		       ctx->reference_count + 1, ctx->reference_count));
128*00b67f09SDavid van Moolenbroek 	}
129*00b67f09SDavid van Moolenbroek }
130*00b67f09SDavid van Moolenbroek 
131*00b67f09SDavid van Moolenbroek void
idn_delimitermap_incrref(idn_delimitermap_t ctx)132*00b67f09SDavid van Moolenbroek idn_delimitermap_incrref(idn_delimitermap_t ctx) {
133*00b67f09SDavid van Moolenbroek 	assert(ctx != NULL);
134*00b67f09SDavid van Moolenbroek 
135*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_incrref()\n"));
136*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_incrref: update reference count (%d->%d)\n",
137*00b67f09SDavid van Moolenbroek 		ctx->reference_count, ctx->reference_count + 1));
138*00b67f09SDavid van Moolenbroek 
139*00b67f09SDavid van Moolenbroek 	ctx->reference_count++;
140*00b67f09SDavid van Moolenbroek }
141*00b67f09SDavid van Moolenbroek 
142*00b67f09SDavid van Moolenbroek idn_result_t
idn_delimitermap_add(idn_delimitermap_t ctx,unsigned long delimiter)143*00b67f09SDavid van Moolenbroek idn_delimitermap_add(idn_delimitermap_t ctx, unsigned long delimiter) {
144*00b67f09SDavid van Moolenbroek 	idn_result_t r;
145*00b67f09SDavid van Moolenbroek 
146*00b67f09SDavid van Moolenbroek 	assert(ctx != NULL && ctx->ndelimiters <= ctx->delimiter_size);
147*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_add(delimiter=\\x%04lx)\n", delimiter));
148*00b67f09SDavid van Moolenbroek 
149*00b67f09SDavid van Moolenbroek 	if (delimiter == 0 || delimiter > UNICODE_MAX ||
150*00b67f09SDavid van Moolenbroek 	    IS_SURROGATE_HIGH(delimiter) || IS_SURROGATE_LOW(delimiter)) {
151*00b67f09SDavid van Moolenbroek 		r = idn_invalid_codepoint;
152*00b67f09SDavid van Moolenbroek 		goto ret;
153*00b67f09SDavid van Moolenbroek 	}
154*00b67f09SDavid van Moolenbroek 
155*00b67f09SDavid van Moolenbroek 	if (ctx->ndelimiters == ctx->delimiter_size) {
156*00b67f09SDavid van Moolenbroek 		unsigned long *new_delimiters;
157*00b67f09SDavid van Moolenbroek 
158*00b67f09SDavid van Moolenbroek 		new_delimiters = (unsigned long *) realloc(ctx->delimiters,
159*00b67f09SDavid van Moolenbroek 			sizeof(unsigned long) * ctx->delimiter_size * 2);
160*00b67f09SDavid van Moolenbroek 		if (new_delimiters == NULL) {
161*00b67f09SDavid van Moolenbroek 			r = idn_nomemory;
162*00b67f09SDavid van Moolenbroek 			goto ret;
163*00b67f09SDavid van Moolenbroek 		}
164*00b67f09SDavid van Moolenbroek 		ctx->delimiters = new_delimiters;
165*00b67f09SDavid van Moolenbroek 		ctx->delimiter_size *= 2;
166*00b67f09SDavid van Moolenbroek 	}
167*00b67f09SDavid van Moolenbroek 
168*00b67f09SDavid van Moolenbroek 	ctx->delimiters[ctx->ndelimiters] = delimiter;
169*00b67f09SDavid van Moolenbroek 	ctx->ndelimiters++;
170*00b67f09SDavid van Moolenbroek 	r = idn_success;
171*00b67f09SDavid van Moolenbroek 
172*00b67f09SDavid van Moolenbroek ret:
173*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_add(): %s\n", idn_result_tostring(r)));
174*00b67f09SDavid van Moolenbroek 	return (r);
175*00b67f09SDavid van Moolenbroek }
176*00b67f09SDavid van Moolenbroek 
177*00b67f09SDavid van Moolenbroek idn_result_t
idn_delimitermap_addall(idn_delimitermap_t ctx,unsigned long * delimiters,int ndelimiters)178*00b67f09SDavid van Moolenbroek idn_delimitermap_addall(idn_delimitermap_t ctx, unsigned long *delimiters,
179*00b67f09SDavid van Moolenbroek 			int ndelimiters) {
180*00b67f09SDavid van Moolenbroek 	idn_result_t r;
181*00b67f09SDavid van Moolenbroek 	int i;
182*00b67f09SDavid van Moolenbroek 
183*00b67f09SDavid van Moolenbroek 	assert(ctx != NULL && delimiters != NULL);
184*00b67f09SDavid van Moolenbroek 
185*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_addall(ndelimiters=%d)\n", ndelimiters));
186*00b67f09SDavid van Moolenbroek 
187*00b67f09SDavid van Moolenbroek 	for (i = 0; i < ndelimiters; i++) {
188*00b67f09SDavid van Moolenbroek 		r = idn_delimitermap_add(ctx, *delimiters);
189*00b67f09SDavid van Moolenbroek 		if (r != idn_success)
190*00b67f09SDavid van Moolenbroek 			goto ret;
191*00b67f09SDavid van Moolenbroek 		delimiters++;
192*00b67f09SDavid van Moolenbroek 	}
193*00b67f09SDavid van Moolenbroek 
194*00b67f09SDavid van Moolenbroek 	r = idn_success;
195*00b67f09SDavid van Moolenbroek ret:
196*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_addall(): %s\n", idn_result_tostring(r)));
197*00b67f09SDavid van Moolenbroek 	return (r);
198*00b67f09SDavid van Moolenbroek }
199*00b67f09SDavid van Moolenbroek 
200*00b67f09SDavid van Moolenbroek idn_result_t
idn_delimitermap_map(idn_delimitermap_t ctx,const unsigned long * from,unsigned long * to,size_t tolen)201*00b67f09SDavid van Moolenbroek idn_delimitermap_map(idn_delimitermap_t ctx, const unsigned long *from,
202*00b67f09SDavid van Moolenbroek 		     unsigned long *to, size_t tolen) {
203*00b67f09SDavid van Moolenbroek 
204*00b67f09SDavid van Moolenbroek 	/* default delimiters (label separators) from IDNA specification */
205*00b67f09SDavid van Moolenbroek 	static const unsigned long default_delimiters[] =
206*00b67f09SDavid van Moolenbroek 		{ 0x002e, /* full stop */
207*00b67f09SDavid van Moolenbroek 		  0x3002, /* ideographic full stop */
208*00b67f09SDavid van Moolenbroek 		  0xff0e, /* fullwidth full stop */
209*00b67f09SDavid van Moolenbroek 		  0xff61, /* halfwidth ideographic full stop */
210*00b67f09SDavid van Moolenbroek 		  0x0000 };
211*00b67f09SDavid van Moolenbroek 
212*00b67f09SDavid van Moolenbroek 	unsigned long *to_org = to;
213*00b67f09SDavid van Moolenbroek 	idn_result_t r;
214*00b67f09SDavid van Moolenbroek 	int i, j;
215*00b67f09SDavid van Moolenbroek 	int found;
216*00b67f09SDavid van Moolenbroek 
217*00b67f09SDavid van Moolenbroek 	assert(ctx != NULL && from != NULL && to != NULL);
218*00b67f09SDavid van Moolenbroek 
219*00b67f09SDavid van Moolenbroek 	TRACE(("idn_delimitermap_map(from=\"%s\", tolen=%d)\n",
220*00b67f09SDavid van Moolenbroek 		idn__debug_ucs4xstring(from, 50), (int)tolen));
221*00b67f09SDavid van Moolenbroek 
222*00b67f09SDavid van Moolenbroek 	/*
223*00b67f09SDavid van Moolenbroek 	 * Map.
224*00b67f09SDavid van Moolenbroek 	 */
225*00b67f09SDavid van Moolenbroek 	while (*from != '\0') {
226*00b67f09SDavid van Moolenbroek 		found = 0;
227*00b67f09SDavid van Moolenbroek 		if (tolen < 1) {
228*00b67f09SDavid van Moolenbroek 			r = idn_buffer_overflow;
229*00b67f09SDavid van Moolenbroek 			goto ret;
230*00b67f09SDavid van Moolenbroek 		}
231*00b67f09SDavid van Moolenbroek 		for (j = 0; default_delimiters[j] != 0x0000; j++) {
232*00b67f09SDavid van Moolenbroek 			if (default_delimiters[j] == *from) {
233*00b67f09SDavid van Moolenbroek 				found = 1;
234*00b67f09SDavid van Moolenbroek 				break;
235*00b67f09SDavid van Moolenbroek 			}
236*00b67f09SDavid van Moolenbroek 		}
237*00b67f09SDavid van Moolenbroek 		if (!found) {
238*00b67f09SDavid van Moolenbroek 			for (i = 0; i < ctx->ndelimiters; i++) {
239*00b67f09SDavid van Moolenbroek 				if (ctx->delimiters[i] == *from) {
240*00b67f09SDavid van Moolenbroek 					found = 1;
241*00b67f09SDavid van Moolenbroek 					break;
242*00b67f09SDavid van Moolenbroek 				}
243*00b67f09SDavid van Moolenbroek 			}
244*00b67f09SDavid van Moolenbroek 		}
245*00b67f09SDavid van Moolenbroek 		if (found)
246*00b67f09SDavid van Moolenbroek 			*to = '.';
247*00b67f09SDavid van Moolenbroek 		else
248*00b67f09SDavid van Moolenbroek 			*to = *from;
249*00b67f09SDavid van Moolenbroek 		from++;
250*00b67f09SDavid van Moolenbroek 		to++;
251*00b67f09SDavid van Moolenbroek 		tolen--;
252*00b67f09SDavid van Moolenbroek 	}
253*00b67f09SDavid van Moolenbroek 
254*00b67f09SDavid van Moolenbroek 	if (tolen < 1) {
255*00b67f09SDavid van Moolenbroek 		r = idn_buffer_overflow;
256*00b67f09SDavid van Moolenbroek 		goto ret;
257*00b67f09SDavid van Moolenbroek 	}
258*00b67f09SDavid van Moolenbroek 	*to = '\0';
259*00b67f09SDavid van Moolenbroek 	r = idn_success;
260*00b67f09SDavid van Moolenbroek 
261*00b67f09SDavid van Moolenbroek ret:
262*00b67f09SDavid van Moolenbroek 	if (r == idn_success) {
263*00b67f09SDavid van Moolenbroek 		TRACE(("idn_delimitermap_map(): success (to=\"%s\")\n",
264*00b67f09SDavid van Moolenbroek 		       idn__debug_ucs4xstring(to_org, 50)));
265*00b67f09SDavid van Moolenbroek 	} else {
266*00b67f09SDavid van Moolenbroek 		TRACE(("idn_delimitermap_map(): %s\n",
267*00b67f09SDavid van Moolenbroek 		       idn_result_tostring(r)));
268*00b67f09SDavid van Moolenbroek 	}
269*00b67f09SDavid van Moolenbroek 	return (r);
270*00b67f09SDavid van Moolenbroek }
271