xref: /minix3/crypto/external/bsd/heimdal/dist/lib/wind/normalize.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1*0a6a1f1dSLionel Sambuc /*	$NetBSD: normalize.c,v 1.1.1.2 2014/04/24 12:45:56 pettai Exp $	*/
2ebfedea0SLionel Sambuc 
3ebfedea0SLionel Sambuc /*
4ebfedea0SLionel Sambuc  * Copyright (c) 2004 Kungliga Tekniska Högskolan
5ebfedea0SLionel Sambuc  * (Royal Institute of Technology, Stockholm, Sweden).
6ebfedea0SLionel Sambuc  * All rights reserved.
7ebfedea0SLionel Sambuc  *
8ebfedea0SLionel Sambuc  * Redistribution and use in source and binary forms, with or without
9ebfedea0SLionel Sambuc  * modification, are permitted provided that the following conditions
10ebfedea0SLionel Sambuc  * are met:
11ebfedea0SLionel Sambuc  *
12ebfedea0SLionel Sambuc  * 1. Redistributions of source code must retain the above copyright
13ebfedea0SLionel Sambuc  *    notice, this list of conditions and the following disclaimer.
14ebfedea0SLionel Sambuc  *
15ebfedea0SLionel Sambuc  * 2. Redistributions in binary form must reproduce the above copyright
16ebfedea0SLionel Sambuc  *    notice, this list of conditions and the following disclaimer in the
17ebfedea0SLionel Sambuc  *    documentation and/or other materials provided with the distribution.
18ebfedea0SLionel Sambuc  *
19ebfedea0SLionel Sambuc  * 3. Neither the name of the Institute nor the names of its contributors
20ebfedea0SLionel Sambuc  *    may be used to endorse or promote products derived from this software
21ebfedea0SLionel Sambuc  *    without specific prior written permission.
22ebfedea0SLionel Sambuc  *
23ebfedea0SLionel Sambuc  * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24ebfedea0SLionel Sambuc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25ebfedea0SLionel Sambuc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ebfedea0SLionel Sambuc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27ebfedea0SLionel Sambuc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28ebfedea0SLionel Sambuc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29ebfedea0SLionel Sambuc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30ebfedea0SLionel Sambuc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31ebfedea0SLionel Sambuc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32ebfedea0SLionel Sambuc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33ebfedea0SLionel Sambuc  * SUCH DAMAGE.
34ebfedea0SLionel Sambuc  */
35ebfedea0SLionel Sambuc 
36ebfedea0SLionel Sambuc #ifdef HAVE_CONFIG_H
37ebfedea0SLionel Sambuc #include <config.h>
38ebfedea0SLionel Sambuc #endif
39ebfedea0SLionel Sambuc #include "windlocl.h"
40ebfedea0SLionel Sambuc 
41ebfedea0SLionel Sambuc #include <assert.h>
42ebfedea0SLionel Sambuc #include <stdlib.h>
43ebfedea0SLionel Sambuc #include <errno.h>
44ebfedea0SLionel Sambuc #include <stdio.h>
45ebfedea0SLionel Sambuc 
46ebfedea0SLionel Sambuc #include <krb5/roken.h>
47ebfedea0SLionel Sambuc 
48ebfedea0SLionel Sambuc #include "normalize_table.h"
49ebfedea0SLionel Sambuc 
50ebfedea0SLionel Sambuc static int
translation_cmp(const void * key,const void * data)51ebfedea0SLionel Sambuc translation_cmp(const void *key, const void *data)
52ebfedea0SLionel Sambuc {
53ebfedea0SLionel Sambuc     const struct translation *t1 = (const struct translation *)key;
54ebfedea0SLionel Sambuc     const struct translation *t2 = (const struct translation *)data;
55ebfedea0SLionel Sambuc 
56ebfedea0SLionel Sambuc     return t1->key - t2->key;
57ebfedea0SLionel Sambuc }
58ebfedea0SLionel Sambuc 
59ebfedea0SLionel Sambuc enum { s_base  = 0xAC00};
60ebfedea0SLionel Sambuc enum { s_count = 11172};
61ebfedea0SLionel Sambuc enum { l_base  = 0x1100};
62ebfedea0SLionel Sambuc enum { l_count = 19};
63ebfedea0SLionel Sambuc enum { v_base  = 0x1161};
64ebfedea0SLionel Sambuc enum { v_count = 21};
65ebfedea0SLionel Sambuc enum { t_base  = 0x11A7};
66ebfedea0SLionel Sambuc enum { t_count = 28};
67ebfedea0SLionel Sambuc enum { n_count = v_count * t_count};
68ebfedea0SLionel Sambuc 
69ebfedea0SLionel Sambuc static int
hangul_decomp(const uint32_t * in,size_t in_len,uint32_t * out,size_t * out_len)70ebfedea0SLionel Sambuc hangul_decomp(const uint32_t *in, size_t in_len,
71ebfedea0SLionel Sambuc 	      uint32_t *out, size_t *out_len)
72ebfedea0SLionel Sambuc {
73ebfedea0SLionel Sambuc     uint32_t u = *in;
74ebfedea0SLionel Sambuc     unsigned s_index;
75ebfedea0SLionel Sambuc     unsigned l, v, t;
76ebfedea0SLionel Sambuc     unsigned o;
77ebfedea0SLionel Sambuc 
78ebfedea0SLionel Sambuc     if (u < s_base || u >= s_base + s_count)
79ebfedea0SLionel Sambuc 	return 0;
80ebfedea0SLionel Sambuc     s_index = u - s_base;
81ebfedea0SLionel Sambuc     l = l_base + s_index / n_count;
82ebfedea0SLionel Sambuc     v = v_base + (s_index % n_count) / t_count;
83ebfedea0SLionel Sambuc     t = t_base + s_index % t_count;
84ebfedea0SLionel Sambuc     o = 2;
85ebfedea0SLionel Sambuc     if (t != t_base)
86ebfedea0SLionel Sambuc 	++o;
87ebfedea0SLionel Sambuc     if (*out_len < o)
88ebfedea0SLionel Sambuc 	return WIND_ERR_OVERRUN;
89ebfedea0SLionel Sambuc     out[0] = l;
90ebfedea0SLionel Sambuc     out[1] = v;
91ebfedea0SLionel Sambuc     if (t != t_base)
92ebfedea0SLionel Sambuc 	out[2] = t;
93ebfedea0SLionel Sambuc     *out_len = o;
94ebfedea0SLionel Sambuc     return 1;
95ebfedea0SLionel Sambuc }
96ebfedea0SLionel Sambuc 
97ebfedea0SLionel Sambuc static uint32_t
hangul_composition(const uint32_t * in,size_t in_len)98ebfedea0SLionel Sambuc hangul_composition(const uint32_t *in, size_t in_len)
99ebfedea0SLionel Sambuc {
100ebfedea0SLionel Sambuc     if (in_len < 2)
101ebfedea0SLionel Sambuc 	return 0;
102ebfedea0SLionel Sambuc     if (in[0] >= l_base && in[0] < l_base + l_count) {
103ebfedea0SLionel Sambuc 	unsigned l_index = in[0] - l_base;
104ebfedea0SLionel Sambuc 	unsigned v_index;
105ebfedea0SLionel Sambuc 
106ebfedea0SLionel Sambuc 	if (in[1] < v_base || in[1] >= v_base + v_count)
107ebfedea0SLionel Sambuc 	    return 0;
108ebfedea0SLionel Sambuc 	v_index = in[1] - v_base;
109ebfedea0SLionel Sambuc 	return (l_index * v_count + v_index) * t_count + s_base;
110ebfedea0SLionel Sambuc     } else if (in[0] >= s_base && in[0] < s_base + s_count) {
111ebfedea0SLionel Sambuc 	unsigned s_index = in[0] - s_base;
112ebfedea0SLionel Sambuc 	unsigned t_index;
113ebfedea0SLionel Sambuc 
114ebfedea0SLionel Sambuc 	if (s_index % t_count != 0)
115ebfedea0SLionel Sambuc 	    return 0;
116ebfedea0SLionel Sambuc 	if (in[1] < t_base || in[1] >= t_base + t_count)
117ebfedea0SLionel Sambuc 	    return 0;
118ebfedea0SLionel Sambuc 	t_index = in[1] - t_base;
119ebfedea0SLionel Sambuc 	return in[0] + t_index;
120ebfedea0SLionel Sambuc     }
121ebfedea0SLionel Sambuc     return 0;
122ebfedea0SLionel Sambuc }
123ebfedea0SLionel Sambuc 
124ebfedea0SLionel Sambuc static int
compat_decomp(const uint32_t * in,size_t in_len,uint32_t * out,size_t * out_len)125ebfedea0SLionel Sambuc compat_decomp(const uint32_t *in, size_t in_len,
126ebfedea0SLionel Sambuc 	      uint32_t *out, size_t *out_len)
127ebfedea0SLionel Sambuc {
128ebfedea0SLionel Sambuc     unsigned i;
129ebfedea0SLionel Sambuc     unsigned o = 0;
130ebfedea0SLionel Sambuc 
131ebfedea0SLionel Sambuc     for (i = 0; i < in_len; ++i) {
132ebfedea0SLionel Sambuc 	struct translation ts = {in[i]};
133ebfedea0SLionel Sambuc 	size_t sub_len = *out_len - o;
134ebfedea0SLionel Sambuc 	int ret;
135ebfedea0SLionel Sambuc 
136ebfedea0SLionel Sambuc 	ret = hangul_decomp(in + i, in_len - i,
137ebfedea0SLionel Sambuc 			    out + o, &sub_len);
138ebfedea0SLionel Sambuc 	if (ret) {
139ebfedea0SLionel Sambuc 	    if (ret == WIND_ERR_OVERRUN)
140ebfedea0SLionel Sambuc 		return ret;
141ebfedea0SLionel Sambuc 	    o += sub_len;
142ebfedea0SLionel Sambuc 	} else {
143ebfedea0SLionel Sambuc 	    void *s = bsearch(&ts,
144ebfedea0SLionel Sambuc 			      _wind_normalize_table,
145ebfedea0SLionel Sambuc 			      _wind_normalize_table_size,
146ebfedea0SLionel Sambuc 			      sizeof(_wind_normalize_table[0]),
147ebfedea0SLionel Sambuc 			      translation_cmp);
148ebfedea0SLionel Sambuc 	    if (s != NULL) {
149ebfedea0SLionel Sambuc 		const struct translation *t = (const struct translation *)s;
150ebfedea0SLionel Sambuc 
151ebfedea0SLionel Sambuc 		ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
152ebfedea0SLionel Sambuc 				    t->val_len,
153ebfedea0SLionel Sambuc 				    out + o, &sub_len);
154ebfedea0SLionel Sambuc 		if (ret)
155ebfedea0SLionel Sambuc 		    return ret;
156ebfedea0SLionel Sambuc 		o += sub_len;
157ebfedea0SLionel Sambuc 	    } else {
158ebfedea0SLionel Sambuc 		if (o >= *out_len)
159ebfedea0SLionel Sambuc 		    return WIND_ERR_OVERRUN;
160ebfedea0SLionel Sambuc 		out[o++] = in[i];
161ebfedea0SLionel Sambuc 
162ebfedea0SLionel Sambuc 	    }
163ebfedea0SLionel Sambuc 	}
164ebfedea0SLionel Sambuc     }
165ebfedea0SLionel Sambuc     *out_len = o;
166ebfedea0SLionel Sambuc     return 0;
167ebfedea0SLionel Sambuc }
168ebfedea0SLionel Sambuc 
169ebfedea0SLionel Sambuc static void
swap_char(uint32_t * a,uint32_t * b)170ebfedea0SLionel Sambuc swap_char(uint32_t * a, uint32_t * b)
171ebfedea0SLionel Sambuc {
172ebfedea0SLionel Sambuc     uint32_t t;
173ebfedea0SLionel Sambuc     t = *a;
174ebfedea0SLionel Sambuc     *a = *b;
175ebfedea0SLionel Sambuc     *b = t;
176ebfedea0SLionel Sambuc }
177ebfedea0SLionel Sambuc 
178ebfedea0SLionel Sambuc /* Unicode 5.2.0 D109 Canonical Ordering for a sequence of code points
179ebfedea0SLionel Sambuc  * that all have Canonical_Combining_Class > 0 */
180ebfedea0SLionel Sambuc static void
canonical_reorder_sequence(uint32_t * a,size_t len)181ebfedea0SLionel Sambuc canonical_reorder_sequence(uint32_t * a, size_t len)
182ebfedea0SLionel Sambuc {
183ebfedea0SLionel Sambuc     size_t i, j;
184ebfedea0SLionel Sambuc 
185ebfedea0SLionel Sambuc     if (len <= 1)
186ebfedea0SLionel Sambuc 	return;
187ebfedea0SLionel Sambuc 
188ebfedea0SLionel Sambuc     for (i = 1; i < len; i++) {
189ebfedea0SLionel Sambuc 	for (j = i;
190ebfedea0SLionel Sambuc 	     j > 0 &&
191ebfedea0SLionel Sambuc 		 _wind_combining_class(a[j]) < _wind_combining_class(a[j-1]);
192ebfedea0SLionel Sambuc 	     j--)
193ebfedea0SLionel Sambuc 	    swap_char(&a[j], &a[j-1]);
194ebfedea0SLionel Sambuc     }
195ebfedea0SLionel Sambuc }
196ebfedea0SLionel Sambuc 
197ebfedea0SLionel Sambuc static void
canonical_reorder(uint32_t * tmp,size_t tmp_len)198ebfedea0SLionel Sambuc canonical_reorder(uint32_t *tmp, size_t tmp_len)
199ebfedea0SLionel Sambuc {
200ebfedea0SLionel Sambuc     size_t i;
201ebfedea0SLionel Sambuc 
202ebfedea0SLionel Sambuc     for (i = 0; i < tmp_len; ++i) {
203ebfedea0SLionel Sambuc 	int cc = _wind_combining_class(tmp[i]);
204ebfedea0SLionel Sambuc 	if (cc) {
205ebfedea0SLionel Sambuc 	    size_t j;
206ebfedea0SLionel Sambuc 	    for (j = i + 1;
207ebfedea0SLionel Sambuc 		 j < tmp_len && _wind_combining_class(tmp[j]);
208ebfedea0SLionel Sambuc 		 ++j)
209ebfedea0SLionel Sambuc 		;
210ebfedea0SLionel Sambuc 	    canonical_reorder_sequence(&tmp[i], j - i);
211ebfedea0SLionel Sambuc 	    i = j;
212ebfedea0SLionel Sambuc 	}
213ebfedea0SLionel Sambuc     }
214ebfedea0SLionel Sambuc }
215ebfedea0SLionel Sambuc 
216ebfedea0SLionel Sambuc static uint32_t
find_composition(const uint32_t * in,unsigned in_len)217ebfedea0SLionel Sambuc find_composition(const uint32_t *in, unsigned in_len)
218ebfedea0SLionel Sambuc {
219ebfedea0SLionel Sambuc     unsigned short canon_index = 0;
220ebfedea0SLionel Sambuc     uint32_t cur;
221ebfedea0SLionel Sambuc     unsigned n = 0;
222ebfedea0SLionel Sambuc 
223ebfedea0SLionel Sambuc     cur = hangul_composition(in, in_len);
224ebfedea0SLionel Sambuc     if (cur)
225ebfedea0SLionel Sambuc 	return cur;
226ebfedea0SLionel Sambuc 
227ebfedea0SLionel Sambuc     do {
228ebfedea0SLionel Sambuc 	const struct canon_node *c = &_wind_canon_table[canon_index];
229ebfedea0SLionel Sambuc 	unsigned i;
230ebfedea0SLionel Sambuc 
231ebfedea0SLionel Sambuc 	if (n % 5 == 0) {
232ebfedea0SLionel Sambuc 	    cur = *in++;
233ebfedea0SLionel Sambuc 	    if (in_len-- == 0)
234ebfedea0SLionel Sambuc 		return c->val;
235ebfedea0SLionel Sambuc 	}
236ebfedea0SLionel Sambuc 
237ebfedea0SLionel Sambuc 	i = cur >> 16;
238ebfedea0SLionel Sambuc 	if (i < c->next_start || i >= c->next_end)
239ebfedea0SLionel Sambuc 	    canon_index = 0;
240ebfedea0SLionel Sambuc 	else
241ebfedea0SLionel Sambuc 	    canon_index =
242ebfedea0SLionel Sambuc 		_wind_canon_next_table[c->next_offset + i - c->next_start];
243ebfedea0SLionel Sambuc 	if (canon_index != 0) {
244ebfedea0SLionel Sambuc 	    cur = (cur << 4) & 0xFFFFF;
245ebfedea0SLionel Sambuc 	    ++n;
246ebfedea0SLionel Sambuc 	}
247ebfedea0SLionel Sambuc     } while (canon_index != 0);
248ebfedea0SLionel Sambuc     return 0;
249ebfedea0SLionel Sambuc }
250ebfedea0SLionel Sambuc 
251ebfedea0SLionel Sambuc static int
combine(const uint32_t * in,size_t in_len,uint32_t * out,size_t * out_len)252ebfedea0SLionel Sambuc combine(const uint32_t *in, size_t in_len,
253ebfedea0SLionel Sambuc 	uint32_t *out, size_t *out_len)
254ebfedea0SLionel Sambuc {
255ebfedea0SLionel Sambuc     unsigned i;
256ebfedea0SLionel Sambuc     int ostarter;
257ebfedea0SLionel Sambuc     unsigned o = 0;
258ebfedea0SLionel Sambuc     int old_cc;
259ebfedea0SLionel Sambuc 
260ebfedea0SLionel Sambuc     for (i = 0; i < in_len;) {
261ebfedea0SLionel Sambuc 	while (i < in_len && _wind_combining_class(in[i]) != 0) {
262ebfedea0SLionel Sambuc 	    out[o++] = in[i++];
263ebfedea0SLionel Sambuc 	}
264ebfedea0SLionel Sambuc 	if (i < in_len) {
265ebfedea0SLionel Sambuc 	    if (o >= *out_len)
266ebfedea0SLionel Sambuc 		return WIND_ERR_OVERRUN;
267ebfedea0SLionel Sambuc 	    ostarter = o;
268ebfedea0SLionel Sambuc 	    out[o++] = in[i++];
269ebfedea0SLionel Sambuc 	    old_cc   = -1;
270ebfedea0SLionel Sambuc 
271ebfedea0SLionel Sambuc 	    while (i < in_len) {
272ebfedea0SLionel Sambuc 		uint32_t comb;
273ebfedea0SLionel Sambuc 		uint32_t v[2];
274ebfedea0SLionel Sambuc 		int cc;
275ebfedea0SLionel Sambuc 
276ebfedea0SLionel Sambuc 		v[0] = out[ostarter];
277ebfedea0SLionel Sambuc 		v[1] = in[i];
278ebfedea0SLionel Sambuc 
279ebfedea0SLionel Sambuc 		cc = _wind_combining_class(in[i]);
280ebfedea0SLionel Sambuc 		if (old_cc != cc && (comb = find_composition(v, 2))) {
281ebfedea0SLionel Sambuc 		    out[ostarter] = comb;
282ebfedea0SLionel Sambuc 		} else if (cc == 0) {
283ebfedea0SLionel Sambuc 		    break;
284ebfedea0SLionel Sambuc 		} else {
285ebfedea0SLionel Sambuc 		    if (o >= *out_len)
286ebfedea0SLionel Sambuc 			return WIND_ERR_OVERRUN;
287ebfedea0SLionel Sambuc 		    out[o++] = in[i];
288ebfedea0SLionel Sambuc 		    old_cc   = cc;
289ebfedea0SLionel Sambuc 		}
290ebfedea0SLionel Sambuc 		++i;
291ebfedea0SLionel Sambuc 	    }
292ebfedea0SLionel Sambuc 	}
293ebfedea0SLionel Sambuc     }
294ebfedea0SLionel Sambuc     *out_len = o;
295ebfedea0SLionel Sambuc     return 0;
296ebfedea0SLionel Sambuc }
297ebfedea0SLionel Sambuc 
298ebfedea0SLionel Sambuc int
_wind_stringprep_normalize(const uint32_t * in,size_t in_len,uint32_t * out,size_t * out_len)299ebfedea0SLionel Sambuc _wind_stringprep_normalize(const uint32_t *in, size_t in_len,
300ebfedea0SLionel Sambuc 			   uint32_t *out, size_t *out_len)
301ebfedea0SLionel Sambuc {
302ebfedea0SLionel Sambuc     size_t tmp_len;
303ebfedea0SLionel Sambuc     uint32_t *tmp;
304ebfedea0SLionel Sambuc     int ret;
305ebfedea0SLionel Sambuc 
306ebfedea0SLionel Sambuc     if (in_len == 0) {
307ebfedea0SLionel Sambuc 	*out_len = 0;
308ebfedea0SLionel Sambuc 	return 0;
309ebfedea0SLionel Sambuc     }
310ebfedea0SLionel Sambuc 
311ebfedea0SLionel Sambuc     tmp_len = in_len * 4;
312ebfedea0SLionel Sambuc     if (tmp_len < MAX_LENGTH_CANON)
313ebfedea0SLionel Sambuc 	tmp_len = MAX_LENGTH_CANON;
314ebfedea0SLionel Sambuc     tmp = malloc(tmp_len * sizeof(uint32_t));
315ebfedea0SLionel Sambuc     if (tmp == NULL)
316ebfedea0SLionel Sambuc 	return ENOMEM;
317ebfedea0SLionel Sambuc 
318ebfedea0SLionel Sambuc     ret = compat_decomp(in, in_len, tmp, &tmp_len);
319ebfedea0SLionel Sambuc     if (ret) {
320ebfedea0SLionel Sambuc 	free(tmp);
321ebfedea0SLionel Sambuc 	return ret;
322ebfedea0SLionel Sambuc     }
323ebfedea0SLionel Sambuc     canonical_reorder(tmp, tmp_len);
324ebfedea0SLionel Sambuc     ret = combine(tmp, tmp_len, out, out_len);
325ebfedea0SLionel Sambuc     free(tmp);
326ebfedea0SLionel Sambuc     return ret;
327ebfedea0SLionel Sambuc }
328