xref: /netbsd-src/crypto/external/bsd/heimdal/dist/lib/wind/punycode.c (revision d3273b5b76f5afaafe308cead5511dbb8df8c5e9)
1*d3273b5bSchristos /*	$NetBSD: punycode.c,v 1.2 2017/01/28 21:31:50 christos Exp $	*/
2ca1c9b0cSelric 
3ca1c9b0cSelric /*
4ca1c9b0cSelric  * Copyright (c) 2004 Kungliga Tekniska Högskolan
5ca1c9b0cSelric  * (Royal Institute of Technology, Stockholm, Sweden).
6ca1c9b0cSelric  * All rights reserved.
7ca1c9b0cSelric  *
8ca1c9b0cSelric  * Redistribution and use in source and binary forms, with or without
9ca1c9b0cSelric  * modification, are permitted provided that the following conditions
10ca1c9b0cSelric  * are met:
11ca1c9b0cSelric  *
12ca1c9b0cSelric  * 1. Redistributions of source code must retain the above copyright
13ca1c9b0cSelric  *    notice, this list of conditions and the following disclaimer.
14ca1c9b0cSelric  *
15ca1c9b0cSelric  * 2. Redistributions in binary form must reproduce the above copyright
16ca1c9b0cSelric  *    notice, this list of conditions and the following disclaimer in the
17ca1c9b0cSelric  *    documentation and/or other materials provided with the distribution.
18ca1c9b0cSelric  *
19ca1c9b0cSelric  * 3. Neither the name of the Institute nor the names of its contributors
20ca1c9b0cSelric  *    may be used to endorse or promote products derived from this software
21ca1c9b0cSelric  *    without specific prior written permission.
22ca1c9b0cSelric  *
23ca1c9b0cSelric  * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24ca1c9b0cSelric  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25ca1c9b0cSelric  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ca1c9b0cSelric  * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27ca1c9b0cSelric  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28ca1c9b0cSelric  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29ca1c9b0cSelric  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30ca1c9b0cSelric  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31ca1c9b0cSelric  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32ca1c9b0cSelric  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33ca1c9b0cSelric  * SUCH DAMAGE.
34ca1c9b0cSelric  */
35ca1c9b0cSelric 
36ca1c9b0cSelric #ifdef HAVE_CONFIG_H
37ca1c9b0cSelric #include <config.h>
38ca1c9b0cSelric #endif
39ca1c9b0cSelric #include <string.h>
40ca1c9b0cSelric #include "windlocl.h"
41ca1c9b0cSelric 
42ca1c9b0cSelric static const unsigned base         = 36;
43ca1c9b0cSelric static const unsigned t_min        = 1;
44ca1c9b0cSelric static const unsigned t_max        = 26;
45ca1c9b0cSelric static const unsigned skew         = 38;
46ca1c9b0cSelric static const unsigned damp         = 700;
47ca1c9b0cSelric static const unsigned initial_n    = 128;
48ca1c9b0cSelric static const unsigned initial_bias = 72;
49ca1c9b0cSelric 
50ca1c9b0cSelric static unsigned
digit(unsigned n)51ca1c9b0cSelric digit(unsigned n)
52ca1c9b0cSelric {
53ca1c9b0cSelric     return "abcdefghijklmnopqrstuvwxyz0123456789"[n];
54ca1c9b0cSelric }
55ca1c9b0cSelric 
56ca1c9b0cSelric static unsigned
adapt(unsigned delta,unsigned numpoints,int first)57ca1c9b0cSelric adapt(unsigned delta, unsigned numpoints, int first)
58ca1c9b0cSelric {
59ca1c9b0cSelric     unsigned k;
60ca1c9b0cSelric 
61ca1c9b0cSelric     if (first)
62ca1c9b0cSelric 	delta = delta / damp;
63ca1c9b0cSelric     else
64ca1c9b0cSelric 	delta /= 2;
65ca1c9b0cSelric     delta += delta / numpoints;
66ca1c9b0cSelric     k = 0;
67ca1c9b0cSelric     while (delta > ((base - t_min) * t_max) / 2) {
68ca1c9b0cSelric 	delta /= base - t_min;
69ca1c9b0cSelric 	k += base;
70ca1c9b0cSelric     }
71ca1c9b0cSelric     return k + (((base - t_min + 1) * delta) / (delta + skew));
72ca1c9b0cSelric }
73ca1c9b0cSelric 
74ca1c9b0cSelric /**
75ca1c9b0cSelric  * Convert an UCS4 string to a puny-coded DNS label string suitable
76ca1c9b0cSelric  * when combined with delimiters and other labels for DNS lookup.
77ca1c9b0cSelric  *
78ca1c9b0cSelric  * @param in an UCS4 string to convert
79ca1c9b0cSelric  * @param in_len the length of in.
80ca1c9b0cSelric  * @param out the resulting puny-coded string. The string is not NUL
81ca1c9b0cSelric  * terminatied.
82ca1c9b0cSelric  * @param out_len before processing out_len should be the length of
83ca1c9b0cSelric  * the out variable, after processing it will be the length of the out
84ca1c9b0cSelric  * string.
85ca1c9b0cSelric  *
86ca1c9b0cSelric  * @return returns 0 on success, an wind error code otherwise
87ca1c9b0cSelric  * @ingroup wind
88ca1c9b0cSelric  */
89ca1c9b0cSelric 
90ca1c9b0cSelric int
wind_punycode_label_toascii(const uint32_t * in,size_t in_len,char * out,size_t * out_len)91ca1c9b0cSelric wind_punycode_label_toascii(const uint32_t *in, size_t in_len,
92ca1c9b0cSelric 			    char *out, size_t *out_len)
93ca1c9b0cSelric {
94ca1c9b0cSelric     unsigned n     = initial_n;
95ca1c9b0cSelric     unsigned delta = 0;
96ca1c9b0cSelric     unsigned bias  = initial_bias;
97ca1c9b0cSelric     unsigned h = 0;
98ca1c9b0cSelric     unsigned b;
99ca1c9b0cSelric     unsigned i;
100ca1c9b0cSelric     unsigned o = 0;
101ca1c9b0cSelric     unsigned m;
102ca1c9b0cSelric 
103ca1c9b0cSelric     for (i = 0; i < in_len; ++i) {
104ca1c9b0cSelric 	if (in[i] < 0x80) {
105ca1c9b0cSelric 	    ++h;
106ca1c9b0cSelric 	    if (o >= *out_len)
107ca1c9b0cSelric 		return WIND_ERR_OVERRUN;
108ca1c9b0cSelric 	    out[o++] = in[i];
109ca1c9b0cSelric 	}
110ca1c9b0cSelric     }
111ca1c9b0cSelric     b = h;
112ca1c9b0cSelric     if (b > 0) {
113ca1c9b0cSelric 	if (o >= *out_len)
114ca1c9b0cSelric 	    return WIND_ERR_OVERRUN;
115ca1c9b0cSelric 	out[o++] = 0x2D;
116ca1c9b0cSelric     }
117ca1c9b0cSelric     /* is this string punycoded */
118ca1c9b0cSelric     if (h < in_len) {
119ca1c9b0cSelric 	if (o + 4 >= *out_len)
120ca1c9b0cSelric 	    return WIND_ERR_OVERRUN;
121ca1c9b0cSelric 	memmove(out + 4, out, o);
122ca1c9b0cSelric 	memcpy(out, "xn--", 4);
123ca1c9b0cSelric 	o += 4;
124ca1c9b0cSelric     }
125ca1c9b0cSelric 
126ca1c9b0cSelric     while (h < in_len) {
127ca1c9b0cSelric 	m = (unsigned)-1;
128ca1c9b0cSelric 	for (i = 0; i < in_len; ++i)
129ca1c9b0cSelric 	    if(in[i] < m && in[i] >= n)
130ca1c9b0cSelric 		m = in[i];
131ca1c9b0cSelric 
132ca1c9b0cSelric 	delta += (m - n) * (h + 1);
133ca1c9b0cSelric 	n = m;
134ca1c9b0cSelric 	for (i = 0; i < in_len; ++i) {
135ca1c9b0cSelric 	    if (in[i] < n) {
136ca1c9b0cSelric 		++delta;
137ca1c9b0cSelric 	    } else if (in[i] == n) {
138ca1c9b0cSelric 		unsigned q = delta;
139ca1c9b0cSelric 		unsigned k;
140ca1c9b0cSelric 		for (k = base; ; k += base) {
141ca1c9b0cSelric 		    unsigned t;
142ca1c9b0cSelric 		    if (k <= bias)
143ca1c9b0cSelric 			t = t_min;
144ca1c9b0cSelric 		    else if (k >= bias + t_max)
145ca1c9b0cSelric 			t = t_max;
146ca1c9b0cSelric 		    else
147ca1c9b0cSelric 			t = k - bias;
148ca1c9b0cSelric 		    if (q < t)
149ca1c9b0cSelric 			break;
150ca1c9b0cSelric 		    if (o >= *out_len)
151ca1c9b0cSelric 			return WIND_ERR_OVERRUN;
152ca1c9b0cSelric 		    out[o++] = digit(t + ((q - t) % (base - t)));
153ca1c9b0cSelric 		    q = (q - t) / (base - t);
154ca1c9b0cSelric 		}
155ca1c9b0cSelric 		if (o >= *out_len)
156ca1c9b0cSelric 		    return WIND_ERR_OVERRUN;
157ca1c9b0cSelric 		out[o++] = digit(q);
158ca1c9b0cSelric 		/* output */
159ca1c9b0cSelric 		bias = adapt(delta, h + 1, h == b);
160ca1c9b0cSelric 		delta = 0;
161ca1c9b0cSelric 		++h;
162ca1c9b0cSelric 	    }
163ca1c9b0cSelric 	}
164ca1c9b0cSelric 	++delta;
165ca1c9b0cSelric 	++n;
166ca1c9b0cSelric     }
167ca1c9b0cSelric 
168ca1c9b0cSelric     *out_len = o;
169ca1c9b0cSelric     return 0;
170ca1c9b0cSelric }
171