1*d3273b5bSchristos /* $NetBSD: punycode.c,v 1.2 2017/01/28 21:31:50 christos Exp $ */
2ca1c9b0cSelric
3ca1c9b0cSelric /*
4ca1c9b0cSelric * Copyright (c) 2004 Kungliga Tekniska Högskolan
5ca1c9b0cSelric * (Royal Institute of Technology, Stockholm, Sweden).
6ca1c9b0cSelric * All rights reserved.
7ca1c9b0cSelric *
8ca1c9b0cSelric * Redistribution and use in source and binary forms, with or without
9ca1c9b0cSelric * modification, are permitted provided that the following conditions
10ca1c9b0cSelric * are met:
11ca1c9b0cSelric *
12ca1c9b0cSelric * 1. Redistributions of source code must retain the above copyright
13ca1c9b0cSelric * notice, this list of conditions and the following disclaimer.
14ca1c9b0cSelric *
15ca1c9b0cSelric * 2. Redistributions in binary form must reproduce the above copyright
16ca1c9b0cSelric * notice, this list of conditions and the following disclaimer in the
17ca1c9b0cSelric * documentation and/or other materials provided with the distribution.
18ca1c9b0cSelric *
19ca1c9b0cSelric * 3. Neither the name of the Institute nor the names of its contributors
20ca1c9b0cSelric * may be used to endorse or promote products derived from this software
21ca1c9b0cSelric * without specific prior written permission.
22ca1c9b0cSelric *
23ca1c9b0cSelric * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24ca1c9b0cSelric * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25ca1c9b0cSelric * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ca1c9b0cSelric * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27ca1c9b0cSelric * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28ca1c9b0cSelric * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29ca1c9b0cSelric * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30ca1c9b0cSelric * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31ca1c9b0cSelric * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32ca1c9b0cSelric * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33ca1c9b0cSelric * SUCH DAMAGE.
34ca1c9b0cSelric */
35ca1c9b0cSelric
36ca1c9b0cSelric #ifdef HAVE_CONFIG_H
37ca1c9b0cSelric #include <config.h>
38ca1c9b0cSelric #endif
39ca1c9b0cSelric #include <string.h>
40ca1c9b0cSelric #include "windlocl.h"
41ca1c9b0cSelric
42ca1c9b0cSelric static const unsigned base = 36;
43ca1c9b0cSelric static const unsigned t_min = 1;
44ca1c9b0cSelric static const unsigned t_max = 26;
45ca1c9b0cSelric static const unsigned skew = 38;
46ca1c9b0cSelric static const unsigned damp = 700;
47ca1c9b0cSelric static const unsigned initial_n = 128;
48ca1c9b0cSelric static const unsigned initial_bias = 72;
49ca1c9b0cSelric
50ca1c9b0cSelric static unsigned
digit(unsigned n)51ca1c9b0cSelric digit(unsigned n)
52ca1c9b0cSelric {
53ca1c9b0cSelric return "abcdefghijklmnopqrstuvwxyz0123456789"[n];
54ca1c9b0cSelric }
55ca1c9b0cSelric
56ca1c9b0cSelric static unsigned
adapt(unsigned delta,unsigned numpoints,int first)57ca1c9b0cSelric adapt(unsigned delta, unsigned numpoints, int first)
58ca1c9b0cSelric {
59ca1c9b0cSelric unsigned k;
60ca1c9b0cSelric
61ca1c9b0cSelric if (first)
62ca1c9b0cSelric delta = delta / damp;
63ca1c9b0cSelric else
64ca1c9b0cSelric delta /= 2;
65ca1c9b0cSelric delta += delta / numpoints;
66ca1c9b0cSelric k = 0;
67ca1c9b0cSelric while (delta > ((base - t_min) * t_max) / 2) {
68ca1c9b0cSelric delta /= base - t_min;
69ca1c9b0cSelric k += base;
70ca1c9b0cSelric }
71ca1c9b0cSelric return k + (((base - t_min + 1) * delta) / (delta + skew));
72ca1c9b0cSelric }
73ca1c9b0cSelric
74ca1c9b0cSelric /**
75ca1c9b0cSelric * Convert an UCS4 string to a puny-coded DNS label string suitable
76ca1c9b0cSelric * when combined with delimiters and other labels for DNS lookup.
77ca1c9b0cSelric *
78ca1c9b0cSelric * @param in an UCS4 string to convert
79ca1c9b0cSelric * @param in_len the length of in.
80ca1c9b0cSelric * @param out the resulting puny-coded string. The string is not NUL
81ca1c9b0cSelric * terminatied.
82ca1c9b0cSelric * @param out_len before processing out_len should be the length of
83ca1c9b0cSelric * the out variable, after processing it will be the length of the out
84ca1c9b0cSelric * string.
85ca1c9b0cSelric *
86ca1c9b0cSelric * @return returns 0 on success, an wind error code otherwise
87ca1c9b0cSelric * @ingroup wind
88ca1c9b0cSelric */
89ca1c9b0cSelric
90ca1c9b0cSelric int
wind_punycode_label_toascii(const uint32_t * in,size_t in_len,char * out,size_t * out_len)91ca1c9b0cSelric wind_punycode_label_toascii(const uint32_t *in, size_t in_len,
92ca1c9b0cSelric char *out, size_t *out_len)
93ca1c9b0cSelric {
94ca1c9b0cSelric unsigned n = initial_n;
95ca1c9b0cSelric unsigned delta = 0;
96ca1c9b0cSelric unsigned bias = initial_bias;
97ca1c9b0cSelric unsigned h = 0;
98ca1c9b0cSelric unsigned b;
99ca1c9b0cSelric unsigned i;
100ca1c9b0cSelric unsigned o = 0;
101ca1c9b0cSelric unsigned m;
102ca1c9b0cSelric
103ca1c9b0cSelric for (i = 0; i < in_len; ++i) {
104ca1c9b0cSelric if (in[i] < 0x80) {
105ca1c9b0cSelric ++h;
106ca1c9b0cSelric if (o >= *out_len)
107ca1c9b0cSelric return WIND_ERR_OVERRUN;
108ca1c9b0cSelric out[o++] = in[i];
109ca1c9b0cSelric }
110ca1c9b0cSelric }
111ca1c9b0cSelric b = h;
112ca1c9b0cSelric if (b > 0) {
113ca1c9b0cSelric if (o >= *out_len)
114ca1c9b0cSelric return WIND_ERR_OVERRUN;
115ca1c9b0cSelric out[o++] = 0x2D;
116ca1c9b0cSelric }
117ca1c9b0cSelric /* is this string punycoded */
118ca1c9b0cSelric if (h < in_len) {
119ca1c9b0cSelric if (o + 4 >= *out_len)
120ca1c9b0cSelric return WIND_ERR_OVERRUN;
121ca1c9b0cSelric memmove(out + 4, out, o);
122ca1c9b0cSelric memcpy(out, "xn--", 4);
123ca1c9b0cSelric o += 4;
124ca1c9b0cSelric }
125ca1c9b0cSelric
126ca1c9b0cSelric while (h < in_len) {
127ca1c9b0cSelric m = (unsigned)-1;
128ca1c9b0cSelric for (i = 0; i < in_len; ++i)
129ca1c9b0cSelric if(in[i] < m && in[i] >= n)
130ca1c9b0cSelric m = in[i];
131ca1c9b0cSelric
132ca1c9b0cSelric delta += (m - n) * (h + 1);
133ca1c9b0cSelric n = m;
134ca1c9b0cSelric for (i = 0; i < in_len; ++i) {
135ca1c9b0cSelric if (in[i] < n) {
136ca1c9b0cSelric ++delta;
137ca1c9b0cSelric } else if (in[i] == n) {
138ca1c9b0cSelric unsigned q = delta;
139ca1c9b0cSelric unsigned k;
140ca1c9b0cSelric for (k = base; ; k += base) {
141ca1c9b0cSelric unsigned t;
142ca1c9b0cSelric if (k <= bias)
143ca1c9b0cSelric t = t_min;
144ca1c9b0cSelric else if (k >= bias + t_max)
145ca1c9b0cSelric t = t_max;
146ca1c9b0cSelric else
147ca1c9b0cSelric t = k - bias;
148ca1c9b0cSelric if (q < t)
149ca1c9b0cSelric break;
150ca1c9b0cSelric if (o >= *out_len)
151ca1c9b0cSelric return WIND_ERR_OVERRUN;
152ca1c9b0cSelric out[o++] = digit(t + ((q - t) % (base - t)));
153ca1c9b0cSelric q = (q - t) / (base - t);
154ca1c9b0cSelric }
155ca1c9b0cSelric if (o >= *out_len)
156ca1c9b0cSelric return WIND_ERR_OVERRUN;
157ca1c9b0cSelric out[o++] = digit(q);
158ca1c9b0cSelric /* output */
159ca1c9b0cSelric bias = adapt(delta, h + 1, h == b);
160ca1c9b0cSelric delta = 0;
161ca1c9b0cSelric ++h;
162ca1c9b0cSelric }
163ca1c9b0cSelric }
164ca1c9b0cSelric ++delta;
165ca1c9b0cSelric ++n;
166ca1c9b0cSelric }
167ca1c9b0cSelric
168ca1c9b0cSelric *out_len = o;
169ca1c9b0cSelric return 0;
170ca1c9b0cSelric }
171