1*0a6a1f1dSLionel Sambuc /* $NetBSD: utf8.c,v 1.1.1.2 2014/04/24 12:45:56 pettai Exp $ */
2ebfedea0SLionel Sambuc
3ebfedea0SLionel Sambuc /*
4ebfedea0SLionel Sambuc * Copyright (c) 2004, 2006, 2007, 2008 Kungliga Tekniska Högskolan
5ebfedea0SLionel Sambuc * (Royal Institute of Technology, Stockholm, Sweden).
6ebfedea0SLionel Sambuc * All rights reserved.
7ebfedea0SLionel Sambuc *
8ebfedea0SLionel Sambuc * Redistribution and use in source and binary forms, with or without
9ebfedea0SLionel Sambuc * modification, are permitted provided that the following conditions
10ebfedea0SLionel Sambuc * are met:
11ebfedea0SLionel Sambuc *
12ebfedea0SLionel Sambuc * 1. Redistributions of source code must retain the above copyright
13ebfedea0SLionel Sambuc * notice, this list of conditions and the following disclaimer.
14ebfedea0SLionel Sambuc *
15ebfedea0SLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
16ebfedea0SLionel Sambuc * notice, this list of conditions and the following disclaimer in the
17ebfedea0SLionel Sambuc * documentation and/or other materials provided with the distribution.
18ebfedea0SLionel Sambuc *
19ebfedea0SLionel Sambuc * 3. Neither the name of the Institute nor the names of its contributors
20ebfedea0SLionel Sambuc * may be used to endorse or promote products derived from this software
21ebfedea0SLionel Sambuc * without specific prior written permission.
22ebfedea0SLionel Sambuc *
23ebfedea0SLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24ebfedea0SLionel Sambuc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25ebfedea0SLionel Sambuc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26ebfedea0SLionel Sambuc * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27ebfedea0SLionel Sambuc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28ebfedea0SLionel Sambuc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29ebfedea0SLionel Sambuc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30ebfedea0SLionel Sambuc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31ebfedea0SLionel Sambuc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32ebfedea0SLionel Sambuc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33ebfedea0SLionel Sambuc * SUCH DAMAGE.
34ebfedea0SLionel Sambuc */
35ebfedea0SLionel Sambuc
36ebfedea0SLionel Sambuc #include <config.h>
37ebfedea0SLionel Sambuc #include "windlocl.h"
38ebfedea0SLionel Sambuc
39ebfedea0SLionel Sambuc static int
utf8toutf32(const unsigned char ** pp,uint32_t * out)40ebfedea0SLionel Sambuc utf8toutf32(const unsigned char **pp, uint32_t *out)
41ebfedea0SLionel Sambuc {
42ebfedea0SLionel Sambuc const unsigned char *p = *pp;
43ebfedea0SLionel Sambuc unsigned c = *p;
44ebfedea0SLionel Sambuc
45ebfedea0SLionel Sambuc if (c & 0x80) {
46ebfedea0SLionel Sambuc if ((c & 0xE0) == 0xC0) {
47ebfedea0SLionel Sambuc const unsigned c2 = *++p;
48ebfedea0SLionel Sambuc if ((c2 & 0xC0) == 0x80) {
49ebfedea0SLionel Sambuc *out = ((c & 0x1F) << 6)
50ebfedea0SLionel Sambuc | (c2 & 0x3F);
51ebfedea0SLionel Sambuc } else {
52ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
53ebfedea0SLionel Sambuc }
54ebfedea0SLionel Sambuc } else if ((c & 0xF0) == 0xE0) {
55ebfedea0SLionel Sambuc const unsigned c2 = *++p;
56ebfedea0SLionel Sambuc if ((c2 & 0xC0) == 0x80) {
57ebfedea0SLionel Sambuc const unsigned c3 = *++p;
58ebfedea0SLionel Sambuc if ((c3 & 0xC0) == 0x80) {
59ebfedea0SLionel Sambuc *out = ((c & 0x0F) << 12)
60ebfedea0SLionel Sambuc | ((c2 & 0x3F) << 6)
61ebfedea0SLionel Sambuc | (c3 & 0x3F);
62ebfedea0SLionel Sambuc } else {
63ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
64ebfedea0SLionel Sambuc }
65ebfedea0SLionel Sambuc } else {
66ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
67ebfedea0SLionel Sambuc }
68ebfedea0SLionel Sambuc } else if ((c & 0xF8) == 0xF0) {
69ebfedea0SLionel Sambuc const unsigned c2 = *++p;
70ebfedea0SLionel Sambuc if ((c2 & 0xC0) == 0x80) {
71ebfedea0SLionel Sambuc const unsigned c3 = *++p;
72ebfedea0SLionel Sambuc if ((c3 & 0xC0) == 0x80) {
73ebfedea0SLionel Sambuc const unsigned c4 = *++p;
74ebfedea0SLionel Sambuc if ((c4 & 0xC0) == 0x80) {
75ebfedea0SLionel Sambuc *out = ((c & 0x07) << 18)
76ebfedea0SLionel Sambuc | ((c2 & 0x3F) << 12)
77ebfedea0SLionel Sambuc | ((c3 & 0x3F) << 6)
78ebfedea0SLionel Sambuc | (c4 & 0x3F);
79ebfedea0SLionel Sambuc } else {
80ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
81ebfedea0SLionel Sambuc }
82ebfedea0SLionel Sambuc } else {
83ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
84ebfedea0SLionel Sambuc }
85ebfedea0SLionel Sambuc } else {
86ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
87ebfedea0SLionel Sambuc }
88ebfedea0SLionel Sambuc } else {
89ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF8;
90ebfedea0SLionel Sambuc }
91ebfedea0SLionel Sambuc } else {
92ebfedea0SLionel Sambuc *out = c;
93ebfedea0SLionel Sambuc }
94ebfedea0SLionel Sambuc
95ebfedea0SLionel Sambuc *pp = p;
96ebfedea0SLionel Sambuc
97ebfedea0SLionel Sambuc return 0;
98ebfedea0SLionel Sambuc }
99ebfedea0SLionel Sambuc
100ebfedea0SLionel Sambuc /**
101ebfedea0SLionel Sambuc * Convert an UTF-8 string to an UCS4 string.
102ebfedea0SLionel Sambuc *
103ebfedea0SLionel Sambuc * @param in an UTF-8 string to convert.
104ebfedea0SLionel Sambuc * @param out the resulting UCS4 strint, must be at least
105ebfedea0SLionel Sambuc * wind_utf8ucs4_length() long. If out is NULL, the function will
106ebfedea0SLionel Sambuc * calculate the needed space for the out variable (just like
107ebfedea0SLionel Sambuc * wind_utf8ucs4_length()).
108ebfedea0SLionel Sambuc * @param out_len before processing out_len should be the length of
109ebfedea0SLionel Sambuc * the out variable, after processing it will be the length of the out
110ebfedea0SLionel Sambuc * string.
111ebfedea0SLionel Sambuc *
112ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise
113ebfedea0SLionel Sambuc * @ingroup wind
114ebfedea0SLionel Sambuc */
115ebfedea0SLionel Sambuc
116ebfedea0SLionel Sambuc int
wind_utf8ucs4(const char * in,uint32_t * out,size_t * out_len)117ebfedea0SLionel Sambuc wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len)
118ebfedea0SLionel Sambuc {
119ebfedea0SLionel Sambuc const unsigned char *p;
120ebfedea0SLionel Sambuc size_t o = 0;
121ebfedea0SLionel Sambuc int ret;
122ebfedea0SLionel Sambuc
123ebfedea0SLionel Sambuc for (p = (const unsigned char *)in; *p != '\0'; ++p) {
124ebfedea0SLionel Sambuc uint32_t u;
125ebfedea0SLionel Sambuc
126ebfedea0SLionel Sambuc ret = utf8toutf32(&p, &u);
127ebfedea0SLionel Sambuc if (ret)
128ebfedea0SLionel Sambuc return ret;
129ebfedea0SLionel Sambuc
130ebfedea0SLionel Sambuc if (out) {
131ebfedea0SLionel Sambuc if (o >= *out_len)
132ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
133ebfedea0SLionel Sambuc out[o] = u;
134ebfedea0SLionel Sambuc }
135ebfedea0SLionel Sambuc o++;
136ebfedea0SLionel Sambuc }
137ebfedea0SLionel Sambuc *out_len = o;
138ebfedea0SLionel Sambuc return 0;
139ebfedea0SLionel Sambuc }
140ebfedea0SLionel Sambuc
/**
 * Calculate the number of UCS4 code points that converting a UTF-8
 * string produces.
 *
 * @param in the NUL-terminated UTF-8 string to measure.
 * @param out_len set to the length of the resulting UCS4 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs4_length(const char *in, size_t *out_len)
{
    /* a NULL output buffer puts wind_utf8ucs4() in count-only mode */
    uint32_t *const count_only = NULL;

    return wind_utf8ucs4(in, count_only, out_len);
}
157ebfedea0SLionel Sambuc
/*
 * Marker bits OR-ed into the first byte of an UTF-8 sequence, indexed
 * by (sequence length - 1).  Kept unsigned: 0xC0, 0xE0 and 0xF0 do not
 * fit in a signed char and would otherwise be stored as negative
 * values that sign-extend when combined with the code point.
 */
static const unsigned char first_char[4] =
    { 0x00, 0xC0, 0xE0, 0xF0 };
160ebfedea0SLionel Sambuc
161ebfedea0SLionel Sambuc /**
162ebfedea0SLionel Sambuc * Convert an UCS4 string to a UTF-8 string.
163ebfedea0SLionel Sambuc *
164ebfedea0SLionel Sambuc * @param in an UCS4 string to convert.
165ebfedea0SLionel Sambuc * @param in_len the length input array.
166ebfedea0SLionel Sambuc
167ebfedea0SLionel Sambuc * @param out the resulting UTF-8 strint, must be at least
168ebfedea0SLionel Sambuc * wind_ucs4utf8_length() + 1 long (the extra char for the NUL). If
169ebfedea0SLionel Sambuc * out is NULL, the function will calculate the needed space for the
170ebfedea0SLionel Sambuc * out variable (just like wind_ucs4utf8_length()).
171ebfedea0SLionel Sambuc
172ebfedea0SLionel Sambuc * @param out_len before processing out_len should be the length of
173ebfedea0SLionel Sambuc * the out variable, after processing it will be the length of the out
174ebfedea0SLionel Sambuc * string.
175ebfedea0SLionel Sambuc *
176ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise
177ebfedea0SLionel Sambuc * @ingroup wind
178ebfedea0SLionel Sambuc */
179ebfedea0SLionel Sambuc
180ebfedea0SLionel Sambuc int
wind_ucs4utf8(const uint32_t * in,size_t in_len,char * out,size_t * out_len)181ebfedea0SLionel Sambuc wind_ucs4utf8(const uint32_t *in, size_t in_len, char *out, size_t *out_len)
182ebfedea0SLionel Sambuc {
183ebfedea0SLionel Sambuc uint32_t ch;
184ebfedea0SLionel Sambuc size_t i, len, o;
185ebfedea0SLionel Sambuc
186ebfedea0SLionel Sambuc for (o = 0, i = 0; i < in_len; i++) {
187ebfedea0SLionel Sambuc ch = in[i];
188ebfedea0SLionel Sambuc
189ebfedea0SLionel Sambuc if (ch < 0x80) {
190ebfedea0SLionel Sambuc len = 1;
191ebfedea0SLionel Sambuc } else if (ch < 0x800) {
192ebfedea0SLionel Sambuc len = 2;
193ebfedea0SLionel Sambuc } else if (ch < 0x10000) {
194ebfedea0SLionel Sambuc len = 3;
195ebfedea0SLionel Sambuc } else if (ch <= 0x10FFFF) {
196ebfedea0SLionel Sambuc len = 4;
197ebfedea0SLionel Sambuc } else
198ebfedea0SLionel Sambuc return WIND_ERR_INVALID_UTF32;
199ebfedea0SLionel Sambuc
200ebfedea0SLionel Sambuc o += len;
201ebfedea0SLionel Sambuc
202ebfedea0SLionel Sambuc if (out) {
203ebfedea0SLionel Sambuc if (o >= *out_len)
204ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
205ebfedea0SLionel Sambuc
206ebfedea0SLionel Sambuc switch(len) {
207ebfedea0SLionel Sambuc case 4:
208ebfedea0SLionel Sambuc out[3] = (ch | 0x80) & 0xbf;
209*0a6a1f1dSLionel Sambuc ch = ch >> 6;
210ebfedea0SLionel Sambuc case 3:
211ebfedea0SLionel Sambuc out[2] = (ch | 0x80) & 0xbf;
212*0a6a1f1dSLionel Sambuc ch = ch >> 6;
213ebfedea0SLionel Sambuc case 2:
214ebfedea0SLionel Sambuc out[1] = (ch | 0x80) & 0xbf;
215*0a6a1f1dSLionel Sambuc ch = ch >> 6;
216ebfedea0SLionel Sambuc case 1:
217ebfedea0SLionel Sambuc out[0] = ch | first_char[len - 1];
218ebfedea0SLionel Sambuc }
219ebfedea0SLionel Sambuc }
220ebfedea0SLionel Sambuc out += len;
221ebfedea0SLionel Sambuc }
222ebfedea0SLionel Sambuc if (out) {
223ebfedea0SLionel Sambuc if (o + 1 >= *out_len)
224ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
225ebfedea0SLionel Sambuc *out = '\0';
226ebfedea0SLionel Sambuc }
227ebfedea0SLionel Sambuc *out_len = o;
228ebfedea0SLionel Sambuc return 0;
229ebfedea0SLionel Sambuc }
230ebfedea0SLionel Sambuc
/**
 * Calculate the number of bytes that converting a UCS4 string to an
 * UTF-8 string produces.
 *
 * @param in an UCS4 string to measure.
 * @param in_len the number of elements in the UCS4 string.
 * @param out_len set to the length of the resulting UTF-8 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len)
{
    /* a NULL output buffer asks wind_ucs4utf8() for the length only */
    char *const count_only = NULL;

    return wind_ucs4utf8(in, in_len, count_only, out_len);
}
247ebfedea0SLionel Sambuc
248ebfedea0SLionel Sambuc /**
249ebfedea0SLionel Sambuc * Read in an UCS2 from a buffer.
250ebfedea0SLionel Sambuc *
251ebfedea0SLionel Sambuc * @param ptr The input buffer to read from.
252ebfedea0SLionel Sambuc * @param len the length of the input buffer.
253ebfedea0SLionel Sambuc * @param flags Flags to control the behavior of the function.
254ebfedea0SLionel Sambuc * @param out the output UCS2, the array must be at least out/2 long.
255ebfedea0SLionel Sambuc * @param out_len the output length
256ebfedea0SLionel Sambuc *
257ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise.
258ebfedea0SLionel Sambuc * @ingroup wind
259ebfedea0SLionel Sambuc */
260ebfedea0SLionel Sambuc
261ebfedea0SLionel Sambuc int
wind_ucs2read(const void * ptr,size_t len,unsigned int * flags,uint16_t * out,size_t * out_len)262ebfedea0SLionel Sambuc wind_ucs2read(const void *ptr, size_t len, unsigned int *flags,
263ebfedea0SLionel Sambuc uint16_t *out, size_t *out_len)
264ebfedea0SLionel Sambuc {
265ebfedea0SLionel Sambuc const unsigned char *p = ptr;
266ebfedea0SLionel Sambuc int little = ((*flags) & WIND_RW_LE);
267ebfedea0SLionel Sambuc size_t olen = *out_len;
268ebfedea0SLionel Sambuc
269ebfedea0SLionel Sambuc /** if len is zero, flags are unchanged */
270ebfedea0SLionel Sambuc if (len == 0) {
271ebfedea0SLionel Sambuc *out_len = 0;
272ebfedea0SLionel Sambuc return 0;
273ebfedea0SLionel Sambuc }
274ebfedea0SLionel Sambuc
275ebfedea0SLionel Sambuc /** if len is odd, WIND_ERR_LENGTH_NOT_MOD2 is returned */
276ebfedea0SLionel Sambuc if (len & 1)
277ebfedea0SLionel Sambuc return WIND_ERR_LENGTH_NOT_MOD2;
278ebfedea0SLionel Sambuc
279ebfedea0SLionel Sambuc /**
280ebfedea0SLionel Sambuc * If the flags WIND_RW_BOM is set, check for BOM. If not BOM is
281ebfedea0SLionel Sambuc * found, check is LE/BE flag is already and use that otherwise
282ebfedea0SLionel Sambuc * fail with WIND_ERR_NO_BOM. When done, clear WIND_RW_BOM and
283ebfedea0SLionel Sambuc * the LE/BE flag and set the resulting LE/BE flag.
284ebfedea0SLionel Sambuc */
285ebfedea0SLionel Sambuc if ((*flags) & WIND_RW_BOM) {
286ebfedea0SLionel Sambuc uint16_t bom = (p[0] << 8) + p[1];
287ebfedea0SLionel Sambuc if (bom == 0xfffe || bom == 0xfeff) {
288ebfedea0SLionel Sambuc little = (bom == 0xfffe);
289ebfedea0SLionel Sambuc p += 2;
290ebfedea0SLionel Sambuc len -= 2;
291ebfedea0SLionel Sambuc } else if (((*flags) & (WIND_RW_LE|WIND_RW_BE)) != 0) {
292ebfedea0SLionel Sambuc /* little already set */
293ebfedea0SLionel Sambuc } else
294ebfedea0SLionel Sambuc return WIND_ERR_NO_BOM;
295ebfedea0SLionel Sambuc *flags = ((*flags) & ~(WIND_RW_BOM|WIND_RW_LE|WIND_RW_BE));
296ebfedea0SLionel Sambuc *flags |= little ? WIND_RW_LE : WIND_RW_BE;
297ebfedea0SLionel Sambuc }
298ebfedea0SLionel Sambuc
299ebfedea0SLionel Sambuc while (len) {
300ebfedea0SLionel Sambuc if (olen < 1)
301ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
302ebfedea0SLionel Sambuc if (little)
303ebfedea0SLionel Sambuc *out = (p[1] << 8) + p[0];
304ebfedea0SLionel Sambuc else
305ebfedea0SLionel Sambuc *out = (p[0] << 8) + p[1];
306ebfedea0SLionel Sambuc out++; p += 2; len -= 2; olen--;
307ebfedea0SLionel Sambuc }
308ebfedea0SLionel Sambuc *out_len -= olen;
309ebfedea0SLionel Sambuc return 0;
310ebfedea0SLionel Sambuc }
311ebfedea0SLionel Sambuc
312ebfedea0SLionel Sambuc /**
313ebfedea0SLionel Sambuc * Write an UCS2 string to a buffer.
314ebfedea0SLionel Sambuc *
315ebfedea0SLionel Sambuc * @param in The input UCS2 string.
316ebfedea0SLionel Sambuc * @param in_len the length of the input buffer.
317ebfedea0SLionel Sambuc * @param flags Flags to control the behavior of the function.
318ebfedea0SLionel Sambuc * @param ptr The input buffer to write to, the array must be at least
319ebfedea0SLionel Sambuc * (in + 1) * 2 bytes long.
320ebfedea0SLionel Sambuc * @param out_len the output length
321ebfedea0SLionel Sambuc *
322ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise.
323ebfedea0SLionel Sambuc * @ingroup wind
324ebfedea0SLionel Sambuc */
325ebfedea0SLionel Sambuc
326ebfedea0SLionel Sambuc int
wind_ucs2write(const uint16_t * in,size_t in_len,unsigned int * flags,void * ptr,size_t * out_len)327ebfedea0SLionel Sambuc wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags,
328ebfedea0SLionel Sambuc void *ptr, size_t *out_len)
329ebfedea0SLionel Sambuc {
330ebfedea0SLionel Sambuc unsigned char *p = ptr;
331ebfedea0SLionel Sambuc size_t len = *out_len;
332ebfedea0SLionel Sambuc
333ebfedea0SLionel Sambuc /** If in buffer is not of length be mod 2, WIND_ERR_LENGTH_NOT_MOD2 is returned*/
334ebfedea0SLionel Sambuc if (len & 1)
335ebfedea0SLionel Sambuc return WIND_ERR_LENGTH_NOT_MOD2;
336ebfedea0SLionel Sambuc
337ebfedea0SLionel Sambuc /** On zero input length, flags are preserved */
338ebfedea0SLionel Sambuc if (in_len == 0) {
339ebfedea0SLionel Sambuc *out_len = 0;
340ebfedea0SLionel Sambuc return 0;
341ebfedea0SLionel Sambuc }
342ebfedea0SLionel Sambuc /** If flags have WIND_RW_BOM set, the byte order mark is written
343ebfedea0SLionel Sambuc * first to the output data */
344ebfedea0SLionel Sambuc if ((*flags) & WIND_RW_BOM) {
345ebfedea0SLionel Sambuc uint16_t bom = 0xfffe;
346ebfedea0SLionel Sambuc
347ebfedea0SLionel Sambuc if (len < 2)
348ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
349ebfedea0SLionel Sambuc
350ebfedea0SLionel Sambuc if ((*flags) & WIND_RW_LE) {
351*0a6a1f1dSLionel Sambuc p[0] = (bom ) & 0xff;
352*0a6a1f1dSLionel Sambuc p[1] = (bom >> 8) & 0xff;
353ebfedea0SLionel Sambuc } else {
354ebfedea0SLionel Sambuc p[1] = (bom ) & 0xff;
355ebfedea0SLionel Sambuc p[0] = (bom >> 8) & 0xff;
356ebfedea0SLionel Sambuc }
357ebfedea0SLionel Sambuc len -= 2;
358ebfedea0SLionel Sambuc }
359ebfedea0SLionel Sambuc
360ebfedea0SLionel Sambuc while (in_len) {
361ebfedea0SLionel Sambuc /** If the output wont fit into out_len, WIND_ERR_OVERRUN is returned */
362ebfedea0SLionel Sambuc if (len < 2)
363ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
364ebfedea0SLionel Sambuc if ((*flags) & WIND_RW_LE) {
365*0a6a1f1dSLionel Sambuc p[0] = (in[0] ) & 0xff;
366*0a6a1f1dSLionel Sambuc p[1] = (in[0] >> 8) & 0xff;
367ebfedea0SLionel Sambuc } else {
368ebfedea0SLionel Sambuc p[1] = (in[0] ) & 0xff;
369ebfedea0SLionel Sambuc p[0] = (in[0] >> 8) & 0xff;
370ebfedea0SLionel Sambuc }
371ebfedea0SLionel Sambuc len -= 2;
372ebfedea0SLionel Sambuc in_len--;
373ebfedea0SLionel Sambuc p += 2;
374ebfedea0SLionel Sambuc in++;
375ebfedea0SLionel Sambuc }
376ebfedea0SLionel Sambuc *out_len -= len;
377ebfedea0SLionel Sambuc return 0;
378ebfedea0SLionel Sambuc }
379ebfedea0SLionel Sambuc
380ebfedea0SLionel Sambuc
381ebfedea0SLionel Sambuc /**
382ebfedea0SLionel Sambuc * Convert an UTF-8 string to an UCS2 string.
383ebfedea0SLionel Sambuc *
384ebfedea0SLionel Sambuc * @param in an UTF-8 string to convert.
385ebfedea0SLionel Sambuc * @param out the resulting UCS2 strint, must be at least
386ebfedea0SLionel Sambuc * wind_utf8ucs2_length() long. If out is NULL, the function will
387ebfedea0SLionel Sambuc * calculate the needed space for the out variable (just like
388ebfedea0SLionel Sambuc * wind_utf8ucs2_length()).
389ebfedea0SLionel Sambuc * @param out_len before processing out_len should be the length of
390ebfedea0SLionel Sambuc * the out variable, after processing it will be the length of the out
391ebfedea0SLionel Sambuc * string.
392ebfedea0SLionel Sambuc *
393ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise
394ebfedea0SLionel Sambuc * @ingroup wind
395ebfedea0SLionel Sambuc */
396ebfedea0SLionel Sambuc
397ebfedea0SLionel Sambuc int
wind_utf8ucs2(const char * in,uint16_t * out,size_t * out_len)398ebfedea0SLionel Sambuc wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len)
399ebfedea0SLionel Sambuc {
400ebfedea0SLionel Sambuc const unsigned char *p;
401ebfedea0SLionel Sambuc size_t o = 0;
402ebfedea0SLionel Sambuc int ret;
403ebfedea0SLionel Sambuc
404ebfedea0SLionel Sambuc for (p = (const unsigned char *)in; *p != '\0'; ++p) {
405ebfedea0SLionel Sambuc uint32_t u;
406ebfedea0SLionel Sambuc
407ebfedea0SLionel Sambuc ret = utf8toutf32(&p, &u);
408ebfedea0SLionel Sambuc if (ret)
409ebfedea0SLionel Sambuc return ret;
410ebfedea0SLionel Sambuc
411ebfedea0SLionel Sambuc if (u & 0xffff0000)
412ebfedea0SLionel Sambuc return WIND_ERR_NOT_UTF16;
413ebfedea0SLionel Sambuc
414ebfedea0SLionel Sambuc if (out) {
415ebfedea0SLionel Sambuc if (o >= *out_len)
416ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
417ebfedea0SLionel Sambuc out[o] = u;
418ebfedea0SLionel Sambuc }
419ebfedea0SLionel Sambuc o++;
420ebfedea0SLionel Sambuc }
421ebfedea0SLionel Sambuc *out_len = o;
422ebfedea0SLionel Sambuc return 0;
423ebfedea0SLionel Sambuc }
424ebfedea0SLionel Sambuc
/**
 * Calculate the number of UCS2 characters that converting a UTF-8
 * string produces.
 *
 * @param in the NUL-terminated UTF-8 string to measure.
 * @param out_len set to the length of the resulting UCS2 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs2_length(const char *in, size_t *out_len)
{
    /* a NULL output buffer puts wind_utf8ucs2() in count-only mode */
    uint16_t *const count_only = NULL;

    return wind_utf8ucs2(in, count_only, out_len);
}
441ebfedea0SLionel Sambuc
442ebfedea0SLionel Sambuc /**
443ebfedea0SLionel Sambuc * Convert an UCS2 string to a UTF-8 string.
444ebfedea0SLionel Sambuc *
445ebfedea0SLionel Sambuc * @param in an UCS2 string to convert.
446ebfedea0SLionel Sambuc * @param in_len the length of the in UCS2 string.
447ebfedea0SLionel Sambuc * @param out the resulting UTF-8 strint, must be at least
448ebfedea0SLionel Sambuc * wind_ucs2utf8_length() long. If out is NULL, the function will
449ebfedea0SLionel Sambuc * calculate the needed space for the out variable (just like
450ebfedea0SLionel Sambuc * wind_ucs2utf8_length()).
451ebfedea0SLionel Sambuc * @param out_len before processing out_len should be the length of
452ebfedea0SLionel Sambuc * the out variable, after processing it will be the length of the out
453ebfedea0SLionel Sambuc * string.
454ebfedea0SLionel Sambuc *
455ebfedea0SLionel Sambuc * @return returns 0 on success, an wind error code otherwise
456ebfedea0SLionel Sambuc * @ingroup wind
457ebfedea0SLionel Sambuc */
458ebfedea0SLionel Sambuc
459ebfedea0SLionel Sambuc int
wind_ucs2utf8(const uint16_t * in,size_t in_len,char * out,size_t * out_len)460ebfedea0SLionel Sambuc wind_ucs2utf8(const uint16_t *in, size_t in_len, char *out, size_t *out_len)
461ebfedea0SLionel Sambuc {
462ebfedea0SLionel Sambuc uint16_t ch;
463ebfedea0SLionel Sambuc size_t i, len, o;
464ebfedea0SLionel Sambuc
465ebfedea0SLionel Sambuc for (o = 0, i = 0; i < in_len; i++) {
466ebfedea0SLionel Sambuc ch = in[i];
467ebfedea0SLionel Sambuc
468ebfedea0SLionel Sambuc if (ch < 0x80) {
469ebfedea0SLionel Sambuc len = 1;
470ebfedea0SLionel Sambuc } else if (ch < 0x800) {
471ebfedea0SLionel Sambuc len = 2;
472ebfedea0SLionel Sambuc } else
473ebfedea0SLionel Sambuc len = 3;
474ebfedea0SLionel Sambuc
475ebfedea0SLionel Sambuc o += len;
476ebfedea0SLionel Sambuc
477ebfedea0SLionel Sambuc if (out) {
478ebfedea0SLionel Sambuc if (o >= *out_len)
479ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
480ebfedea0SLionel Sambuc
481ebfedea0SLionel Sambuc switch(len) {
482ebfedea0SLionel Sambuc case 3:
483ebfedea0SLionel Sambuc out[2] = (ch | 0x80) & 0xbf;
484*0a6a1f1dSLionel Sambuc ch = ch >> 6;
485ebfedea0SLionel Sambuc case 2:
486ebfedea0SLionel Sambuc out[1] = (ch | 0x80) & 0xbf;
487*0a6a1f1dSLionel Sambuc ch = ch >> 6;
488ebfedea0SLionel Sambuc case 1:
489ebfedea0SLionel Sambuc out[0] = ch | first_char[len - 1];
490ebfedea0SLionel Sambuc }
491ebfedea0SLionel Sambuc out += len;
492ebfedea0SLionel Sambuc }
493ebfedea0SLionel Sambuc }
494ebfedea0SLionel Sambuc if (out) {
495ebfedea0SLionel Sambuc if (o >= *out_len)
496ebfedea0SLionel Sambuc return WIND_ERR_OVERRUN;
497ebfedea0SLionel Sambuc *out = '\0';
498ebfedea0SLionel Sambuc }
499ebfedea0SLionel Sambuc *out_len = o;
500ebfedea0SLionel Sambuc return 0;
501ebfedea0SLionel Sambuc }
502ebfedea0SLionel Sambuc
/**
 * Calculate the number of bytes that converting a UCS2 string to an
 * UTF-8 string produces.
 *
 * @param in an UCS2 string to measure.
 * @param in_len the number of elements in the UCS2 string.
 * @param out_len set to the length of the resulting UTF-8 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs2utf8_length(const uint16_t *in, size_t in_len, size_t *out_len)
{
    /* a NULL output buffer asks wind_ucs2utf8() for the length only */
    char *const count_only = NULL;

    return wind_ucs2utf8(in, in_len, count_only, out_len);
}
519