xref: /netbsd-src/lib/libc/citrus/modules/citrus_dechanyu.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /* $NetBSD: citrus_dechanyu.c,v 1.3 2008/06/14 16:01:07 tnozaki Exp $ */
2 
3 /*-
4  * Copyright (c)2007 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 #include <sys/cdefs.h>
29 #if defined(LIBC_SCCS) && !defined(lint)
30 __RCSID("$NetBSD: citrus_dechanyu.c,v 1.3 2008/06/14 16:01:07 tnozaki Exp $");
31 #endif /* LIBC_SCCS and not lint */
32 
33 #include <sys/types.h>
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <stddef.h>
41 #include <wchar.h>
42 #include <limits.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_bcs.h"
47 #include "citrus_module.h"
48 #include "citrus_ctype.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_dechanyu.h"
51 
52 /* ----------------------------------------------------------------------
53  * private stuffs used by templates
54  */
55 
/*
 * Conversion state: the bytes of a multibyte character that has only
 * been partially processed so far.  chlen == 0 is the initial state.
 */
typedef struct {
	int chlen;	/* number of valid bytes currently held in ch[] */
	char ch[4];	/* pending bytes; 4 matches _ENCODING_MB_CUR_MAX */
} _DECHanyuState;
60 
/*
 * Per-encoding information.  Nothing in this file reads or writes the
 * member; it only keeps the structure non-empty.
 */
typedef struct {
	int dummy;	/* placeholder, never accessed in this file */
} _DECHanyuEncodingInfo;
64 
/*
 * Per-locale ctype data: the encoding information plus one private
 * conversion state for each multibyte function.  Individual members are
 * selected through _CEI_TO_EI() and _CEI_TO_STATE().
 */
typedef struct {
	_DECHanyuEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_DECHanyuState	s_mblen;
		_DECHanyuState	s_mbrlen;
		_DECHanyuState	s_mbrtowc;
		_DECHanyuState	s_mbtowc;
		_DECHanyuState	s_mbsrtowcs;
		_DECHanyuState	s_wcrtomb;
		_DECHanyuState	s_wcsrtombs;
		_DECHanyuState	s_wctomb;
	} states;
} _DECHanyuCTypeInfo;
79 
/*
 * Configuration for the template code.  These names are presumably
 * consumed by citrus_ctype_template.h and citrus_stdenc_template.h,
 * which are included at the end of this file.
 */

/* Pointer to the encoding info embedded in a _DECHanyuCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The per-function state, e.g. _CEI_TO_STATE(cei, mbrtowc) -> states.s_mbrtowc. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Builds the module-specific function name _citrus_DECHanyu_<m>. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _CTYPE_INFO			_DECHanyuCTypeInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest sequence: two prefix bytes followed by a lead and a trail byte. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
90 
91 static __inline void
92 /*ARGSUSED*/
93 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei,
94 	_DECHanyuState * __restrict psenc)
95 {
96 	/* ei may be null */
97 	_DIAGASSERT(psenc != NULL);
98 
99 	psenc->chlen = 0;
100 }
101 
102 static __inline void
103 /*ARGSUSED*/
104 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei,
105 	void * __restrict pspriv,
106 	const _DECHanyuState * __restrict psenc)
107 {
108 	/* ei may be null */
109 	_DIAGASSERT(pspriv != NULL);
110 	_DIAGASSERT(psenc != NULL);
111 
112 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
113 }
114 
115 static __inline void
116 /*ARGSUSED*/
117 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei,
118 	_DECHanyuState * __restrict psenc,
119 	const void * __restrict pspriv)
120 {
121 	/* ei may be null */
122 	_DIAGASSERT(psenc != NULL);
123 	_DIAGASSERT(pspriv != NULL);
124 
125 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
126 }
127 
128 static void
129 /*ARGSUSED*/
130 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei)
131 {
132 	/* ei may be null */
133 }
134 
135 static int
136 /*ARGSUSED*/
137 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei,
138 	const void * __restrict var, size_t lenvar)
139 {
140 	/* ei may be null */
141 	return 0;
142 }
143 
/*
 * Returns 1 when c is at most 0x7F (a single-byte character), else 0.
 * Callers in this file pass a byte already masked with 0xFF.
 */
static __inline int
is_singlebyte(int c)
{
	return (c < 0x80);
}
149 
/*
 * Returns 1 when c lies in 0xA1..0xFE, the range accepted as the first
 * byte of a two-byte character, else 0.
 */
static __inline int
is_leadbyte(int c)
{
	return !(c < 0xA1 || c > 0xFE);
}
155 
/*
 * Returns 1 when c, with bit 7 ignored, lies in 0x21..0x7E, else 0.
 * Both 0x21..0x7E and 0xA1..0xFE are therefore accepted as the second
 * byte of a two-byte character.
 */
static __inline int
is_trailbyte(int c)
{
	const int low = c & ~0x80;

	return (0x21 <= low && low <= 0x7E);
}
162 
/*
 * Returns 1 when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character, else 0.
 */
static __inline int
is_hanyu1(int c)
{
	const int prefix1 = 0xC2;

	return (c == prefix1) ? 1 : 0;
}
168 
/*
 * Returns 1 when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character, else 0.
 */
static __inline int
is_hanyu2(int c)
{
	const int prefix2 = 0xCB;

	return (c == prefix2) ? 1 : 0;
}
174 
/*
 * Marker kept in the upper 16 bits of the wide character for four-byte
 * characters: the prefix bytes 0xC2 (is_hanyu1) and 0xCB (is_hanyu2).
 */
#define HANYUBIT	0xC2CB0000
176 
/*
 * Returns 1 when c lies in 0x21..0x7E, i.e. is a valid row or column
 * value of a 94-character set, else 0.
 */
static __inline int
is_94charset(int c)
{
	return !(c < 0x21 || c > 0x7E);
}
182 
183 static int
184 /*ARGSUSED*/
185 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
186 	wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
187 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
188 {
189 	const char *s0;
190 	int ch, i;
191 	wchar_t wc;
192 
193 	/* ei may be unused */
194 	_DIAGASSERT(s != NULL);
195 	_DIAGASSERT(psenc != NULL);
196 	_DIAGASSERT(nresult != NULL);
197 
198 	if (*s == NULL) {
199 		_citrus_DECHanyu_init_state(ei, psenc);
200 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
201 		return 0;
202 	}
203 	s0 = *s;
204 
205 	wc = (wchar_t)0;
206 	switch (psenc->chlen) {
207 	case 0:
208 		if (n-- < 1)
209 			goto restart;
210 		ch = *s0++ & 0xFF;
211 		if (is_singlebyte(ch) != 0) {
212 			if (pwc != NULL)
213 				*pwc = (wchar_t)ch;
214 			*nresult = (size_t)((ch == 0) ? 0 : 1);
215 			*s = s0;
216 			return 0;
217 		}
218 		if (is_leadbyte(ch) == 0)
219 			goto ilseq;
220 		psenc->ch[psenc->chlen++] = ch;
221 		break;
222 	case 1:
223 		ch = psenc->ch[0] & 0xFF;
224 		if (is_leadbyte(ch) == 0)
225 			return EINVAL;
226 		break;
227 	case 2: case 3:
228 		ch = psenc->ch[0] & 0xFF;
229 		if (is_hanyu1(ch) != 0) {
230 			ch = psenc->ch[1] & 0xFF;
231 			if (is_hanyu2(ch) != 0) {
232 				wc |= (wchar_t)HANYUBIT;
233 				break;
234 			}
235 		}
236 	/*FALLTHROUGH*/
237 	default:
238 		return EINVAL;
239 	}
240 
241 	switch (psenc->chlen) {
242 	case 1:
243 		if (is_hanyu1(ch) != 0) {
244 			if (n-- < 1)
245 				goto restart;
246 			ch = *s0++ & 0xFF;
247 			if (is_hanyu2(ch) == 0)
248 				goto ilseq;
249 			psenc->ch[psenc->chlen++] = ch;
250 			wc |= (wchar_t)HANYUBIT;
251 			if (n-- < 1)
252 				goto restart;
253 			ch = *s0++ & 0xFF;
254 			if (is_leadbyte(ch) == 0)
255 				goto ilseq;
256 			psenc->ch[psenc->chlen++] = ch;
257 		}
258 		break;
259 	case 2:
260 		if (n-- < 1)
261 			goto restart;
262 		ch = *s0++ & 0xFF;
263 		if (is_leadbyte(ch) == 0)
264 			goto ilseq;
265 		psenc->ch[psenc->chlen++] = ch;
266 		break;
267 	case 3:
268 		ch = psenc->ch[2] & 0xFF;
269 		if (is_leadbyte(ch) == 0)
270 			return EINVAL;
271 	}
272 	if (n-- < 1)
273 		goto restart;
274 	wc |= (wchar_t)(ch << 8);
275 	ch = *s0++ & 0xFF;
276 	if (is_trailbyte(ch) == 0)
277 		goto ilseq;
278 	wc |= (wchar_t)ch;
279 	if (pwc != NULL)
280 		*pwc = wc;
281 	*nresult = (size_t)(s0 - *s);
282 	*s = s0;
283 	psenc->chlen = 0;
284 
285 	return 0;
286 
287 restart:
288 	*nresult = (size_t)-2;
289 	*s = s0;
290 	return 0;
291 
292 ilseq:
293 	*nresult = (size_t)-1;
294 	return EILSEQ;
295 }
296 
297 static int
298 /*ARGSUSED*/
299 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei,
300 	char * __restrict s, size_t n, wchar_t wc,
301 	_DECHanyuState * __restrict psenc, size_t * __restrict nresult)
302 {
303 	int ch;
304 
305 	/* ei may be unused */
306 	_DIAGASSERT(s != NULL);
307 	_DIAGASSERT(psenc != NULL);
308 	_DIAGASSERT(nresult != NULL);
309 
310 	if (psenc->chlen != 0)
311 		return EINVAL;
312 
313 	/* XXX: assume wchar_t as int */
314 	if ((uint32_t)wc <= 0x7F) {
315 		ch = wc & 0xFF;
316 	} else {
317 		if ((uint32_t)wc > 0xFFFF) {
318 			if ((wc & ~0xFFFF) != HANYUBIT)
319 				goto ilseq;
320 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
321 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
322 			wc &= 0xFFFF;
323 		}
324 		ch = (wc >> 8) & 0xFF;
325 		if (!is_leadbyte(ch))
326 			goto ilseq;
327 		psenc->ch[psenc->chlen++] = ch;
328 		ch = wc & 0xFF;
329 		if (is_trailbyte(ch) == 0)
330 			goto ilseq;
331 	}
332 	psenc->ch[psenc->chlen++] = ch;
333 	if (n < psenc->chlen) {
334 		*nresult = (size_t)-1;
335 		return E2BIG;
336 	}
337 	memcpy(s, psenc->ch, psenc->chlen);
338 	*nresult = psenc->chlen;
339 	psenc->chlen = 0;
340 
341 	return 0;
342 
343 ilseq:
344 	*nresult = (size_t)-1;
345 	return EILSEQ;
346 }
347 
348 static __inline int
349 /*ARGSUSED*/
350 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei,
351 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
352 {
353 	int plane;
354 	wchar_t mask;
355 
356 	/* ei may be unused */
357 	_DIAGASSERT(csid != NULL);
358 	_DIAGASSERT(idx != NULL);
359 
360 	plane = 0;
361 	mask = 0x7F;
362 	/* XXX: assume wchar_t as int */
363 	if ((uint32_t)wc > 0x7F) {
364 		if ((uint32_t)wc > 0xFFFF) {
365 			if ((wc & ~0xFFFF) != HANYUBIT)
366 				return EILSEQ;
367 			plane += 2;
368 		}
369 		if (is_leadbyte((wc >> 8) & 0xFF) == 0 ||
370 		    is_trailbyte(wc & 0xFF) == 0)
371 			return EILSEQ;
372 		plane += (wc & 0x80) ? 1 : 2;
373 		mask |= 0x7F00;
374 	}
375 	*csid = plane;
376 	*idx = (_index_t)(wc & mask);
377 
378 	return 0;
379 }
380 
381 static __inline int
382 /*ARGSUSED*/
383 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei,
384 	wchar_t * __restrict wc, _csid_t csid, _index_t idx)
385 {
386 	/* ei may be unused */
387 	_DIAGASSERT(wc != NULL);
388 
389 	if (csid == 0) {
390 		if (idx > 0x7F)
391 			return EILSEQ;
392 	} else if (csid <= 4) {
393 		if (is_94charset(idx >> 8) == 0)
394 			return EILSEQ;
395 		if (is_94charset(idx & 0xFF) == 0)
396 			return EILSEQ;
397 		if (csid % 2)
398 			idx |= 0x80;
399 		idx |= 0x8000;
400 		if (csid > 2)
401 			idx |= HANYUBIT;
402 	} else
403 		return EILSEQ;
404 	*wc = (wchar_t)idx;
405 	return 0;
406 }
407 
408 static __inline int
409 /*ARGSUSED*/
410 _citrus_DECHanyu_stdenc_get_state_desc_generic(
411 	_DECHanyuEncodingInfo * __restrict ei,
412 	_DECHanyuState * __restrict psenc, int * __restrict rstate)
413 {
414 	/* ei may be unused */
415 	_DIAGASSERT(psenc != NULL);
416 	_DIAGASSERT(rstate != NULL);
417 
418 	*rstate = (psenc->chlen == 0)
419 	    ? _STDENC_SDGEN_INITIAL
420 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
421 	return 0;
422 }
423 
424 /* ----------------------------------------------------------------------
425  * public interface for ctype
426  */
427 
/*
 * NOTE(review): the two macros and the template header are defined
 * outside this file; presumably they declare and define the ctype entry
 * points in terms of the _FUNCNAME()/_ENCODING_* settings and the
 * *_priv functions above -- confirm against citrus_ctype_template.h.
 */
_CITRUS_CTYPE_DECLS(DECHanyu);
_CITRUS_CTYPE_DEF_OPS(DECHanyu);

#include "citrus_ctype_template.h"
432 
433 
434 /* ----------------------------------------------------------------------
435  * public interface for stdenc
436  */
437 
/*
 * NOTE(review): the two macros and the template header are defined
 * outside this file; presumably they declare and define the stdenc
 * entry points on top of the stdenc_* and *_priv functions above --
 * confirm against citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);

#include "citrus_stdenc_template.h"
442