xref: /netbsd-src/lib/libc/citrus/modules/citrus_gbk2k.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /* $NetBSD: citrus_gbk2k.c,v 1.3 2003/06/25 09:51:42 tshiozak Exp $ */
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_gbk2k.c,v 1.3 2003/06/25 09:51:42 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stddef.h>
40 #include <locale.h>
41 #include <wchar.h>
42 #include <sys/types.h>
43 #include <limits.h>
44 
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_bcs.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_gbk2k.h"
52 
53 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
57 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * collected so far but not yet turned into a wide character.
 */
typedef struct _GBK2KState {
	char	ch[4];	/* pending bytes, oldest first */
	int	chlen;	/* number of bytes currently stored in ch[] */
} _GBK2KState;
62 
/* Encoding-wide settings filled in by the module init routine. */
typedef struct {
	int	ei_mode;	/* bitwise OR of the _MODE_* flags below */
} _GBK2KEncodingInfo;

/* ei_mode flag: the converters refuse 4-byte sequences when it is set. */
#define _MODE_2BYTE	0x0001
67 
68 typedef struct {
69 	_GBK2KEncodingInfo	ei;
70 	struct {
71 		/* for future multi-locale facility */
72 		_GBK2KState	s_mblen;
73 		_GBK2KState	s_mbrlen;
74 		_GBK2KState	s_mbrtowc;
75 		_GBK2KState	s_mbtowc;
76 		_GBK2KState	s_mbsrtowcs;
77 		_GBK2KState	s_wcrtomb;
78 		_GBK2KState	s_wcsrtombs;
79 		_GBK2KState	s_wctomb;
80 	} states;
81 } _GBK2KCTypeInfo;
82 
/* Address of the encoding info embedded in a _GBK2KCTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The states.s_<func> member of a _GBK2KCTypeInfo, selected by name. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Parameters for this encoding.
 * NOTE(review): presumably consumed by the citrus_*_template.h headers
 * included at the end of this file -- confirm against the templates.
 */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _CTYPE_INFO			_GBK2KCTypeInfo
#define _ENCODING_STATE			_GBK2KState
/* Constant 4 regardless of the encoding info passed in. */
#define _ENCODING_MB_CUR_MAX(_ei_)	4
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
93 
94 static __inline void
95 /*ARGSUSED*/
96 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei,
97 			 _GBK2KState * __restrict s)
98 {
99 	memset(s, 0, sizeof(*s));
100 }
101 
102 static __inline void
103 /*ARGSUSED*/
104 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei,
105 			 void * __restrict pspriv,
106 			 const _GBK2KState * __restrict s)
107 {
108 	memcpy(pspriv, (const void *)s, sizeof(*s));
109 }
110 
111 static __inline void
112 /*ARGSUSED*/
113 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei,
114 			   _GBK2KState * __restrict s,
115 			   const void * __restrict pspriv)
116 {
117 	memcpy((void *)s, pspriv, sizeof(*s));
118 }
119 
/*
 * Non-zero when the low 8 bits of c form a complete single-byte
 * character (0x00-0x7f).
 */
static __inline int
_mb_singlebyte(int c)
{
	const int byte = c & 0xff;

	return (byte < 0x80);
}
126 
/*
 * Non-zero when the low 8 bits of c lie in the lead-byte range
 * 0x81-0xfe.
 */
static __inline int
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	return (byte >= 0x81 && byte <= 0xfe);
}
133 
/*
 * Non-zero when the low 8 bits of c can end a 2-byte character:
 * 0x40-0xfe with 0x7f excluded.
 */
static __inline int
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x40 || byte > 0xfe)
		return (0);
	return (byte != 0x7f);
}
140 
/*
 * Non-zero when the low 8 bits of c lie in 0x30-0x39, the range used
 * for the 2nd and 4th bytes of a 4-byte character.
 */
static __inline int
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	return (byte >= 0x30 && byte <= 0x39);
}
147 
/*
 * Number of bytes (1, 2 or 4) needed to encode wide character v,
 * judged purely by its magnitude as an unsigned 32-bit value.
 * Three-byte values are reported as 4.
 */
static __inline int
_mb_count(wchar_t v)
{
	const u_int32_t code = (u_int32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	if (code <= 0xffffU)
		return (2);
	return (4);
}
160 
/*
 * Both macros operate on a local variable named psenc.
 * _PSENC is the byte stored most recently; it is only meaningful while
 * psenc->chlen > 0.
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
/* Append c to the stored bytes; the expression yields the stored value. */
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
163 
164 static int
165 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
166 			   wchar_t * __restrict pwc,
167 			   const char ** __restrict s, size_t n,
168 			   _GBK2KState * __restrict psenc,
169 			   size_t * __restrict nresult)
170 {
171 	int chlenbak, len;
172 	const char *s0, *s1;
173 	wchar_t wc;
174 
175 	_DIAGASSERT(ei != NULL);
176 	/* pwc may be NULL */
177 	_DIAGASSERT(s != NULL);
178 	_DIAGASSERT(psenc != NULL);
179 
180 	s0 = *s;
181 
182 	if (s0 == NULL) {
183 		/* _citrus_GBK2K_init_state(ei, psenc); */
184 		psenc->chlen = 0;
185 		*nresult = 0;
186 		return (0);
187 	}
188 
189 	chlenbak = psenc->chlen;
190 
191 	switch (psenc->chlen) {
192 	case 3:
193 		if (!_mb_leadbyte (_PSENC))
194 			goto invalid;
195 	/* FALLTHROUGH */
196 	case 2:
197 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
198 			goto invalid;
199 	/* FALLTHROUGH */
200 	case 1:
201 		if (!_mb_leadbyte (_PSENC))
202 			goto invalid;
203 	/* FALLTHOROUGH */
204 	case 0:
205 		break;
206 	default:
207 		goto invalid;
208 	}
209 
210 	for (;;) {
211 		if (n-- < 1)
212 			goto restart;
213 
214 		_PUSH_PSENC(*s0++);
215 
216 		switch (psenc->chlen) {
217 		case 1:
218 			if (_mb_singlebyte(_PSENC))
219 				goto convert;
220 			if (_mb_leadbyte  (_PSENC))
221 				continue;
222 			goto ilseq;
223 		case 2:
224 			if (_mb_trailbyte (_PSENC))
225 				goto convert;
226 			if ((ei->ei_mode & _MODE_2BYTE) == 0 &&
227 			    _mb_surrogate (_PSENC))
228 				continue;
229 			goto ilseq;
230 		case 3:
231 			if (_mb_leadbyte  (_PSENC))
232 				continue;
233 			goto ilseq;
234 		case 4:
235 			if (_mb_surrogate (_PSENC))
236 				goto convert;
237 			goto ilseq;
238 		}
239 	}
240 
241 convert:
242 	len = psenc->chlen;
243 	s1  = &psenc->ch[0];
244 	wc  = 0;
245 	while (len-- > 0)
246 		wc = (wc << 8) | (*s1++ & 0xff);
247 
248 	if (pwc != NULL)
249 		*pwc = wc;
250 	*s = s0;
251 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
252 	/* _citrus_GBK2K_init_state(ei, psenc); */
253 	psenc->chlen = 0;
254 
255 	return (0);
256 
257 restart:
258 	*s = s0;
259 	*nresult = (size_t)-2;
260 
261 	return (0);
262 
263 invalid:
264 	return (EINVAL);
265 
266 ilseq:
267 	*nresult = (size_t)-1;
268 	return (EILSEQ);
269 }
270 
271 static int
272 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
273 			   char * __restrict s, size_t n, wchar_t wc,
274 			   _GBK2KState * __restrict psenc,
275 			   size_t * __restrict nresult)
276 {
277 	int len, ret;
278 
279 	_DIAGASSERT(ei != NULL);
280 	_DIAGASSERT(s != NULL);
281 	_DIAGASSERT(psenc != NULL);
282 
283 	if (psenc->chlen != 0) {
284 		ret = EINVAL;
285 		goto err;
286 	}
287 
288 	/* reset state */
289 	if (wc == 0) {
290 		*nresult = 0; /* stateless */
291 		return 0;
292 	}
293 
294 	len = _mb_count(wc);
295 	if (n < len) {
296 		ret = E2BIG;
297 		goto err;
298 	}
299 
300 	switch (len) {
301 	case 1:
302 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
303 			ret = EILSEQ;
304 			goto err;
305 		}
306 		break;
307 	case 2:
308 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
309 		    !_mb_trailbyte (_PUSH_PSENC(wc     ))) {
310 			ret = EILSEQ;
311 			goto err;
312 		}
313 		break;
314 	case 4:
315 		if ((ei->ei_mode & _MODE_2BYTE) != 0 ||
316 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
317 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
318 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
319 		    !_mb_surrogate (_PUSH_PSENC(wc      ))) {
320 			ret = EILSEQ;
321 			goto err;
322 		}
323 		break;
324 	}
325 
326 	_DIAGASSERT(len == psenc->chlen);
327 
328 	memcpy(s, psenc->ch, psenc->chlen);
329 	*nresult = psenc->chlen;
330 	/* _citrus_GBK2K_init_state(ei, psenc); */
331 	psenc->chlen = 0;
332 
333 	return (0);
334 
335 err:
336 	*nresult = (size_t)-1;
337 	return ret;
338 }
339 
340 static __inline int
341 /*ARGSUSED*/
342 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei,
343 			    _csid_t * __restrict csid,
344 			    _index_t * __restrict idx, wchar_t wc)
345 {
346 	u_int8_t ch, cl;
347 
348 	_DIAGASSERT(csid != NULL && idx != NULL);
349 
350 	if ((u_int32_t)wc<0x80) {
351 		/* ISO646 */
352 		*csid = 0;
353 		*idx = (_index_t)wc;
354 	} else if ((u_int32_t)wc>=0x10000) {
355 		/* GBKUCS : XXX */
356 		*csid = 3;
357 		*idx = (_index_t)wc;
358 	} else {
359 		ch = (u_int8_t)(wc >> 8);
360 		cl = (u_int8_t)wc;
361 		if (ch>=0xA1 && cl>=0xA1) {
362 			/* EUC G1 */
363 			*csid = 1;
364 			*idx = (_index_t)wc & 0x7F7FU;
365 		} else {
366 			/* extended area (0x8140-) */
367 			*csid = 2;
368 			*idx = (_index_t)wc;
369 		}
370 	}
371 
372 	return 0;
373 }
374 
375 static __inline int
376 /*ARGSUSED*/
377 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
378 			    wchar_t * __restrict wc,
379 			    _csid_t csid, _index_t idx)
380 {
381 
382 	_DIAGASSERT(wc != NULL);
383 
384 	switch (csid) {
385 	case 0:
386 		/* ISO646 */
387 		*wc = (wchar_t)idx;
388 		break;
389 	case 1:
390 		/* EUC G1 */
391 		*wc = (wchar_t)idx | 0x8080U;
392 		break;
393 	case 2:
394 		/* extended area */
395 		*wc = (wchar_t)idx;
396 		break;
397 	case 3:
398 		/* GBKUCS : XXX */
399 		if ((ei->ei_mode & _MODE_2BYTE) != 0)
400 			return EINVAL;
401 		*wc = (wchar_t)idx;
402 		break;
403 	default:
404 		return EILSEQ;
405 	}
406 
407 	return 0;
408 }
409 
410 static int
411 /*ARGSUSED*/
412 _citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
413 				   const void * __restrict var, size_t lenvar)
414 {
415 	const char *p;
416 
417 	_DIAGASSERT(ei != NULL);
418 
419 	p = var;
420 #define MATCH(x, act)                                           \
421 do {                                                            \
422         if (lenvar >= (sizeof(#x)-1) &&                         \
423             _bcs_strncasecmp(p, #x, sizeof(#x)-1) == 0) {       \
424                 act;                                            \
425                 lenvar -= sizeof(#x)-1;                         \
426                 p += sizeof(#x)-1;                              \
427         }                                                       \
428 } while (/*CONSTCOND*/0)
429 	while (lenvar>0) {
430 		switch (_bcs_tolower(*p)) {
431 		case '2':
432 			MATCH("2byte", ei->ei_mode |= _MODE_2BYTE);
433 			break;
434 		}
435 		p++;
436 		lenvar--;
437 	}
438 
439 	memset((void *)ei, 0, sizeof(*ei));
440 	return (0);
441 }
442 
443 static void
444 /*ARGSUSED*/
445 _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei)
446 {
447 }
448 
449 
450 /* ----------------------------------------------------------------------
451  * public interface for ctype
452  */
453 
/*
 * Both macros are defined outside this file; their expansion is not
 * visible here.
 * NOTE(review): presumably they declare the ctype entry points and the
 * ops table for the GBK2K module -- confirm in citrus_ctype.h.
 */
_CITRUS_CTYPE_DECLS(GBK2K);
_CITRUS_CTYPE_DEF_OPS(GBK2K);

/*
 * NOTE(review): presumably instantiates the generic ctype functions from
 * the _FUNCNAME/_ENCODING_* macros and the *_priv routines defined
 * earlier in this file -- confirm in the template.
 */
#include "citrus_ctype_template.h"
458 
459 /* ----------------------------------------------------------------------
460  * public interface for stdenc
461  */
462 
/*
 * Both macros are defined outside this file; their expansion is not
 * visible here.
 * NOTE(review): presumably they declare the stdenc entry points and the
 * ops table for the GBK2K module -- confirm in citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

/*
 * NOTE(review): presumably instantiates the generic stdenc functions
 * from the macros and routines defined earlier in this file -- confirm
 * in the template.
 */
#include "citrus_stdenc_template.h"
467