xref: /netbsd-src/lib/libc/citrus/modules/citrus_big5.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: citrus_big5.c,v 1.11 2006/11/22 23:38:27 tnozaki Exp $	*/
2 
3 /*-
4  * Copyright (c)2002, 2006 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 #include <sys/cdefs.h>
62 #if defined(LIBC_SCCS) && !defined(lint)
63 __RCSID("$NetBSD: citrus_big5.c,v 1.11 2006/11/22 23:38:27 tnozaki Exp $");
64 #endif /* LIBC_SCCS and not lint */
65 
66 #include <sys/queue.h>
67 #include <sys/types.h>
68 #include <assert.h>
69 #include <errno.h>
70 #include <string.h>
71 #include <stdint.h>
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <stddef.h>
75 #include <locale.h>
76 #include <wchar.h>
77 #include <limits.h>
78 
79 #include "citrus_namespace.h"
80 #include "citrus_types.h"
81 #include "citrus_bcs.h"
82 #include "citrus_module.h"
83 #include "citrus_ctype.h"
84 #include "citrus_stdenc.h"
85 #include "citrus_big5.h"
86 
87 #include "citrus_prop.h"
88 
89 /* ----------------------------------------------------------------------
90  * private definitions used by templates
91  */
92 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * consumed so far without yet completing a character.
 */
93 typedef struct {
94 	char ch[2];	/* buffered bytes of the character in progress */
95 	int chlen;	/* number of valid bytes in ch[]; 0 means nothing is pending */
96 } _BIG5State;
97 
/*
 * One excluded code range.  Wide characters c with start <= c <= end are
 * rejected with EILSEQ by _citrus_BIG5_check_excludes().
 */
98 typedef struct _BIG5Exclude {
99 	TAILQ_ENTRY(_BIG5Exclude) entry;	/* linkage in the excludes list */
100 	wint_t start, end;	/* inclusive bounds of the excluded range */
101 } _BIG5Exclude;
102 
/* Tail queue of _BIG5Exclude ranges; entries are appended at the tail. */
103 typedef TAILQ_HEAD(_BIG5ExcludeList, _BIG5Exclude) _BIG5ExcludeList;
104 
/*
 * Description of one BIG5 variant.
 *
 * cell[] holds a flag word per byte value:
 *   0x1 - the byte is a lead byte ("row") that starts a two-byte character
 *   0x2 - the byte is acceptable as the second byte ("col")
 */
105 typedef struct {
106 	int cell[0x100];	/* per-byte 0x1/0x2 flags, indexed by byte value */
107 	_BIG5ExcludeList excludes;	/* code ranges rejected with EILSEQ */
108 } _BIG5EncodingInfo;
109 
/*
 * Encoding description plus one _BIG5State slot per function name; a slot
 * is selected with _CEI_TO_STATE(cei, <name>).
 */
110 typedef struct {
111 	_BIG5EncodingInfo	ei;	/* reached through _CEI_TO_EI() */
112 	struct {
113 		/* for future multi-locale facility */
114 		_BIG5State	s_mblen;
115 		_BIG5State	s_mbrlen;
116 		_BIG5State	s_mbrtowc;
117 		_BIG5State	s_mbtowc;
118 		_BIG5State	s_mbsrtowcs;
119 		_BIG5State	s_wcrtomb;
120 		_BIG5State	s_wcsrtombs;
121 		_BIG5State	s_wctomb;
122 	} states;
123 } _BIG5CTypeInfo;
124 
/* Accessors: the encoding info and the state slot named s_<func>. */
125 #define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
126 #define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
127 
/*
 * Names and properties of this encoding.
 * NOTE(review): presumably consumed by the template headers included at the
 * end of this file -- confirm there.
 */
128 #define _FUNCNAME(m)			_citrus_BIG5_##m
129 #define _ENCODING_INFO			_BIG5EncodingInfo
130 #define _CTYPE_INFO			_BIG5CTypeInfo
131 #define _ENCODING_STATE			_BIG5State
/* A character occupies at most two bytes. */
132 #define _ENCODING_MB_CUR_MAX(_ei_)	2
/* The encoding is declared not state dependent. */
133 #define _ENCODING_IS_STATE_DEPENDENT	0
134 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
135 
136 
137 static __inline void
138 /*ARGSUSED*/
139 _citrus_BIG5_init_state(_BIG5EncodingInfo * __restrict ei,
140 			_BIG5State * __restrict s)
141 {
142 	memset(s, 0, sizeof(*s));
143 }
144 
145 static __inline void
146 /*ARGSUSED*/
147 _citrus_BIG5_pack_state(_BIG5EncodingInfo * __restrict ei,
148 			void * __restrict pspriv,
149 			const _BIG5State * __restrict s)
150 {
151 	memcpy(pspriv, (const void *)s, sizeof(*s));
152 }
153 
154 static __inline void
155 /*ARGSUSED*/
156 _citrus_BIG5_unpack_state(_BIG5EncodingInfo * __restrict ei,
157 			  _BIG5State * __restrict s,
158 			  const void * __restrict pspriv)
159 {
160 	memcpy((void *)s, pspriv, sizeof(*s));
161 }
162 
163 static __inline int
164 _citrus_BIG5_check(_BIG5EncodingInfo *ei, u_int c)
165 {
166 	_DIAGASSERT(ei != NULL);
167 
168 	return (ei->cell[c & 0xFF] & 0x1) ? 2 : 1;
169 }
170 
171 static __inline int
172 _citrus_BIG5_check2(_BIG5EncodingInfo *ei, u_int c)
173 {
174 	_DIAGASSERT(ei != NULL);
175 
176 	return (ei->cell[c & 0xFF] & 0x2) ? 1 : 0;
177 }
178 
179 static __inline int
180 _citrus_BIG5_check_excludes(_BIG5EncodingInfo *ei, wint_t c)
181 {
182 	_BIG5Exclude *exclude;
183 
184 	_DIAGASSERT(ei != NULL);
185 
186 	TAILQ_FOREACH(exclude, &ei->excludes, entry) {
187 		if (c >= exclude->start && c <= exclude->end)
188 			return EILSEQ;
189 	}
190 	return 0;
191 }
192 
193 static int
194 _citrus_BIG5_fill_rowcol(void ** __restrict ctx, const char * __restrict s,
195 	uint64_t start, uint64_t end)
196 {
197 	_BIG5EncodingInfo *ei;
198 	int i;
199 	uint64_t n;
200 
201 	_DIAGASSERT(ctx != NULL && *ctx != NULL);
202 
203 	if (start > 0xFF || end > 0xFF)
204 		return EINVAL;
205 	ei = (_BIG5EncodingInfo *)*ctx;
206 	i = strcmp("row", s) ? 1 : 0;
207 	i = 1 << i;
208 	for (n = start; n <= end; ++n)
209 		ei->cell[n & 0xFF] |= i;
210 	return 0;
211 }
212 
213 static int
214 /*ARGSUSED*/
215 _citrus_BIG5_fill_excludes(void ** __restrict ctx, const char * __restrict s,
216 	uint64_t start, uint64_t end)
217 {
218 	_BIG5EncodingInfo *ei;
219 	_BIG5Exclude *exclude;
220 
221 	_DIAGASSERT(ctx != NULL && *ctx != NULL);
222 
223 	if (start > 0xFFFF || end > 0xFFFF)
224 		return EINVAL;
225 	ei = (_BIG5EncodingInfo *)*ctx;
226 	exclude = TAILQ_LAST(&ei->excludes, _BIG5ExcludeList);
227 	if (exclude != NULL && (wint_t)start <= exclude->end)
228 		return EINVAL;
229 	exclude = (void *)malloc(sizeof(*exclude));
230 	if (exclude == NULL)
231 		return ENOMEM;
232 	exclude->start = (wint_t)start;
233 	exclude->end = (wint_t)end;
234 	TAILQ_INSERT_TAIL(&ei->excludes, exclude, entry);
235 
236 	return 0;
237 }
238 
/*
 * Property table handed to _citrus_prop_parse_variable(): "row" and "col"
 * share the cell-table callback, "excludes" appends to the exclude list.
 */
239 static const _citrus_prop_hint_t root_hints[] = {
240     _CITRUS_PROP_HINT_NUM("row", &_citrus_BIG5_fill_rowcol),
241     _CITRUS_PROP_HINT_NUM("col", &_citrus_BIG5_fill_rowcol),
242     _CITRUS_PROP_HINT_NUM("excludes", &_citrus_BIG5_fill_excludes),
243     _CITRUS_PROP_HINT_END
244 };
245 
246 static void
247 /*ARGSUSED*/
248 _citrus_BIG5_encoding_module_uninit(_BIG5EncodingInfo *ei)
249 {
250 	_BIG5Exclude *exclude;
251 
252 	_DIAGASSERT(ei != NULL);
253 
254 	while ((exclude = TAILQ_FIRST(&ei->excludes)) != NULL) {
255 		TAILQ_REMOVE(&ei->excludes, exclude, entry);
256 		free(exclude);
257 	}
258 }
259 
260 static int
261 /*ARGSUSED*/
262 _citrus_BIG5_encoding_module_init(_BIG5EncodingInfo * __restrict ei,
263 				  const void * __restrict var, size_t lenvar)
264 {
265 	int err;
266 	const char *s;
267 
268 	_DIAGASSERT(ei != NULL);
269 
270 	memset((void *)ei, 0, sizeof(*ei));
271 	TAILQ_INIT(&ei->excludes);
272 
273 	if (lenvar > 0 && var != NULL) {
274 		s = _bcs_skip_ws_len((const char *)var, &lenvar);
275 		if (lenvar > 0 && *s != '\0') {
276 			err = _citrus_prop_parse_variable(
277 			    root_hints, (void *)ei, s, lenvar);
278 			if (err == 0)
279 				return 0;
280 
281 			_citrus_BIG5_encoding_module_uninit(ei);
282 			memset((void *)ei, 0, sizeof(*ei));
283 			TAILQ_INIT(&ei->excludes);
284 		}
285 	}
286 
287 	/* fallback Big5-1984, for backward compatibility. */
288 	_citrus_BIG5_fill_rowcol((void **)&ei, "row", 0xA1, 0xFE);
289 	_citrus_BIG5_fill_rowcol((void **)&ei, "col", 0x40, 0x7E);
290 	_citrus_BIG5_fill_rowcol((void **)&ei, "col", 0xA1, 0xFE);
291 
292 	return 0;
293 }
294 
295 static int
296 /*ARGSUSED*/
297 _citrus_BIG5_mbrtowc_priv(_BIG5EncodingInfo * __restrict ei,
298 			  wchar_t * __restrict pwc,
299 			  const char ** __restrict s, size_t n,
300 			  _BIG5State * __restrict psenc,
301 			  size_t * __restrict nresult)
302 {
303 	wchar_t wchar;
304 	int c;
305 	int chlenbak;
306 	const char *s0;
307 
308 	_DIAGASSERT(nresult != 0);
309 	_DIAGASSERT(ei != NULL);
310 	_DIAGASSERT(psenc != NULL);
311 	_DIAGASSERT(s != NULL && *s != NULL);
312 
313 	s0 = *s;
314 
315 	if (s0 == NULL) {
316 		_citrus_BIG5_init_state(ei, psenc);
317 		*nresult = 0;
318 		return (0);
319 	}
320 
321 	chlenbak = psenc->chlen;
322 
323 	/* make sure we have the first byte in the buffer */
324 	switch (psenc->chlen) {
325 	case 0:
326 		if (n < 1)
327 			goto restart;
328 		psenc->ch[0] = *s0++;
329 		psenc->chlen = 1;
330 		n--;
331 		break;
332 	case 1:
333 		break;
334 	default:
335 		/* illegal state */
336 		goto ilseq;
337 	}
338 
339 	c = _citrus_BIG5_check(ei, psenc->ch[0] & 0xff);
340 	if (c == 0)
341 		goto ilseq;
342 	while (psenc->chlen < c) {
343 		if (n < 1) {
344 			goto restart;
345 		}
346 		psenc->ch[psenc->chlen] = *s0++;
347 		psenc->chlen++;
348 		n--;
349 	}
350 
351 	switch (c) {
352 	case 1:
353 		wchar = psenc->ch[0] & 0xff;
354 		break;
355 	case 2:
356 		if (!_citrus_BIG5_check2(ei, psenc->ch[1] & 0xff))
357 			goto ilseq;
358 		wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
359 		break;
360 	default:
361 		/* illegal state */
362 		goto ilseq;
363 	}
364 
365 	if (_citrus_BIG5_check_excludes(ei, (wint_t)wchar) != 0)
366 		goto ilseq;
367 
368 	*s = s0;
369 	psenc->chlen = 0;
370 	if (pwc)
371 		*pwc = wchar;
372 	if (!wchar)
373 		*nresult = 0;
374 	else
375 		*nresult = c - chlenbak;
376 
377 	return (0);
378 
379 ilseq:
380 	psenc->chlen = 0;
381 	*nresult = (size_t)-1;
382 	return (EILSEQ);
383 
384 restart:
385 	*s = s0;
386 	*nresult = (size_t)-2;
387 	return (0);
388 }
389 
390 static int
391 /*ARGSUSED*/
392 _citrus_BIG5_wcrtomb_priv(_BIG5EncodingInfo * __restrict ei,
393 			  char * __restrict s,
394 			  size_t n, wchar_t wc, _BIG5State * __restrict psenc,
395 			  size_t * __restrict nresult)
396 {
397 	int l, ret;
398 
399 	_DIAGASSERT(ei != NULL);
400 	_DIAGASSERT(nresult != 0);
401 	_DIAGASSERT(s != NULL);
402 
403 	/* check invalid sequence */
404 	if (wc & ~0xffff ||
405 	    _citrus_BIG5_check_excludes(ei, (wint_t)wc) != 0) {
406 		ret = EILSEQ;
407 		goto err;
408 	}
409 
410 	if (wc & 0x8000) {
411 		if (_citrus_BIG5_check(ei, (wc >> 8) & 0xff) != 2 ||
412 		    !_citrus_BIG5_check2(ei, wc & 0xff)) {
413 			ret = EILSEQ;
414 			goto err;
415 		}
416 		l = 2;
417 	} else {
418 		if (wc & ~0xff || !_citrus_BIG5_check(ei, wc & 0xff)) {
419 			ret = EILSEQ;
420 			goto err;
421 		}
422 		l = 1;
423 	}
424 
425 	if (n < l) {
426 		/* bound check failure */
427 		ret = E2BIG;
428 		goto err;
429 	}
430 
431 	if (l == 2) {
432 		s[0] = (wc >> 8) & 0xff;
433 		s[1] = wc & 0xff;
434 	} else
435 		s[0] = wc & 0xff;
436 
437 	*nresult = l;
438 
439 	return 0;
440 
441 err:
442 	*nresult = (size_t)-1;
443 	return ret;
444 }
445 
446 static __inline int
447 /*ARGSUSED*/
448 _citrus_BIG5_stdenc_wctocs(_BIG5EncodingInfo * __restrict ei,
449 			   _csid_t * __restrict csid,
450 			   _index_t * __restrict idx, wchar_t wc)
451 {
452 
453 	_DIAGASSERT(csid != NULL && idx != NULL);
454 
455 	*csid = (wc < 0x100) ? 0 : 1;
456 	*idx = (_index_t)wc;
457 
458 	return 0;
459 }
460 
461 static __inline int
462 /*ARGSUSED*/
463 _citrus_BIG5_stdenc_cstowc(_BIG5EncodingInfo * __restrict ei,
464 			   wchar_t * __restrict wc,
465 			   _csid_t csid, _index_t idx)
466 {
467 	_DIAGASSERT(wc != NULL);
468 
469 	switch (csid) {
470 	case 0:
471 	case 1:
472 		*wc = (wchar_t)idx;
473 		break;
474 	default:
475 		return EILSEQ;
476 	}
477 
478 	return 0;
479 }
480 
481 static __inline int
482 /*ARGSUSED*/
483 _citrus_BIG5_stdenc_get_state_desc_generic(_BIG5EncodingInfo * __restrict ei,
484 					   _BIG5State * __restrict psenc,
485 					   int * __restrict rstate)
486 {
487 
488 	if (psenc->chlen == 0)
489 		*rstate = _STDENC_SDGEN_INITIAL;
490 	else
491 		*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
492 
493 	return 0;
494 }
495 
496 /* ----------------------------------------------------------------------
497  * public interface for ctype
498  */
499 
/*
 * NOTE(review): both macros are defined outside this file; presumably they
 * declare the BIG5 ctype entry points and define the matching ops table
 * that the template included below implements -- confirm in citrus_ctype.h.
 */
500 _CITRUS_CTYPE_DECLS(BIG5);
501 _CITRUS_CTYPE_DEF_OPS(BIG5);
502 
503 #include "citrus_ctype_template.h"
504 
505 
506 /* ----------------------------------------------------------------------
507  * public interface for stdenc
508  */
509 
/*
 * NOTE(review): both macros are defined outside this file; presumably they
 * declare the BIG5 stdenc entry points and define the matching ops table
 * that the template included below implements -- confirm in citrus_stdenc.h.
 */
510 _CITRUS_STDENC_DECLS(BIG5);
511 _CITRUS_STDENC_DEF_OPS(BIG5);
512 
513 #include "citrus_stdenc_template.h"
514