xref: /netbsd-src/lib/libc/citrus/modules/citrus_zw.c (revision 7fa608457b817eca6e0977b37f758ae064f3c99c)
1 /* $NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $ */
2 
3 /*-
4  * Copyright (c)2004, 2006 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 #if defined(LIB_SCCS) && !defined(lint)
32 __RCSID("$NetBSD: citrus_zw.c,v 1.3 2006/11/24 17:27:52 tnozaki Exp $");
33 #endif /* LIB_SCCS and not lint */
34 
35 #include <sys/types.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stddef.h>
43 #include <locale.h>
44 #include <wchar.h>
45 #include <limits.h>
46 
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_zw.h"
53 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
57 
/*
 * Per-encoding information for ZW.
 * Nothing in this file reads or writes `dummy'; the init/uninit hooks
 * below leave the structure untouched.
 */
typedef struct {
	int dummy;
} _ZWEncodingInfo;
61 
/*
 * Shift state of a ZW stream, as used by the converters in this file:
 *   NONE      - initial state (set by _citrus_ZW_init_state and after a
 *               line ends with '\n' or '\0').
 *   AMBIGIOUS - a leading 'z' has been read; the next byte decides
 *               between GB2312 ('W') and ASCII (anything else).
 *   ASCII     - the current line is plain ASCII.
 *   GB2312    - the current line was introduced by "zW".
 * (The spelling AMBIGIOUS is kept because other code refers to it.)
 */
typedef enum {
	NONE, AMBIGIOUS, ASCII, GB2312
} _ZWCharset;
65 
/*
 * Conversion state carried between calls.
 */
typedef struct {
	int		chlen;		/* number of bytes of ch[] in use */
	char		ch[4];		/* byte buffer for a pending sequence */
	_ZWCharset	charset;	/* current shift state */
} _ZWState;
71 
/*
 * Encoding information plus one conversion state per stateful entry
 * point.  The individual states are selected through _CEI_TO_STATE();
 * presumably they are consumed by the ctype template included at the
 * end of this file -- confirm against citrus_ctype_template.h.
 */
typedef struct {
	_ZWEncodingInfo	ei;
	struct {
		/* for future multi-locale facility */
		_ZWState	s_mblen;
		_ZWState	s_mbrlen;
		_ZWState	s_mbrtowc;
		_ZWState	s_mbtowc;
		_ZWState	s_mbsrtowcs;
		_ZWState	s_wcrtomb;
		_ZWState	s_wcsrtombs;
		_ZWState	s_wctomb;
	} states;
} _ZWCTypeInfo;
86 
/* Accessors into _ZWCTypeInfo: the encoding info and a named state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Configuration knobs; presumably read by the citrus_*_template.h
 * headers included at the end of this file (not visible here).
 */
/* Builds the module-prefixed name of a function: _citrus_ZW_<m>. */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _CTYPE_INFO			_ZWCTypeInfo
#define _ENCODING_STATE			_ZWState
/* The argument is ignored; the value is always MB_LEN_MAX. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT		1
/* True whenever the state is not in the initial (NONE) charset. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
97 
98 static __inline void
99 /*ARGSUSED*/
100 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei,
101 	_ZWState * __restrict psenc)
102 {
103 	/* ei my be unused */
104 	_DIAGASSERT(psenc != NULL);
105 
106 	psenc->chlen = 0;
107 	psenc->charset = NONE;
108 }
109 
110 static __inline void
111 /*ARGSUSED*/
112 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei,
113 	void *__restrict pspriv, const _ZWState * __restrict psenc)
114 {
115 	/* ei may be unused */
116 	_DIAGASSERT(pspriv != NULL);
117 	_DIAGASSERT(psenc != NULL);
118 
119 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
120 }
121 
122 static __inline void
123 /*ARGSUSED*/
124 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei,
125 	_ZWState * __restrict psenc, const void * __restrict pspriv)
126 {
127 	/* ei may be unused */
128 	_DIAGASSERT(psenc != NULL);
129 	_DIAGASSERT(pspriv != NULL);
130 
131 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
132 }
133 
134 static int
135 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
136 	wchar_t * __restrict pwc, const char **__restrict s, size_t n,
137 	_ZWState * __restrict psenc, size_t * __restrict nresult)
138 {
139 	const char *s0;
140 	int ch, len;
141 	wchar_t	 wc;
142 
143 	/* ei may be unused */
144 	/* pwc may be null */
145 	_DIAGASSERT(s != NULL);
146 	_DIAGASSERT(psenc != NULL);
147 	_DIAGASSERT(nresult != NULL);
148 
149 	if (*s == NULL) {
150 		_citrus_ZW_init_state(ei, psenc);
151 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
152 		return 0;
153 	}
154 	s0 = *s;
155 	len = 0;
156 
157 #define	STORE				\
158 do {					\
159 	if (n-- < 1) {			\
160 		*nresult = (size_t)-2;	\
161 		*s = s0;		\
162 		return 0;		\
163 	}				\
164 	ch = (unsigned char)*s0++;	\
165 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
166 		goto ilseq;		\
167 	psenc->ch[psenc->chlen++] = ch;	\
168 } while (/*CONSTCOND*/0)
169 
170 loop:
171 	switch (psenc->charset) {
172 	case ASCII:
173 		switch (psenc->chlen) {
174 		case 0:
175 			STORE;
176 			switch (psenc->ch[0]) {
177 			case '\0': case '\n':
178 				psenc->charset = NONE;
179 			}
180 		/*FALLTHROUGH*/
181 		case 1:
182 			break;
183 		default:
184 			return EINVAL;
185 		}
186 		ch = (unsigned char)psenc->ch[0];
187 		if (ch > 0x7F)
188 			goto ilseq;
189 		wc = (wchar_t)ch;
190 		psenc->chlen = 0;
191 		break;
192 	case NONE:
193 		if (psenc->chlen != 0)
194 			return EINVAL;
195 		STORE;
196 		ch = (unsigned char)psenc->ch[0];
197 		if (ch != 'z') {
198 			if (ch != '\n' && ch != '\0')
199 				psenc->charset = ASCII;
200 			wc = (wchar_t)ch;
201 			psenc->chlen = 0;
202 			break;
203 		}
204 		psenc->charset = AMBIGIOUS;
205 		psenc->chlen = 0;
206 	/* FALLTHROUGH */
207 	case AMBIGIOUS:
208 		if (psenc->chlen != 0)
209 			return EINVAL;
210 		STORE;
211 		if (psenc->ch[0] != 'W') {
212 			psenc->charset = ASCII;
213 			wc = L'z';
214 			break;
215 		}
216 		psenc->charset = GB2312;
217 		psenc->chlen = 0;
218 	/* FALLTHROUGH */
219 	case GB2312:
220 		switch (psenc->chlen) {
221 		case 0:
222 			STORE;
223 			ch = (unsigned char)psenc->ch[0];
224 			if (ch == '\0') {
225 				psenc->charset = NONE;
226 				wc = (wchar_t)ch;
227 				psenc->chlen = 0;
228 				break;
229 			} else if (ch == '\n') {
230 				psenc->charset = NONE;
231 				psenc->chlen = 0;
232 				goto loop;
233 			}
234 		/*FALLTHROUGH*/
235 		case 1:
236 			STORE;
237 			if (psenc->ch[0] == ' ') {
238 				ch = (unsigned char)psenc->ch[1];
239 				wc = (wchar_t)ch;
240 				psenc->chlen = 0;
241 				break;
242 			} else if (psenc->ch[0] == '#') {
243 				ch = (unsigned char)psenc->ch[1];
244 				if (ch == '\n') {
245 					psenc->charset = NONE;
246 					wc = (wchar_t)ch;
247 					psenc->chlen = 0;
248 					break;
249 				} else if (ch == ' ') {
250 					wc = (wchar_t)ch;
251 					psenc->chlen = 0;
252 					break;
253 				}
254 			}
255 			ch = (unsigned char)psenc->ch[0];
256 			if (ch < 0x21 || ch > 0x7E)
257 				goto ilseq;
258 			wc = (wchar_t)(ch << 8);
259 			ch = (unsigned char)psenc->ch[1];
260 			if (ch < 0x21 || ch > 0x7E) {
261 ilseq:
262 				*nresult = (size_t)-1;
263 				return EILSEQ;
264 			}
265 			wc |= (wchar_t)ch;
266 			psenc->chlen = 0;
267 			break;
268 		default:
269 			return EINVAL;
270 		}
271 		break;
272 	default:
273 		return EINVAL;
274 	}
275 	if (pwc != NULL)
276 		*pwc = wc;
277 
278 	*nresult = (size_t)(wc == 0 ? 0 : len);
279 	*s = s0;
280 
281 	return 0;
282 }
283 
284 static int
285 /*ARGSUSED*/
286 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei,
287 	char *__restrict s, size_t n, wchar_t wc,
288 	_ZWState * __restrict psenc, size_t * __restrict nresult)
289 {
290 	int ch;
291 
292 	/* ei may be null */
293 	_DIAGASSERT(s != NULL);
294 	_DIAGASSERT(psenc != NULL);
295 	_DIAGASSERT(nresult != NULL);
296 
297 	if (psenc->chlen != 0)
298 		return EINVAL;
299 	if ((uint32_t)wc <= 0x7F) {
300 		ch = (unsigned char)wc;
301 		switch (psenc->charset) {
302 		case NONE:
303 			if (ch == '\0' || ch == '\n') {
304 				psenc->ch[psenc->chlen++] = ch;
305 			} else {
306 				if (n < 4)
307 					return E2BIG;
308 				n -= 4;
309 				psenc->ch[psenc->chlen++] = 'z';
310 				psenc->ch[psenc->chlen++] = 'W';
311 				psenc->ch[psenc->chlen++] = ' ';
312 				psenc->ch[psenc->chlen++] = ch;
313 				psenc->charset = GB2312;
314 			}
315 			break;
316 		case GB2312:
317 			if (n < 2)
318 				return E2BIG;
319 			n -= 2;
320 			if (ch == '\0') {
321 				psenc->ch[psenc->chlen++] = '\n';
322 				psenc->ch[psenc->chlen++] = '\0';
323 				psenc->charset = NONE;
324 			} else if (ch == '\n') {
325 				psenc->ch[psenc->chlen++] = '#';
326 				psenc->ch[psenc->chlen++] = '\n';
327 				psenc->charset = NONE;
328 			} else {
329 				psenc->ch[psenc->chlen++] = ' ';
330 				psenc->ch[psenc->chlen++] = ch;
331 			}
332 			break;
333 		default:
334 			return EINVAL;
335 		}
336 	} else if ((uint32_t)wc <= 0x7E7E) {
337 		switch (psenc->charset) {
338 		case NONE:
339 			if (n < 2)
340 				return E2BIG;
341 			n -= 2;
342 			psenc->ch[psenc->chlen++] = 'z';
343 			psenc->ch[psenc->chlen++] = 'W';
344 			psenc->charset = GB2312;
345 		/* FALLTHROUGH*/
346 		case GB2312:
347 			if (n < 2)
348 				return E2BIG;
349 			n -= 2;
350 			ch = (wc >> 8) & 0xFF;
351 			if (ch < 0x21 || ch > 0x7E)
352 				goto ilseq;
353 			psenc->ch[psenc->chlen++] = ch;
354 			ch = wc & 0xFF;
355 			if (ch < 0x21 || ch > 0x7E)
356 				goto ilseq;
357 			psenc->ch[psenc->chlen++] = ch;
358 			break;
359 		default:
360 			return EINVAL;
361 		}
362 	} else {
363 ilseq:
364 		*nresult = (size_t)-1;
365 		return EILSEQ;
366 	}
367 	memcpy(s, psenc->ch, psenc->chlen);
368 	*nresult = psenc->chlen;
369 	psenc->chlen = 0;
370 
371 	return 0;
372 }
373 
374 static int
375 /*ARGSUSED*/
376 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei,
377 	char * __restrict s, size_t n,
378 	_ZWState * __restrict psenc, size_t * __restrict nresult)
379 {
380 	/* ei may be unused */
381 	_DIAGASSERT(s != NULL);
382 	_DIAGASSERT(psenc != NULL);
383 	_DIAGASSERT(nresult != NULL);
384 
385 	if (psenc->chlen != 0)
386 		return EINVAL;
387 	switch (psenc->charset) {
388 	case GB2312:
389 		if (n-- < 1)
390 			return E2BIG;
391 		psenc->ch[psenc->chlen++] = '\n';
392 		psenc->charset = NONE;
393 	/*FALLTHROUGH*/
394 	case NONE:
395 		*nresult = psenc->chlen;
396 		if (psenc->chlen > 0) {
397 			memcpy(s, psenc->ch, psenc->chlen);
398 			psenc->chlen = 0;
399 		}
400 		break;
401 	default:
402 		return EINVAL;
403 	}
404 
405 	return 0;
406 }
407 
408 static __inline int
409 /*ARGSUSED*/
410 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei,
411 	_ZWState * __restrict psenc, int * __restrict rstate)
412 {
413 	/* ei may be unused */
414 	_DIAGASSERT(psenc != NULL);
415 	_DIAGASSERT(rstate != NULL);
416 
417 	switch (psenc->charset) {
418 	case NONE:
419 		if (psenc->chlen != 0)
420 			return EINVAL;
421 		*rstate = _STDENC_SDGEN_INITIAL;
422 		break;
423 	case AMBIGIOUS:
424 		if (psenc->chlen != 0)
425 			return EINVAL;
426 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
427 		break;
428 	case ASCII:
429 	case GB2312:
430 		switch (psenc->chlen) {
431 		case 0:
432 			*rstate = _STDENC_SDGEN_STABLE;
433 			break;
434 		case 1:
435 			*rstate = (psenc->ch[0] == '#')
436 			    ? _STDENC_SDGEN_INCOMPLETE_SHIFT
437 			    : _STDENC_SDGEN_INCOMPLETE_CHAR;
438 			break;
439 		default:
440 			return EINVAL;
441 		}
442 		break;
443 	default:
444 		return EINVAL;
445 	}
446 	return 0;
447 }
448 
449 static __inline int
450 /*ARGSUSED*/
451 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei,
452 	_csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
453 {
454 	/* ei seems to be unused */
455 	_DIAGASSERT(csid != NULL);
456 	_DIAGASSERT(idx != NULL);
457 
458 	*csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1;
459 	*idx = (_index_t)wc;
460 
461 	return 0;
462 }
463 
464 static __inline int
465 /*ARGSUSED*/
466 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei,
467 	 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
468 {
469 	/* ei seems to be unused */
470 	_DIAGASSERT(wc != NULL);
471 
472 	switch (csid) {
473 	case 0: case 1:
474 		break;
475 	default:
476 		return EINVAL;
477 	}
478 	*wc = (wchar_t)idx;
479 
480 	return 0;
481 }
482 
483 static void
484 /*ARGSUSED*/
485 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei)
486 {
487 }
488 
489 static int
490 /*ARGSUSED*/
491 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei,
492 	const void *__restrict var, size_t lenvar)
493 {
494 	return 0;
495 }
496 
497 /* ----------------------------------------------------------------------
498  * public interface for ctype
499  */
500 
/*
 * NOTE(review): _CITRUS_CTYPE_DECLS and _CITRUS_CTYPE_DEF_OPS are defined
 * outside this file (presumably in citrus_ctype.h); they appear to
 * declare and define the ctype operations for the ZW module -- confirm.
 */
_CITRUS_CTYPE_DECLS(ZW);
_CITRUS_CTYPE_DEF_OPS(ZW);

/*
 * Presumably expands the generic ctype entry points in terms of the
 * _FUNCNAME()/_ENCODING_* macros and the *_priv functions above --
 * confirm against the template header.
 */
#include "citrus_ctype_template.h"
505 
506 /* ----------------------------------------------------------------------
507  * public interface for stdenc
508  */
509 
/*
 * NOTE(review): _CITRUS_STDENC_DECLS and _CITRUS_STDENC_DEF_OPS are
 * defined outside this file (presumably in citrus_stdenc.h); they appear
 * to declare and define the stdenc operations for the ZW module --
 * confirm.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

/*
 * Presumably expands the generic stdenc entry points in terms of the
 * _FUNCNAME()/_ENCODING_* macros and the functions above -- confirm
 * against the template header.
 */
#include "citrus_stdenc_template.h"
514