xref: /onnv-gate/usr/src/uts/common/kiconv/kiconv_tc/kiconv_tc.c (revision 6008:3a1c10482cf2)
1*6008Syy154373 /*
2*6008Syy154373  * CDDL HEADER START
3*6008Syy154373  *
4*6008Syy154373  * The contents of this file are subject to the terms of the
5*6008Syy154373  * Common Development and Distribution License (the "License").
6*6008Syy154373  * You may not use this file except in compliance with the License.
7*6008Syy154373  *
8*6008Syy154373  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373  * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373  * See the License for the specific language governing permissions
11*6008Syy154373  * and limitations under the License.
12*6008Syy154373  *
13*6008Syy154373  * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373  * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373  * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373  * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373  *
19*6008Syy154373  * CDDL HEADER END
20*6008Syy154373  */
21*6008Syy154373 /*
22*6008Syy154373  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23*6008Syy154373  * Use is subject to license terms.
24*6008Syy154373  */
25*6008Syy154373 
26*6008Syy154373 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*6008Syy154373 
28*6008Syy154373 #include <sys/types.h>
29*6008Syy154373 #include <sys/param.h>
30*6008Syy154373 #include <sys/sysmacros.h>
31*6008Syy154373 #include <sys/systm.h>
32*6008Syy154373 #include <sys/debug.h>
33*6008Syy154373 #include <sys/kmem.h>
34*6008Syy154373 #include <sys/sunddi.h>
35*6008Syy154373 #include <sys/byteorder.h>
36*6008Syy154373 #include <sys/errno.h>
37*6008Syy154373 #include <sys/modctl.h>
38*6008Syy154373 #include <sys/u8_textprep.h>
39*6008Syy154373 #include <sys/kiconv.h>
40*6008Syy154373 #include <sys/kiconv_cck_common.h>
41*6008Syy154373 #include <sys/kiconv_tc.h>
42*6008Syy154373 #include <sys/kiconv_big5_utf8.h>
43*6008Syy154373 #include <sys/kiconv_euctw_utf8.h>
44*6008Syy154373 #include <sys/kiconv_hkscs_utf8.h>
45*6008Syy154373 #include <sys/kiconv_cp950hkscs_utf8.h>
46*6008Syy154373 #include <sys/kiconv_utf8_big5.h>
47*6008Syy154373 #include <sys/kiconv_utf8_euctw.h>
48*6008Syy154373 #include <sys/kiconv_utf8_cp950hkscs.h>
49*6008Syy154373 #include <sys/kiconv_utf8_hkscs.h>
50*6008Syy154373 
/*
 * 4 HKSCS-2004 code points map to 2 Unicode code points separately.
 *
 * Each row holds the UTF-8 encoding of a two-code-point sequence: a base
 * letter (c3 8a = U+00CA, c3 aa = U+00EA) followed by a combining mark
 * (cc 84 = U+0304, cc 8c = U+030C).  The trailing comment on each row is
 * the HKSCS-2004 code point that the sequence corresponds to.
 */
static uchar_t hkscs_special_sequence[][4] = {
	{ 0xc3, 0x8a, 0xcc, 0x84 },	/* 0x8862 */
	{ 0xc3, 0x8a, 0xcc, 0x8c },	/* 0x8864 */
	{ 0xc3, 0xaa, 0xcc, 0x84 },	/* 0x88a3 */
	{ 0xc3, 0xaa, 0xcc, 0x8c } 	/* 0x88a5 */
};

/*
 * HKSCS-2004 code points for U+00CA and U+00EA, both standing alone and
 * when followed by U+0304 or U+030C.  The four two-code-point sequences
 * each map to a single HKSCS-2004 code point.  The trailing comment on
 * each entry is the Unicode sequence that the value stands for.
 */
static uint32_t ucs_special_sequence[] = {
	0x8866,		/* U+00ca */
	0x8862,		/* U+00ca U+0304 */
	0x8864,		/* U+00ca U+030c */
	0x88a7,		/* U+00ea */
	0x88a3,		/* U+00ea U+0304 */
	0x88a5		/* U+00ea U+030c */
};
68*6008Syy154373 
/*
 * Signature shared by the single-character BIG5-family to UTF-8
 * converters (big5_to_utf8, big5hkscs_to_utf8 and cp950hkscs_to_utf8), so
 * that the common conversion loops can take any of them as a callback.
 */
typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
	uchar_t *obtail, size_t *ret_val);

/* Single-character converters from UTF-8 to each target codeset. */
static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
/* Single-character converters from each source codeset to UTF-8. */
static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
	size_t *ret_val);
static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
	uchar_t *obtail, size_t *ret_val);
static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
	uchar_t *obtail, size_t *ret_val);
/* Takes the CNS plane number in addition to the two-byte value. */
static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
	uchar_t byte2);
90*6008Syy154373 
/*
 * Magic values handed out by the open_fr_*() routines as the conversion
 * descriptor, one per source codeset.  KICONV_TC_MAX_MAGIC_ID has to stay
 * equal to the largest of them: close_fr_tc() treats anything above it as
 * an invalid descriptor.
 */
#define	KICONV_TC_BIG5		(0x01)
#define	KICONV_TC_BIG5HKSCS	(0x02)
#define	KICONV_TC_CP950HKSCS	(0x03)
#define	KICONV_TC_EUCTW		(0x04)
#define	KICONV_TC_MAX_MAGIC_ID	(0x04)
96*6008Syy154373 
97*6008Syy154373 static void *
open_fr_big5()98*6008Syy154373 open_fr_big5()
99*6008Syy154373 {
100*6008Syy154373 	return ((void *)KICONV_TC_BIG5);
101*6008Syy154373 }
102*6008Syy154373 
103*6008Syy154373 static void *
open_fr_big5hkscs()104*6008Syy154373 open_fr_big5hkscs()
105*6008Syy154373 {
106*6008Syy154373 	return ((void *)KICONV_TC_BIG5HKSCS);
107*6008Syy154373 }
108*6008Syy154373 
109*6008Syy154373 static void *
open_fr_cp950hkscs()110*6008Syy154373 open_fr_cp950hkscs()
111*6008Syy154373 {
112*6008Syy154373 	return ((void *)KICONV_TC_CP950HKSCS);
113*6008Syy154373 }
114*6008Syy154373 
115*6008Syy154373 static void *
open_fr_euctw()116*6008Syy154373 open_fr_euctw()
117*6008Syy154373 {
118*6008Syy154373 	return ((void *)KICONV_TC_EUCTW);
119*6008Syy154373 }
120*6008Syy154373 
121*6008Syy154373 static int
close_fr_tc(void * s)122*6008Syy154373 close_fr_tc(void *s)
123*6008Syy154373 {
124*6008Syy154373 	if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
125*6008Syy154373 		return (EBADF);
126*6008Syy154373 
127*6008Syy154373 	return (0);
128*6008Syy154373 }
129*6008Syy154373 
/*
 * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
 *
 * Walks the input buffer one character at a time.  Bytes accepted by
 * KICONV_IS_ASCII() are copied through unchanged; any other byte has to
 * start a two-byte character, which is packed into a 16-bit value (lead
 * byte in the high 8 bits) and handed to ptr_big5touf8 to be written out
 * as UTF-8.
 *
 * When the conversion loop is left, *inbuf, *inbytesleft, *outbuf and
 * *outbytesleft are updated to reflect what was consumed and produced.
 * The two early returns below leave them untouched.
 *
 * Return: the value accumulated in ret_val, which ptr_big5touf8 may update
 *         through the pointer it is given.  (size_t)-1 with *errno set to
 *         EBADF is returned for a NULL or (void *)-1 descriptor, and 0 is
 *         returned for a state reset request (inbuf or *inbuf is NULL).
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() is defined outside this file.
 * It is presumed to store its argument in *errno, set ret_val to
 * (size_t)-1 and break out of the loop -- confirm against its definition.
 */
static size_t
kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_big5toutf8_t ptr_big5touf8)
{
	uchar_t		*ib;
	uchar_t		*ob;
	uchar_t		*ibtail;
	uchar_t		*obtail;
	size_t		ret_val;
	int8_t		sz;
	uint32_t	big5_val;

	/* Check on the kiconv code conversion descriptor. */
	if (kcd == NULL || kcd == (void *)-1) {
		*errno = EBADF;
		return ((size_t)-1);
	}

	/* If this is a state reset request, process and return. */
	if (inbuf == NULL || *inbuf == NULL) {
		return (0);
	}

	ret_val = 0;
	ib = (uchar_t *)*inbuf;
	ob = (uchar_t *)*outbuf;
	/* One-past-the-end markers for the input and output buffers. */
	ibtail = ib + *inbytesleft;
	obtail = ob + *outbytesleft;

	while (ib < ibtail) {
		if (KICONV_IS_ASCII(*ib)) {
			if (ob >= obtail) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}

			*ob++ = *ib++;
			continue;
		}

		/*
		 * Issue EILSEQ error if the first byte is not a
		 * valid BIG5/HKSCS leading byte.
		 */
		if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		}

		/*
		 * Issue EINVAL error if input buffer has an incomplete
		 * character at the end of the buffer.
		 */
		if (ibtail - ib < 2) {
			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
		}

		/*
		 * Issue EILSEQ error if the remaining bytes is not
		 * a valid BIG5/HKSCS byte.
		 */
		if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		}

		/* Now we have a valid BIG5/HKSCS character. */
		big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
		sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);

		/* A negative size is reported to the caller as E2BIG. */
		if (sz < 0) {
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		/*
		 * The pointers are advanced only after a successful
		 * conversion, so a character that could not be written
		 * stays unconsumed in the input.
		 */
		ib += 2;
		ob += sz;
	}

	*inbuf = (char *)ib;
	*inbytesleft = ibtail - ib;
	*outbuf = (char *)ob;
	*outbytesleft = obtail - ob;

	return (ret_val);
}
216*6008Syy154373 
/*
 * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
 * to UTF-8.
 *
 * Converts at most *inlen bytes from ib into ob, which has room for
 * *outlen bytes.  Unless KICONV_IGNORE_NULL is set in flag, conversion
 * stops at the first NUL byte in the input.  On return *inlen and *outlen
 * hold the number of bytes left unconsumed and unused respectively.
 *
 * Each replacement character written at REPLACE_INVALID adds one to the
 * return value.
 *
 * Return: the value accumulated in ret_val, which ptr_big5touf8 may also
 *         update through the pointer it is given.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() and
 * KICONV_SET_ERRNO_WITH_FLAG() are defined outside this file.  The former
 * is presumed to store its argument in *errno, set ret_val to (size_t)-1
 * and break.  The latter is presumed, when KICONV_REPLACE_INVALID is set
 * in flag, to advance ib by its first argument and jump to
 * REPLACE_INVALID, and otherwise to act like the former -- confirm against
 * their definitions.
 */
static size_t
kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
    size_t *outlen, int flag, int *errno,
    kiconv_big5toutf8_t ptr_big5touf8)
{
	uchar_t		*oldib;
	uchar_t		*ibtail;
	uchar_t		*obtail;
	size_t		ret_val;
	int8_t		sz;
	uint32_t	big5_val;
	boolean_t	do_not_ignore_null;

	ret_val = 0;
	/* One-past-the-end markers for the input and output buffers. */
	ibtail = ib + *inlen;
	obtail = ob + *outlen;
	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

	while (ib < ibtail) {
		if (*ib == '\0' && do_not_ignore_null)
			break;

		if (KICONV_IS_ASCII(*ib)) {
			if (ob >= obtail) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}

			*ob++ = *ib++;
			continue;
		}

		/*
		 * Remember where this character starts so that ib can be
		 * rewound if the output turns out not to fit.
		 */
		oldib = ib;

		/* Not a valid BIG5/HKSCS leading byte. */
		if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
		}

		/* Only the leading byte is left in the input. */
		if (ibtail - ib < 2) {
			KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
		}

		/* Not a valid BIG5/HKSCS second byte. */
		if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
			KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
		}

		/* Pack the two bytes, leading byte in the high 8 bits. */
		big5_val = *ib++;
		big5_val = (big5_val << 8) | *ib++;
		sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);

		/* A negative size is reported to the caller as E2BIG. */
		if (sz < 0) {
			ib = oldib;
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		ob += sz;
		continue;

REPLACE_INVALID:
		/*
		 * No room for the replacement character: un-consume the
		 * offending input and report E2BIG instead.
		 */
		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
			ib = oldib;
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
		ret_val++;
	}

	*inlen = ibtail - ib;
	*outlen = obtail - ob;

	return (ret_val);
}
295*6008Syy154373 
296*6008Syy154373 /*
297*6008Syy154373  * Encoding convertor from BIG5 to UTF-8.
298*6008Syy154373  */
299*6008Syy154373 static size_t
kiconv_fr_big5(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)300*6008Syy154373 kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
301*6008Syy154373 	size_t *outbytesleft, int *errno)
302*6008Syy154373 {
303*6008Syy154373 	return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
304*6008Syy154373 	    outbytesleft, errno, big5_to_utf8));
305*6008Syy154373 }
306*6008Syy154373 
307*6008Syy154373 /*
308*6008Syy154373  * String based encoding convertor from BIG5 to UTF-8.
309*6008Syy154373  */
310*6008Syy154373 static size_t
kiconvstr_fr_big5(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)311*6008Syy154373 kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
312*6008Syy154373     size_t *outlen, int flag, int *errno)
313*6008Syy154373 {
314*6008Syy154373 	return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
315*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno,
316*6008Syy154373 	    big5_to_utf8));
317*6008Syy154373 }
318*6008Syy154373 
319*6008Syy154373 /*
320*6008Syy154373  * Encoding convertor from BIG5-HKSCS to UTF-8.
321*6008Syy154373  */
322*6008Syy154373 static size_t
kiconv_fr_big5hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)323*6008Syy154373 kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
324*6008Syy154373     char **outbuf, size_t *outbytesleft, int *errno)
325*6008Syy154373 {
326*6008Syy154373 	return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
327*6008Syy154373 	    outbytesleft, errno, big5hkscs_to_utf8);
328*6008Syy154373 }
329*6008Syy154373 
330*6008Syy154373 /*
331*6008Syy154373  * String based encoding convertor from BIG5-HKSCS to UTF-8.
332*6008Syy154373  */
333*6008Syy154373 static size_t
kiconvstr_fr_big5hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)334*6008Syy154373 kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
335*6008Syy154373 	size_t *outlen, int flag, int *errno)
336*6008Syy154373 {
337*6008Syy154373 	return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
338*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
339*6008Syy154373 }
340*6008Syy154373 
341*6008Syy154373 /*
342*6008Syy154373  * Encoding convertor from CP950-HKSCS to UTF-8.
343*6008Syy154373  */
344*6008Syy154373 static size_t
kiconv_fr_cp950hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)345*6008Syy154373 kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
346*6008Syy154373     char **outbuf, size_t *outbytesleft, int *errno)
347*6008Syy154373 {
348*6008Syy154373 	return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
349*6008Syy154373 	    outbytesleft, errno, cp950hkscs_to_utf8);
350*6008Syy154373 }
351*6008Syy154373 
352*6008Syy154373 /*
353*6008Syy154373  * String based encoding convertor from CP950-HKSCS to UTF-8.
354*6008Syy154373  */
355*6008Syy154373 static size_t
kiconvstr_fr_cp950hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)356*6008Syy154373 kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
357*6008Syy154373 	size_t *outlen, int flag, int *errno)
358*6008Syy154373 {
359*6008Syy154373 	return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
360*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
361*6008Syy154373 }
362*6008Syy154373 
/*
 * Encoding convertor from EUC-TW to UTF-8.
 *
 * Bytes accepted by KICONV_IS_ASCII() are copied through unchanged.  Any
 * other character is either two bytes long (treated as CNS plane 1) or,
 * when it starts with KICONV_TC_EUCTW_MBYTE, four bytes long: the marker
 * byte, a plane byte and the two-byte code.  The two-byte code and the
 * plane number are handed to euctw_to_utf8() to be written out as UTF-8.
 *
 * When the conversion loop is left, *inbuf, *inbytesleft, *outbuf and
 * *outbytesleft are updated to reflect what was consumed and produced.
 * The two early returns below leave them untouched.
 *
 * Return: the value accumulated in ret_val, which euctw_to_utf8() may
 *         update through the pointer it is given.  (size_t)-1 with *errno
 *         set to EBADF is returned for a NULL or (void *)-1 descriptor,
 *         and 0 is returned for a state reset request (inbuf or *inbuf is
 *         NULL).
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() is defined outside this file.
 * It is presumed to store its argument in *errno, set ret_val to
 * (size_t)-1 and break out of the loop -- confirm against its definition.
 */
static size_t
kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t		*ib;
	uchar_t		*ob;
	uchar_t		*ibtail;
	uchar_t		*obtail;
	uchar_t		*oldib;
	size_t		ret_val;
	size_t		plane_no;
	int8_t		sz;
	uint32_t	euctw_val;
	boolean_t	isplane1;

	/* Check on the kiconv code conversion descriptor. */
	if (kcd == NULL || kcd == (void *)-1) {
		*errno = EBADF;
		return ((size_t)-1);
	}

	/* If this is a state reset request, process and return. */
	if (inbuf == NULL || *inbuf == NULL) {
		return (0);
	}

	ret_val = 0;
	ib = (uchar_t *)*inbuf;
	ob = (uchar_t *)*outbuf;
	/* One-past-the-end markers for the input and output buffers. */
	ibtail = ib + *inbytesleft;
	obtail = ob + *outbytesleft;

	while (ib < ibtail) {
		if (KICONV_IS_ASCII(*ib)) {
			if (ob >= obtail) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}

			*ob++ = *ib++;
			continue;
		}

		/*
		 * Issue EILSEQ error if the first byte is not a
		 * valid EUC-TW leading byte.
		 */
		if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		}

		/* The marker byte introduces the four-byte form. */
		isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
		    B_FALSE : B_TRUE;

		/*
		 * Issue EINVAL error if input buffer has an incomplete
		 * character at the end of the buffer.
		 */
		if (ibtail - ib < (isplane1 ? 2 : 4)) {
			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
		}

		/*
		 * Remember where this character starts so that ib can be
		 * rewound if the output turns out not to fit.  The plane
		 * number is taken from the second byte here, before the
		 * sequence has been validated just below.
		 */
		oldib = ib;
		plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;

		/*
		 * Issue EILSEQ error if the remaining bytes are not
		 * valid EUC-TW bytes.
		 */
		if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
		}

		/* Skip the marker and plane bytes of the four-byte form. */
		if (! isplane1)
			ib += 2;

		/* Now we have a valid EUC-TW character. */
		euctw_val = *ib++;
		euctw_val = (euctw_val << 8) | *ib++;
		sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);

		/* A negative size is reported to the caller as E2BIG. */
		if (sz < 0) {
			ib = oldib;
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		ob += sz;
	}

	*inbuf = (char *)ib;
	*inbytesleft = ibtail - ib;
	*outbuf = (char *)ob;
	*outbytesleft = obtail - ob;

	return (ret_val);
}
461*6008Syy154373 
/*
 * String based encoding convertor from EUC-TW to UTF-8.
 *
 * Converts at most *inlen bytes from inarray into outarray, which has
 * room for *outlen bytes.  Unless KICONV_IGNORE_NULL is set in flag,
 * conversion stops at the first NUL byte in the input.  On return *inlen
 * and *outlen hold the number of bytes left unconsumed and unused
 * respectively.
 *
 * Each replacement character written at REPLACE_INVALID adds one to the
 * return value.
 *
 * Return: the value accumulated in ret_val, which euctw_to_utf8() may also
 *         update through the pointer it is given.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() and
 * KICONV_SET_ERRNO_WITH_FLAG() are defined outside this file.  The former
 * is presumed to store its argument in *errno, set ret_val to (size_t)-1
 * and break.  The latter is presumed, when KICONV_REPLACE_INVALID is set
 * in flag, to advance ib by its first argument and jump to
 * REPLACE_INVALID, and otherwise to act like the former -- confirm against
 * their definitions.
 */
static size_t
kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
	size_t *outlen, int flag, int *errno)
{
	uchar_t		*ib;
	uchar_t		*ob;
	uchar_t		*ibtail;
	uchar_t		*obtail;
	uchar_t		*oldib;
	size_t		ret_val;
	size_t		plane_no;
	int8_t		sz;
	uint32_t	euctw_val;
	boolean_t	isplane1;
	boolean_t	do_not_ignore_null;

	ret_val = 0;
	ib = (uchar_t *)inarray;
	ob = (uchar_t *)outarray;
	/* One-past-the-end markers for the input and output buffers. */
	ibtail = ib + *inlen;
	obtail = ob + *outlen;
	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

	while (ib < ibtail) {
		if (*ib == '\0' && do_not_ignore_null)
			break;

		if (KICONV_IS_ASCII(*ib)) {
			if (ob >= obtail) {
				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
			}

			*ob++ = *ib++;
			continue;
		}

		/*
		 * Remember where this character starts so that ib can be
		 * rewound if the output turns out not to fit.
		 */
		oldib = ib;

		/* Not a valid EUC-TW leading byte. */
		if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
		}

		/* The marker byte introduces the four-byte form. */
		isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
		    B_FALSE : B_TRUE;

		/*
		 * Incomplete character at the end of the input.  In
		 * replacement mode the whole remaining tail is consumed and
		 * replaced by a single replacement character.
		 */
		if (ibtail - ib < (isplane1 ? 2 : 4)) {
			if (flag & KICONV_REPLACE_INVALID) {
				ib = ibtail;
				goto REPLACE_INVALID;
			}

			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
		}

		/*
		 * The plane number is taken from the second byte here,
		 * before the sequence has been validated just below.
		 */
		plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;

		/* The advance is the full length of the character. */
		if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
			KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
		}

		/* Skip the marker and plane bytes of the four-byte form. */
		if (! isplane1)
			ib += 2;

		euctw_val = *ib++;
		euctw_val = (euctw_val << 8) | *ib++;
		sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);

		/* A negative size is reported to the caller as E2BIG. */
		if (sz < 0) {
			ib = oldib;
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		ob += sz;
		continue;

REPLACE_INVALID:
		/*
		 * No room for the replacement character: un-consume the
		 * offending input and report E2BIG instead.
		 */
		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
			ib = oldib;
			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
		}

		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
		ret_val++;
	}

	*inlen = ibtail - ib;
	*outlen = obtail - ob;

	return (ret_val);
}
557*6008Syy154373 
558*6008Syy154373 /*
559*6008Syy154373  * Encoding convertor from UTF-8 to BIG5.
560*6008Syy154373  */
561*6008Syy154373 static size_t
kiconv_to_big5(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)562*6008Syy154373 kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
563*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
564*6008Syy154373 {
565*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
566*6008Syy154373 	    outbytesleft, errno, utf8_to_big5);
567*6008Syy154373 }
568*6008Syy154373 
569*6008Syy154373 /*
570*6008Syy154373  * String based encoding convertor from UTF-8 to BIG5.
571*6008Syy154373  */
572*6008Syy154373 static size_t
kiconvstr_to_big5(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)573*6008Syy154373 kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
574*6008Syy154373 	size_t *outlen, int flag, int *errno)
575*6008Syy154373 {
576*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
577*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
578*6008Syy154373 }
579*6008Syy154373 
580*6008Syy154373 /*
581*6008Syy154373  * Encoding convertor from UTF-8 to EUC-TW.
582*6008Syy154373  */
583*6008Syy154373 static size_t
kiconv_to_euctw(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)584*6008Syy154373 kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
585*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
586*6008Syy154373 {
587*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
588*6008Syy154373 	    outbytesleft, errno, utf8_to_euctw);
589*6008Syy154373 }
590*6008Syy154373 
591*6008Syy154373 /*
592*6008Syy154373  * String based encoding convertor from UTF-8 to EUC-TW.
593*6008Syy154373  */
594*6008Syy154373 static size_t
kiconvstr_to_euctw(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)595*6008Syy154373 kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
596*6008Syy154373 	size_t *outlen, int flag, int *errno)
597*6008Syy154373 {
598*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
599*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
600*6008Syy154373 }
601*6008Syy154373 
602*6008Syy154373 /*
603*6008Syy154373  * Encoding convertor from UTF-8 to CP950HKSCS.
604*6008Syy154373  */
605*6008Syy154373 static size_t
kiconv_to_cp950hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)606*6008Syy154373 kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
607*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
608*6008Syy154373 {
609*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
610*6008Syy154373 	    outbytesleft, errno, utf8_to_cp950hkscs);
611*6008Syy154373 }
612*6008Syy154373 
613*6008Syy154373 /*
614*6008Syy154373  * String based encoding convertor from UTF-8 to CP950HKSCS.
615*6008Syy154373  */
616*6008Syy154373 static size_t
kiconvstr_to_cp950hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)617*6008Syy154373 kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
618*6008Syy154373 	size_t *outlen, int flag, int *errno)
619*6008Syy154373 {
620*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
621*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
622*6008Syy154373 }
623*6008Syy154373 
624*6008Syy154373 /*
625*6008Syy154373  * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
626*6008Syy154373  */
627*6008Syy154373 static size_t
kiconv_to_big5hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)628*6008Syy154373 kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
629*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
630*6008Syy154373 {
631*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
632*6008Syy154373 	    outbytesleft, errno, utf8_to_big5hkscs);
633*6008Syy154373 }
634*6008Syy154373 
635*6008Syy154373 /*
636*6008Syy154373  * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
637*6008Syy154373  */
638*6008Syy154373 static size_t
kiconvstr_to_big5hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)639*6008Syy154373 kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
640*6008Syy154373     size_t *outlen, int flag, int *errno)
641*6008Syy154373 {
642*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
643*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
644*6008Syy154373 }
645*6008Syy154373 
646*6008Syy154373 /*
647*6008Syy154373  * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
648*6008Syy154373  * Return: > 0  - Converted successfully
649*6008Syy154373  *         = -1 - E2BIG
650*6008Syy154373  */
651*6008Syy154373 static int8_t
big5_to_utf8_common(uint32_t big5_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_array_t * table,size_t nitems)652*6008Syy154373 big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
653*6008Syy154373 	size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
654*6008Syy154373 {
655*6008Syy154373 	size_t	index;
656*6008Syy154373 	int8_t	sz;
657*6008Syy154373 	uchar_t	*u8;
658*6008Syy154373 
659*6008Syy154373 	index = kiconv_binsearch(big5_val, table, nitems);
660*6008Syy154373 	u8 = table[index].u8;
661*6008Syy154373 	sz = u8_number_of_bytes[u8[0]];
662*6008Syy154373 
663*6008Syy154373 	if (obtail - ob < sz) {
664*6008Syy154373 		*ret_val = (size_t)-1;
665*6008Syy154373 		return (-1);
666*6008Syy154373 	}
667*6008Syy154373 
668*6008Syy154373 	if (index == 0)
669*6008Syy154373 		(*ret_val)++;	/* Non-identical conversion */
670*6008Syy154373 
671*6008Syy154373 	for (index = 0; index < sz; index++)
672*6008Syy154373 		*ob++ = u8[index];
673*6008Syy154373 
674*6008Syy154373 	return (sz);
675*6008Syy154373 }
676*6008Syy154373 
677*6008Syy154373 /*
678*6008Syy154373  * Convert single BIG5 character to UTF-8.
679*6008Syy154373  */
680*6008Syy154373 static int8_t
big5_to_utf8(uint32_t big5_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)681*6008Syy154373 big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
682*6008Syy154373 {
683*6008Syy154373 	return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
684*6008Syy154373 	    kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
685*6008Syy154373 }
686*6008Syy154373 
687*6008Syy154373 /*
688*6008Syy154373  * Convert single CP950-HKSCS character to UTF-8.
689*6008Syy154373  */
690*6008Syy154373 static int8_t
cp950hkscs_to_utf8(uint32_t hkscs_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)691*6008Syy154373 cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
692*6008Syy154373 	size_t *ret_val)
693*6008Syy154373 {
694*6008Syy154373 	return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
695*6008Syy154373 	    kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
696*6008Syy154373 }
697*6008Syy154373 
/*
 * Compute the Unicode value of a character in one of the CNS planes that
 * are mapped arithmetically into the Unicode range starting at 0xF0000.
 *
 * plane_no is the CNS plane number; b1 and b2 are the two bytes of the
 * character.  Plane 16 is computed as if it were plane 15, because
 * plane 15 is pre-allocated.
 */
static uint32_t
get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
{
	size_t	plane = (plane_no == 16) ? 15 : plane_no;

	/*
	 * This is
	 *   0xF0000 + (plane - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1)
	 * with all of the constant terms folded into 0xD2611.
	 */
	return (8836 * plane + 94 * b1 + b2 + 0xD2611);
}
715*6008Syy154373 
716*6008Syy154373 /*
717*6008Syy154373  * Convert single EUC-TW character to UTF-8.
718*6008Syy154373  * Return: > 0  - Converted successfully
719*6008Syy154373  *         = -1 - E2BIG
720*6008Syy154373  */
721*6008Syy154373 static int8_t
euctw_to_utf8(size_t plane_no,uint32_t euctw_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)722*6008Syy154373 euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
723*6008Syy154373 	uchar_t *obtail, size_t *ret_val)
724*6008Syy154373 {
725*6008Syy154373 	uint32_t u32;
726*6008Syy154373 	size_t	index;
727*6008Syy154373 	int8_t	sz;
728*6008Syy154373 	uchar_t	udc[4];
729*6008Syy154373 	uchar_t	*u8;
730*6008Syy154373 
731*6008Syy154373 	switch (plane_no) {
732*6008Syy154373 	case 1:
733*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
734*6008Syy154373 		    KICONV_CNS1_UTF8_MAX);
735*6008Syy154373 		u8 = kiconv_cns1_utf8[index].u8;
736*6008Syy154373 		break;
737*6008Syy154373 	case 2:
738*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
739*6008Syy154373 		    KICONV_CNS2_UTF8_MAX);
740*6008Syy154373 		u8 = kiconv_cns2_utf8[index].u8;
741*6008Syy154373 		break;
742*6008Syy154373 	case 3:
743*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
744*6008Syy154373 		    KICONV_CNS3_UTF8_MAX);
745*6008Syy154373 		u8 = kiconv_cns3_utf8[index].u8;
746*6008Syy154373 		break;
747*6008Syy154373 	case 4:
748*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
749*6008Syy154373 		    KICONV_CNS4_UTF8_MAX);
750*6008Syy154373 		u8 = kiconv_cns4_utf8[index].u8;
751*6008Syy154373 		break;
752*6008Syy154373 	case 5:
753*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
754*6008Syy154373 		    KICONV_CNS5_UTF8_MAX);
755*6008Syy154373 		u8 = kiconv_cns5_utf8[index].u8;
756*6008Syy154373 		break;
757*6008Syy154373 	case 6:
758*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
759*6008Syy154373 		    KICONV_CNS6_UTF8_MAX);
760*6008Syy154373 		u8 = kiconv_cns6_utf8[index].u8;
761*6008Syy154373 		break;
762*6008Syy154373 	case 7:
763*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
764*6008Syy154373 		    KICONV_CNS7_UTF8_MAX);
765*6008Syy154373 		u8 = kiconv_cns7_utf8[index].u8;
766*6008Syy154373 		break;
767*6008Syy154373 	case 12:
768*6008Syy154373 	case 13:
769*6008Syy154373 	case 14:
770*6008Syy154373 	case 16:
771*6008Syy154373 		u32 = get_unicode_from_UDA(plane_no,
772*6008Syy154373 		    (euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
773*6008Syy154373 		/*
774*6008Syy154373 		 * As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
775*6008Syy154373 		 * will occupy 4 bytes.
776*6008Syy154373 		 */
777*6008Syy154373 		udc[0] = 0xF3;
778*6008Syy154373 		udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
779*6008Syy154373 		udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
780*6008Syy154373 		udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
781*6008Syy154373 		u8 = udc;
782*6008Syy154373 		index = 1;
783*6008Syy154373 		break;
784*6008Syy154373 	case 15:
785*6008Syy154373 		index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
786*6008Syy154373 		    KICONV_CNS15_UTF8_MAX);
787*6008Syy154373 		u8 = kiconv_cns15_utf8[index].u8;
788*6008Syy154373 		break;
789*6008Syy154373 	default:
790*6008Syy154373 		index = 0;
791*6008Syy154373 		u8 = kiconv_cns1_utf8[index].u8;
792*6008Syy154373 	}
793*6008Syy154373 
794*6008Syy154373 	sz = u8_number_of_bytes[u8[0]];
795*6008Syy154373 	if (obtail - ob < sz) {
796*6008Syy154373 		*ret_val = (size_t)-1;
797*6008Syy154373 		return (-1);
798*6008Syy154373 	}
799*6008Syy154373 
800*6008Syy154373 	if (index == 0)
801*6008Syy154373 		(*ret_val)++;
802*6008Syy154373 
803*6008Syy154373 	for (index = 0; index < sz; index++)
804*6008Syy154373 		*ob++ = u8[index];
805*6008Syy154373 
806*6008Syy154373 	return (sz);
807*6008Syy154373 }
808*6008Syy154373 
809*6008Syy154373 /*
810*6008Syy154373  * Convert single HKSCS character to UTF-8.
811*6008Syy154373  * Return: > 0  - Converted successfully
812*6008Syy154373  *         = -1 - E2BIG
813*6008Syy154373  */
814*6008Syy154373 static int8_t
big5hkscs_to_utf8(uint32_t hkscs_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)815*6008Syy154373 big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
816*6008Syy154373 	size_t *ret_val)
817*6008Syy154373 {
818*6008Syy154373 	size_t	index;
819*6008Syy154373 	int8_t	sz;
820*6008Syy154373 	uchar_t	*u8;
821*6008Syy154373 
822*6008Syy154373 	index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
823*6008Syy154373 	    KICONV_HKSCS_UTF8_MAX);
824*6008Syy154373 	u8 = kiconv_hkscs_utf8[index].u8;
825*6008Syy154373 
826*6008Syy154373 	/*
827*6008Syy154373 	 * Single HKSCS-2004 character may map to 2 Unicode
828*6008Syy154373 	 * code points.
829*6008Syy154373 	 */
830*6008Syy154373 	if (u8[0] == 0xFF) {
831*6008Syy154373 		u8 = hkscs_special_sequence[u8[1]];
832*6008Syy154373 		sz = 4;
833*6008Syy154373 	} else {
834*6008Syy154373 		sz = u8_number_of_bytes[u8[0]];
835*6008Syy154373 	}
836*6008Syy154373 
837*6008Syy154373 	if (obtail - ob < sz) {
838*6008Syy154373 		*ret_val = (size_t)-1;
839*6008Syy154373 		return (-1);
840*6008Syy154373 	}
841*6008Syy154373 
842*6008Syy154373 	if (index == 0)
843*6008Syy154373 		(*ret_val)++;	/* Non-identical conversion. */
844*6008Syy154373 
845*6008Syy154373 	for (index = 0; index < sz; index++)
846*6008Syy154373 		*ob++ = u8[index];
847*6008Syy154373 
848*6008Syy154373 	return (sz);
849*6008Syy154373 }
850*6008Syy154373 
851*6008Syy154373 /*
852*6008Syy154373  * Convert single UTF-8 character to EUC-TW.
853*6008Syy154373  * Return: > 0  - Converted successfully
854*6008Syy154373  *         = -1 - E2BIG
855*6008Syy154373  */
856*6008Syy154373 /* ARGSUSED */
857*6008Syy154373 static int8_t
utf8_to_euctw(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)858*6008Syy154373 utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
859*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val)
860*6008Syy154373 {
861*6008Syy154373 	size_t		index;
862*6008Syy154373 	size_t		plane_no;
863*6008Syy154373 	uchar_t		byte1;
864*6008Syy154373 	uchar_t		byte2;
865*6008Syy154373 
866*6008Syy154373 	if (utf8 >= KICONV_TC_UDA_UTF8_START &&
867*6008Syy154373 	    utf8 <= KICONV_TC_UDA_UTF8_END) {
868*6008Syy154373 		/*
869*6008Syy154373 		 * Calculate EUC-TW code if utf8 is in Unicode
870*6008Syy154373 		 * Private Plane 15.
871*6008Syy154373 		 */
872*6008Syy154373 		index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
873*6008Syy154373 		    ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
874*6008Syy154373 		    KICONV_TC_UDA_UCS4_START;
875*6008Syy154373 		plane_no = 12 + index / 8836;
876*6008Syy154373 		byte1 = 0xA1 + (index % 8836) / 94;
877*6008Syy154373 		byte2 = 0xA1 + index % 94;
878*6008Syy154373 
879*6008Syy154373 		/* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
880*6008Syy154373 		if (plane_no == 15)
881*6008Syy154373 			plane_no = 16;
882*6008Syy154373 	} else {
883*6008Syy154373 		uint32_t	euctw_val;
884*6008Syy154373 
885*6008Syy154373 		index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
886*6008Syy154373 		    KICONV_UTF8_EUCTW_MAX);
887*6008Syy154373 
888*6008Syy154373 		if (index == 0) {
889*6008Syy154373 			if (ob >= obtail) {
890*6008Syy154373 				*ret_val = (size_t)-1;
891*6008Syy154373 				return (-1);
892*6008Syy154373 			}
893*6008Syy154373 
894*6008Syy154373 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
895*6008Syy154373 			(*ret_val)++;
896*6008Syy154373 
897*6008Syy154373 			return (1);
898*6008Syy154373 		}
899*6008Syy154373 
900*6008Syy154373 		euctw_val = kiconv_utf8_euctw[index].value;
901*6008Syy154373 		byte1 = (euctw_val & 0xFF00) >> 8;
902*6008Syy154373 		byte2 = euctw_val & 0xFF;
903*6008Syy154373 		plane_no = euctw_val >> 16;
904*6008Syy154373 	}
905*6008Syy154373 
906*6008Syy154373 	if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
907*6008Syy154373 		*ret_val = (size_t)-1;
908*6008Syy154373 		return (-1);
909*6008Syy154373 	}
910*6008Syy154373 
911*6008Syy154373 	if (plane_no != 1) {
912*6008Syy154373 		*ob++ = KICONV_TC_EUCTW_MBYTE;
913*6008Syy154373 		*ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
914*6008Syy154373 	}
915*6008Syy154373 
916*6008Syy154373 	*ob++ = byte1;
917*6008Syy154373 	*ob = byte2;
918*6008Syy154373 
919*6008Syy154373 	return (plane_no == 1 ? 2 : 4);
920*6008Syy154373 }
921*6008Syy154373 
922*6008Syy154373 /*
923*6008Syy154373  * Convert single UTF-8 character to BIG5-HKSCS
924*6008Syy154373  * Return: > 0  - Converted successfully
925*6008Syy154373  *         = -1 - E2BIG
926*6008Syy154373  */
927*6008Syy154373 static int8_t
utf8_to_big5hkscs(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)928*6008Syy154373 utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
929*6008Syy154373     uchar_t *ob, uchar_t *obtail, size_t *ret_val)
930*6008Syy154373 {
931*6008Syy154373 	size_t		index;
932*6008Syy154373 	int8_t		hkscslen;
933*6008Syy154373 	uint32_t	hkscscode;
934*6008Syy154373 	boolean_t	special_sequence = B_FALSE;
935*6008Syy154373 
936*6008Syy154373 	index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
937*6008Syy154373 	    KICONV_UTF8_HKSCS_MAX);
938*6008Syy154373 	hkscscode = kiconv_utf8_hkscs[index].value;
939*6008Syy154373 
940*6008Syy154373 	/*
941*6008Syy154373 	 * There are 4 special code points in HKSCS-2004 which mapped
942*6008Syy154373 	 * to 2 UNICODE code points.
943*6008Syy154373 	 */
944*6008Syy154373 	if ((int32_t)hkscscode < 0) {
945*6008Syy154373 		size_t special_index = (-(int32_t)hkscscode - 1) * 3;
946*6008Syy154373 
947*6008Syy154373 		/* Check the following 2 bytes. */
948*6008Syy154373 		if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
949*6008Syy154373 		    (*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
950*6008Syy154373 			special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
951*6008Syy154373 			special_sequence = B_TRUE;
952*6008Syy154373 		}
953*6008Syy154373 
954*6008Syy154373 		hkscscode = ucs_special_sequence[special_index];
955*6008Syy154373 	}
956*6008Syy154373 
957*6008Syy154373 	hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
958*6008Syy154373 	if (obtail - ob < hkscslen) {
959*6008Syy154373 		*ret_val = (size_t)-1;
960*6008Syy154373 		return (-1);
961*6008Syy154373 	}
962*6008Syy154373 
963*6008Syy154373 	if (index == 0)
964*6008Syy154373 		(*ret_val)++;
965*6008Syy154373 
966*6008Syy154373 	if (hkscslen > 1)
967*6008Syy154373 		*ob++ = (uchar_t)(hkscscode >> 8);
968*6008Syy154373 	*ob = (uchar_t)(hkscscode & 0xFF);
969*6008Syy154373 
970*6008Syy154373 	if (special_sequence) {		/* Advance for special sequence */
971*6008Syy154373 		(*inbuf) += 2;
972*6008Syy154373 	}
973*6008Syy154373 
974*6008Syy154373 	return (hkscslen);
975*6008Syy154373 }
976*6008Syy154373 
977*6008Syy154373 /*
978*6008Syy154373  * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
979*6008Syy154373  * Return: > 0  - Converted successfully
980*6008Syy154373  *         = -1 - E2BIG
981*6008Syy154373  */
982*6008Syy154373 static int8_t
utf8_to_big5_common(uint32_t utf8,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_t * table,size_t nitems)983*6008Syy154373 utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
984*6008Syy154373 	size_t *ret_val, kiconv_table_t *table, size_t nitems)
985*6008Syy154373 {
986*6008Syy154373 	size_t		index;
987*6008Syy154373 	int8_t		big5len;
988*6008Syy154373 	uint32_t	big5code;
989*6008Syy154373 
990*6008Syy154373 	index = kiconv_binsearch(utf8, table, nitems);
991*6008Syy154373 	big5code = table[index].value;
992*6008Syy154373 	big5len = (big5code <= 0xFF) ? 1 : 2;
993*6008Syy154373 
994*6008Syy154373 	if (obtail - ob < big5len) {
995*6008Syy154373 		*ret_val = (size_t)-1;
996*6008Syy154373 		return (-1);
997*6008Syy154373 	}
998*6008Syy154373 
999*6008Syy154373 	if (index == 0)
1000*6008Syy154373 		(*ret_val)++;
1001*6008Syy154373 
1002*6008Syy154373 	if (big5len > 1)
1003*6008Syy154373 		*ob++ = (uchar_t)(big5code >> 8);
1004*6008Syy154373 	*ob = (uchar_t)(big5code & 0xFF);
1005*6008Syy154373 
1006*6008Syy154373 	return (big5len);
1007*6008Syy154373 }
1008*6008Syy154373 
1009*6008Syy154373 /*
1010*6008Syy154373  * Convert single UTF-8 character to BIG5.
1011*6008Syy154373  */
1012*6008Syy154373 /* ARGSUSED */
1013*6008Syy154373 static int8_t
utf8_to_big5(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)1014*6008Syy154373 utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1015*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1016*6008Syy154373 {
1017*6008Syy154373 	return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1018*6008Syy154373 	    kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
1019*6008Syy154373 }
1020*6008Syy154373 
1021*6008Syy154373 /*
1022*6008Syy154373  * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1023*6008Syy154373  */
1024*6008Syy154373 /* ARGSUSED */
1025*6008Syy154373 static int8_t
utf8_to_cp950hkscs(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)1026*6008Syy154373 utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1027*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1028*6008Syy154373 {
1029*6008Syy154373 	return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1030*6008Syy154373 	    kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
1031*6008Syy154373 }
1032*6008Syy154373 
1033*6008Syy154373 static kiconv_ops_t kiconv_tc_ops_tbl[] = {
1034*6008Syy154373 	{
1035*6008Syy154373 		"big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
1036*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_big5
1037*6008Syy154373 	},
1038*6008Syy154373 	{
1039*6008Syy154373 		"utf-8", "big5", open_fr_big5, kiconv_fr_big5,
1040*6008Syy154373 		close_fr_tc, kiconvstr_fr_big5
1041*6008Syy154373 	},
1042*6008Syy154373 
1043*6008Syy154373 	{
1044*6008Syy154373 		"big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
1045*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_big5hkscs
1046*6008Syy154373 	},
1047*6008Syy154373 	{
1048*6008Syy154373 		"utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
1049*6008Syy154373 		close_fr_tc, kiconvstr_fr_big5hkscs
1050*6008Syy154373 	},
1051*6008Syy154373 
1052*6008Syy154373 	{
1053*6008Syy154373 		"euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
1054*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_euctw
1055*6008Syy154373 	},
1056*6008Syy154373 	{
1057*6008Syy154373 		"utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
1058*6008Syy154373 		close_fr_tc, kiconvstr_fr_euctw
1059*6008Syy154373 	},
1060*6008Syy154373 
1061*6008Syy154373 	{
1062*6008Syy154373 		"cp950-hkscs", "utf-8", kiconv_open_to_cck,
1063*6008Syy154373 		kiconv_to_cp950hkscs, kiconv_close_to_cck,
1064*6008Syy154373 		kiconvstr_to_cp950hkscs
1065*6008Syy154373 	},
1066*6008Syy154373 	{
1067*6008Syy154373 		"utf-8", "cp950-hkscs", open_fr_cp950hkscs,
1068*6008Syy154373 		kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
1069*6008Syy154373 	},
1070*6008Syy154373 };
1071*6008Syy154373 
1072*6008Syy154373 static kiconv_module_info_t kiconv_tc_info = {
1073*6008Syy154373 	"kiconv_tc",		/* module name */
1074*6008Syy154373 	sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
1075*6008Syy154373 	kiconv_tc_ops_tbl,
1076*6008Syy154373 	0,
1077*6008Syy154373 	NULL,
1078*6008Syy154373 	NULL,
1079*6008Syy154373 	0
1080*6008Syy154373 };
1081*6008Syy154373 
1082*6008Syy154373 static struct modlkiconv modlkiconv_tc = {
1083*6008Syy154373 	&mod_kiconvops,
1084*6008Syy154373 	"kiconv Traditional Chinese module 1.0",
1085*6008Syy154373 	&kiconv_tc_info
1086*6008Syy154373 };
1087*6008Syy154373 
1088*6008Syy154373 static struct modlinkage modlinkage = {
1089*6008Syy154373 	MODREV_1,
1090*6008Syy154373 	(void *)&modlkiconv_tc,
1091*6008Syy154373 	NULL
1092*6008Syy154373 };
1093*6008Syy154373 
1094*6008Syy154373 int
_init(void)1095*6008Syy154373 _init(void)
1096*6008Syy154373 {
1097*6008Syy154373 	int err;
1098*6008Syy154373 
1099*6008Syy154373 	err = mod_install(&modlinkage);
1100*6008Syy154373 	if (err)
1101*6008Syy154373 		cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
1102*6008Syy154373 
1103*6008Syy154373 	return (err);
1104*6008Syy154373 }
1105*6008Syy154373 
1106*6008Syy154373 int
_fini(void)1107*6008Syy154373 _fini(void)
1108*6008Syy154373 {
1109*6008Syy154373 	int err;
1110*6008Syy154373 
1111*6008Syy154373 	/*
1112*6008Syy154373 	 * If this module is being used, then, we cannot remove the module.
1113*6008Syy154373 	 * The following checking will catch pretty much all usual cases.
1114*6008Syy154373 	 *
1115*6008Syy154373 	 * Any remaining will be catached by the kiconv_unregister_module()
1116*6008Syy154373 	 * during mod_remove() at below.
1117*6008Syy154373 	 */
1118*6008Syy154373 	if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
1119*6008Syy154373 		return (EBUSY);
1120*6008Syy154373 
1121*6008Syy154373 	err = mod_remove(&modlinkage);
1122*6008Syy154373 	if (err)
1123*6008Syy154373 		cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
1124*6008Syy154373 
1125*6008Syy154373 	return (err);
1126*6008Syy154373 }
1127*6008Syy154373 
1128*6008Syy154373 int
_info(struct modinfo * modinfop)1129*6008Syy154373 _info(struct modinfo *modinfop)
1130*6008Syy154373 {
1131*6008Syy154373 	return (mod_info(&modlinkage, modinfop));
1132*6008Syy154373 }
1133