xref: /onnv-gate/usr/src/uts/common/kiconv/kiconv_sc/kiconv_sc.c (revision 6008:3a1c10482cf2)
1*6008Syy154373 /*
2*6008Syy154373  * CDDL HEADER START
3*6008Syy154373  *
4*6008Syy154373  * The contents of this file are subject to the terms of the
5*6008Syy154373  * Common Development and Distribution License (the "License").
6*6008Syy154373  * You may not use this file except in compliance with the License.
7*6008Syy154373  *
8*6008Syy154373  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373  * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373  * See the License for the specific language governing permissions
11*6008Syy154373  * and limitations under the License.
12*6008Syy154373  *
13*6008Syy154373  * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373  * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373  * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373  * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373  *
19*6008Syy154373  * CDDL HEADER END
20*6008Syy154373  */
21*6008Syy154373 /*
22*6008Syy154373  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23*6008Syy154373  * Use is subject to license terms.
24*6008Syy154373  */
25*6008Syy154373 
26*6008Syy154373 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*6008Syy154373 
28*6008Syy154373 #include <sys/types.h>
29*6008Syy154373 #include <sys/param.h>
30*6008Syy154373 #include <sys/sysmacros.h>
31*6008Syy154373 #include <sys/systm.h>
32*6008Syy154373 #include <sys/debug.h>
33*6008Syy154373 #include <sys/kmem.h>
34*6008Syy154373 #include <sys/sunddi.h>
35*6008Syy154373 #include <sys/byteorder.h>
36*6008Syy154373 #include <sys/errno.h>
37*6008Syy154373 #include <sys/modctl.h>
38*6008Syy154373 #include <sys/kiconv.h>
39*6008Syy154373 #include <sys/u8_textprep.h>
40*6008Syy154373 #include <sys/kiconv_cck_common.h>
41*6008Syy154373 #include <sys/kiconv_sc.h>
42*6008Syy154373 #include <sys/kiconv_gb18030_utf8.h>
43*6008Syy154373 #include <sys/kiconv_gb2312_utf8.h>
44*6008Syy154373 #include <sys/kiconv_utf8_gb18030.h>
45*6008Syy154373 #include <sys/kiconv_utf8_gb2312.h>
46*6008Syy154373 
47*6008Syy154373 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
48*6008Syy154373 	uchar_t *obtail, size_t *ret_val);
49*6008Syy154373 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
50*6008Syy154373 	size_t *ret_val, boolean_t isgbk4);
51*6008Syy154373 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
52*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret);
53*6008Syy154373 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
54*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret);
55*6008Syy154373 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
56*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret);
57*6008Syy154373 
/*
 * Magic descriptor values returned by the open_fr_*() routines, one per
 * source encoding.  KICONV_SC_MAX_MAGIC_ID is the largest value in use and
 * is the bound close_fr_sc() checks against.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
#define	KICONV_SC_MAX_MAGIC_ID		(KICONV_SC_EUCCN)
62*6008Syy154373 
63*6008Syy154373 static void *
open_fr_gb18030()64*6008Syy154373 open_fr_gb18030()
65*6008Syy154373 {
66*6008Syy154373 	return ((void *)KICONV_SC_GB18030);
67*6008Syy154373 }
68*6008Syy154373 
69*6008Syy154373 static void *
open_fr_gbk()70*6008Syy154373 open_fr_gbk()
71*6008Syy154373 {
72*6008Syy154373 	return ((void *)KICONV_SC_GBK);
73*6008Syy154373 }
74*6008Syy154373 
75*6008Syy154373 static void *
open_fr_euccn()76*6008Syy154373 open_fr_euccn()
77*6008Syy154373 {
78*6008Syy154373 	return ((void *)KICONV_SC_EUCCN);
79*6008Syy154373 }
80*6008Syy154373 
81*6008Syy154373 static int
close_fr_sc(void * s)82*6008Syy154373 close_fr_sc(void *s)
83*6008Syy154373 {
84*6008Syy154373 	if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
85*6008Syy154373 		return (EBADF);
86*6008Syy154373 
87*6008Syy154373 	return (0);
88*6008Syy154373 }
89*6008Syy154373 
90*6008Syy154373 /*
91*6008Syy154373  * Encoding convertor from UTF-8 to GB18030.
92*6008Syy154373  */
93*6008Syy154373 size_t
kiconv_to_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)94*6008Syy154373 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
95*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
96*6008Syy154373 {
97*6008Syy154373 
98*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
99*6008Syy154373 	    outbytesleft, errno, utf8_to_gb18030);
100*6008Syy154373 }
101*6008Syy154373 
102*6008Syy154373 /*
103*6008Syy154373  * String based encoding convertor from UTF-8 to GB18030.
104*6008Syy154373  */
105*6008Syy154373 size_t
kiconvstr_to_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)106*6008Syy154373 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
107*6008Syy154373 	size_t *outlen, int flag, int *errno)
108*6008Syy154373 {
109*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
110*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
111*6008Syy154373 }
112*6008Syy154373 
113*6008Syy154373 /*
114*6008Syy154373  * Encoding convertor from GB18030 to UTF-8.
115*6008Syy154373  */
116*6008Syy154373 size_t
kiconv_fr_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)117*6008Syy154373 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
118*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
119*6008Syy154373 {
120*6008Syy154373 	uchar_t		*ib;
121*6008Syy154373 	uchar_t		*ob;
122*6008Syy154373 	uchar_t		*ibtail;
123*6008Syy154373 	uchar_t		*obtail;
124*6008Syy154373 	size_t		ret_val;
125*6008Syy154373 	int8_t		sz;
126*6008Syy154373 	uint32_t	gb_val;
127*6008Syy154373 	boolean_t	isgbk4;
128*6008Syy154373 
129*6008Syy154373 	/* Check on the kiconv code conversion descriptor. */
130*6008Syy154373 	if (kcd == NULL || kcd == (void *)-1) {
131*6008Syy154373 		*errno = EBADF;
132*6008Syy154373 		return ((size_t)-1);
133*6008Syy154373 	}
134*6008Syy154373 
135*6008Syy154373 	/* If this is a state reset request, process and return. */
136*6008Syy154373 	if (inbuf == NULL || *inbuf == NULL) {
137*6008Syy154373 		return (0);
138*6008Syy154373 	}
139*6008Syy154373 
140*6008Syy154373 	ret_val = 0;
141*6008Syy154373 	ib = (uchar_t *)*inbuf;
142*6008Syy154373 	ob = (uchar_t *)*outbuf;
143*6008Syy154373 	ibtail = ib + *inbytesleft;
144*6008Syy154373 	obtail = ob + *outbytesleft;
145*6008Syy154373 
146*6008Syy154373 	while (ib < ibtail) {
147*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
148*6008Syy154373 			if (ob >= obtail) {
149*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
150*6008Syy154373 			}
151*6008Syy154373 
152*6008Syy154373 			*ob++ = *ib++;
153*6008Syy154373 			continue;
154*6008Syy154373 		}
155*6008Syy154373 
156*6008Syy154373 		/*
157*6008Syy154373 		 * Issue EILSEQ error if the first byte is not a
158*6008Syy154373 		 * valid GB18030 leading byte.
159*6008Syy154373 		 */
160*6008Syy154373 		if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
161*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
162*6008Syy154373 		}
163*6008Syy154373 
164*6008Syy154373 		isgbk4 = (ibtail - ib < 2) ? B_FALSE :
165*6008Syy154373 		    KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
166*6008Syy154373 
167*6008Syy154373 		if (isgbk4) {
168*6008Syy154373 			if (ibtail - ib < 4) {
169*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EINVAL);
170*6008Syy154373 			}
171*6008Syy154373 
172*6008Syy154373 			if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
173*6008Syy154373 			    KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
174*6008Syy154373 			    KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
175*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
176*6008Syy154373 			}
177*6008Syy154373 
178*6008Syy154373 			gb_val = (uint32_t)(*ib) << 24 |
179*6008Syy154373 			    (uint32_t)(*(ib + 1)) << 16 |
180*6008Syy154373 			    (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
181*6008Syy154373 		} else {
182*6008Syy154373 			if (ibtail - ib < 2) {
183*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EINVAL);
184*6008Syy154373 			}
185*6008Syy154373 
186*6008Syy154373 			if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
187*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
188*6008Syy154373 			}
189*6008Syy154373 
190*6008Syy154373 			gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
191*6008Syy154373 		}
192*6008Syy154373 
193*6008Syy154373 		sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
194*6008Syy154373 		if (sz < 0) {
195*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
196*6008Syy154373 		}
197*6008Syy154373 
198*6008Syy154373 		ib += isgbk4 ? 4 : 2;
199*6008Syy154373 		ob += sz;
200*6008Syy154373 	}
201*6008Syy154373 
202*6008Syy154373 	*inbuf = (char *)ib;
203*6008Syy154373 	*inbytesleft = ibtail - ib;
204*6008Syy154373 	*outbuf = (char *)ob;
205*6008Syy154373 	*outbytesleft = obtail - ob;
206*6008Syy154373 
207*6008Syy154373 	return (ret_val);
208*6008Syy154373 }
209*6008Syy154373 
210*6008Syy154373 /*
211*6008Syy154373  * String based encoding convertor from GB18030 to UTF-8.
212*6008Syy154373  */
213*6008Syy154373 size_t
kiconvstr_fr_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)214*6008Syy154373 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
215*6008Syy154373 	size_t *outlen, int flag, int *errno)
216*6008Syy154373 {
217*6008Syy154373 	uchar_t		*ib;
218*6008Syy154373 	uchar_t		*ob;
219*6008Syy154373 	uchar_t		*ibtail;
220*6008Syy154373 	uchar_t		*obtail;
221*6008Syy154373 	uchar_t		*oldib;
222*6008Syy154373 	size_t		ret_val;
223*6008Syy154373 	int8_t		sz;
224*6008Syy154373 	uint32_t	gb_val;
225*6008Syy154373 	boolean_t	isgbk4;
226*6008Syy154373 	boolean_t	do_not_ignore_null;
227*6008Syy154373 
228*6008Syy154373 	ret_val = 0;
229*6008Syy154373 	ib = (uchar_t *)inarray;
230*6008Syy154373 	ob = (uchar_t *)outarray;
231*6008Syy154373 	ibtail = ib + *inlen;
232*6008Syy154373 	obtail = ob + *outlen;
233*6008Syy154373 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
234*6008Syy154373 
235*6008Syy154373 	while (ib < ibtail) {
236*6008Syy154373 		if (*ib == '\0' && do_not_ignore_null)
237*6008Syy154373 			break;
238*6008Syy154373 
239*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
240*6008Syy154373 			if (ob >= obtail) {
241*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
242*6008Syy154373 			}
243*6008Syy154373 
244*6008Syy154373 			*ob++ = *ib++;
245*6008Syy154373 			continue;
246*6008Syy154373 		}
247*6008Syy154373 
248*6008Syy154373 		oldib = ib;
249*6008Syy154373 
250*6008Syy154373 		if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
251*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
252*6008Syy154373 		}
253*6008Syy154373 
254*6008Syy154373 		isgbk4 = (ibtail - ib < 2) ? B_FALSE :
255*6008Syy154373 		    KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
256*6008Syy154373 
257*6008Syy154373 		if (isgbk4) {
258*6008Syy154373 			if (ibtail - ib < 4) {
259*6008Syy154373 				if (flag & KICONV_REPLACE_INVALID) {
260*6008Syy154373 					ib = ibtail;
261*6008Syy154373 					goto REPLACE_INVALID;
262*6008Syy154373 				}
263*6008Syy154373 
264*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EINVAL);
265*6008Syy154373 			}
266*6008Syy154373 
267*6008Syy154373 			if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
268*6008Syy154373 			    KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
269*6008Syy154373 			    KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
270*6008Syy154373 				KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
271*6008Syy154373 			}
272*6008Syy154373 
273*6008Syy154373 			gb_val = (uint32_t)(*ib) << 24 |
274*6008Syy154373 			    (uint32_t)(*(ib + 1)) << 16 |
275*6008Syy154373 			    (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
276*6008Syy154373 		} else {
277*6008Syy154373 			if (ibtail - ib < 2) {
278*6008Syy154373 				if (flag & KICONV_REPLACE_INVALID) {
279*6008Syy154373 					ib = ibtail;
280*6008Syy154373 					goto REPLACE_INVALID;
281*6008Syy154373 				}
282*6008Syy154373 
283*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(EINVAL);
284*6008Syy154373 			}
285*6008Syy154373 
286*6008Syy154373 			if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
287*6008Syy154373 				KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
288*6008Syy154373 			}
289*6008Syy154373 
290*6008Syy154373 			gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
291*6008Syy154373 		}
292*6008Syy154373 
293*6008Syy154373 		sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
294*6008Syy154373 		if (sz < 0) {
295*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
296*6008Syy154373 		}
297*6008Syy154373 
298*6008Syy154373 		ib += isgbk4 ? 4 : 2;
299*6008Syy154373 		ob += sz;
300*6008Syy154373 		continue;
301*6008Syy154373 
302*6008Syy154373 REPLACE_INVALID:
303*6008Syy154373 		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
304*6008Syy154373 			ib = oldib;
305*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
306*6008Syy154373 		}
307*6008Syy154373 
308*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
309*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
310*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
311*6008Syy154373 		ret_val++;
312*6008Syy154373 	}
313*6008Syy154373 
314*6008Syy154373 	*inlen = ibtail - ib;
315*6008Syy154373 	*outlen = obtail - ob;
316*6008Syy154373 
317*6008Syy154373 	return (ret_val);
318*6008Syy154373 }
319*6008Syy154373 
320*6008Syy154373 /*
321*6008Syy154373  * Encoding convertor from UTF-8 to GBK.
322*6008Syy154373  */
323*6008Syy154373 size_t
kiconv_to_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)324*6008Syy154373 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
325*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
326*6008Syy154373 {
327*6008Syy154373 
328*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
329*6008Syy154373 	    outbytesleft, errno, utf8_to_gbk);
330*6008Syy154373 }
331*6008Syy154373 
332*6008Syy154373 /*
333*6008Syy154373  * String based encoding convertor from UTF-8 to GBK.
334*6008Syy154373  */
335*6008Syy154373 size_t
kiconvstr_to_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)336*6008Syy154373 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
337*6008Syy154373 	size_t *outlen, int flag, int *errno)
338*6008Syy154373 {
339*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
340*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
341*6008Syy154373 }
342*6008Syy154373 
343*6008Syy154373 /*
344*6008Syy154373  * Encoding convertor from GBK to UTF-8.
345*6008Syy154373  */
346*6008Syy154373 size_t
kiconv_fr_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)347*6008Syy154373 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
348*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
349*6008Syy154373 {
350*6008Syy154373 	uchar_t		*ib;
351*6008Syy154373 	uchar_t		*ob;
352*6008Syy154373 	uchar_t		*ibtail;
353*6008Syy154373 	uchar_t		*obtail;
354*6008Syy154373 	size_t		ret_val;
355*6008Syy154373 	int8_t		sz;
356*6008Syy154373 	uint32_t	gb_val;
357*6008Syy154373 
358*6008Syy154373 	/* Check on the kiconv code conversion descriptor. */
359*6008Syy154373 	if (kcd == NULL || kcd == (void *)-1) {
360*6008Syy154373 		*errno = EBADF;
361*6008Syy154373 		return ((size_t)-1);
362*6008Syy154373 	}
363*6008Syy154373 
364*6008Syy154373 	/* If this is a state reset request, process and return. */
365*6008Syy154373 	if (inbuf == NULL || *inbuf == NULL) {
366*6008Syy154373 		return (0);
367*6008Syy154373 	}
368*6008Syy154373 
369*6008Syy154373 	ret_val = 0;
370*6008Syy154373 	ib = (uchar_t *)*inbuf;
371*6008Syy154373 	ob = (uchar_t *)*outbuf;
372*6008Syy154373 	ibtail = ib + *inbytesleft;
373*6008Syy154373 	obtail = ob + *outbytesleft;
374*6008Syy154373 
375*6008Syy154373 	while (ib < ibtail) {
376*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
377*6008Syy154373 			if (ob >= obtail) {
378*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
379*6008Syy154373 			}
380*6008Syy154373 
381*6008Syy154373 			*ob++ = *ib++;
382*6008Syy154373 			continue;
383*6008Syy154373 		}
384*6008Syy154373 
385*6008Syy154373 		/*
386*6008Syy154373 		 * Issue EILSEQ error if the first byte is not a
387*6008Syy154373 		 * valid GBK leading byte.
388*6008Syy154373 		 */
389*6008Syy154373 		if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
390*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
391*6008Syy154373 		}
392*6008Syy154373 
393*6008Syy154373 		/*
394*6008Syy154373 		 * Issue EINVAL error if input buffer has an incomplete
395*6008Syy154373 		 * character at the end of the buffer.
396*6008Syy154373 		 */
397*6008Syy154373 		if (ibtail - ib < 2) {
398*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
399*6008Syy154373 		}
400*6008Syy154373 
401*6008Syy154373 		/*
402*6008Syy154373 		 * Issue EILSEQ error if the remaining byte is not
403*6008Syy154373 		 * a valid GBK byte.
404*6008Syy154373 		 */
405*6008Syy154373 		if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
406*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
407*6008Syy154373 		}
408*6008Syy154373 
409*6008Syy154373 		/* Now we have a valid GBK character. */
410*6008Syy154373 		gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
411*6008Syy154373 		sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
412*6008Syy154373 
413*6008Syy154373 		if (sz < 0) {
414*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
415*6008Syy154373 		}
416*6008Syy154373 
417*6008Syy154373 		ib += 2;
418*6008Syy154373 		ob += sz;
419*6008Syy154373 	}
420*6008Syy154373 
421*6008Syy154373 	*inbuf = (char *)ib;
422*6008Syy154373 	*inbytesleft = ibtail - ib;
423*6008Syy154373 	*outbuf = (char *)ob;
424*6008Syy154373 	*outbytesleft = obtail - ob;
425*6008Syy154373 
426*6008Syy154373 	return (ret_val);
427*6008Syy154373 }
428*6008Syy154373 
429*6008Syy154373 /*
430*6008Syy154373  * String based encoding convertor from GBK to UTF-8.
431*6008Syy154373  */
432*6008Syy154373 size_t
kiconvstr_fr_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)433*6008Syy154373 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
434*6008Syy154373 	size_t *outlen, int flag, int *errno)
435*6008Syy154373 {
436*6008Syy154373 	uchar_t		*ib;
437*6008Syy154373 	uchar_t		*ob;
438*6008Syy154373 	uchar_t		*ibtail;
439*6008Syy154373 	uchar_t		*obtail;
440*6008Syy154373 	uchar_t		*oldib;
441*6008Syy154373 	size_t		ret_val;
442*6008Syy154373 	int8_t		sz;
443*6008Syy154373 	uint32_t	gb_val;
444*6008Syy154373 	boolean_t	do_not_ignore_null;
445*6008Syy154373 
446*6008Syy154373 	ret_val = 0;
447*6008Syy154373 	ib = (uchar_t *)inarray;
448*6008Syy154373 	ob = (uchar_t *)outarray;
449*6008Syy154373 	ibtail = ib + *inlen;
450*6008Syy154373 	obtail = ob + *outlen;
451*6008Syy154373 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
452*6008Syy154373 
453*6008Syy154373 	while (ib < ibtail) {
454*6008Syy154373 		if (*ib == '\0' && do_not_ignore_null)
455*6008Syy154373 			break;
456*6008Syy154373 
457*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
458*6008Syy154373 			if (ob >= obtail) {
459*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
460*6008Syy154373 			}
461*6008Syy154373 
462*6008Syy154373 			*ob++ = *ib++;
463*6008Syy154373 			continue;
464*6008Syy154373 		}
465*6008Syy154373 
466*6008Syy154373 		oldib = ib;
467*6008Syy154373 
468*6008Syy154373 		if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
469*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
470*6008Syy154373 		}
471*6008Syy154373 
472*6008Syy154373 		if (ibtail - ib < 2) {
473*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
474*6008Syy154373 		}
475*6008Syy154373 
476*6008Syy154373 		if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
477*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
478*6008Syy154373 		}
479*6008Syy154373 
480*6008Syy154373 		gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
481*6008Syy154373 		sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
482*6008Syy154373 
483*6008Syy154373 		if (sz < 0) {
484*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
485*6008Syy154373 		}
486*6008Syy154373 
487*6008Syy154373 		ib += 2;
488*6008Syy154373 		ob += sz;
489*6008Syy154373 		continue;
490*6008Syy154373 
491*6008Syy154373 REPLACE_INVALID:
492*6008Syy154373 		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
493*6008Syy154373 			ib = oldib;
494*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495*6008Syy154373 		}
496*6008Syy154373 
497*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
498*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
499*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
500*6008Syy154373 		ret_val++;
501*6008Syy154373 	}
502*6008Syy154373 
503*6008Syy154373 	*inlen = ibtail - ib;
504*6008Syy154373 	*outlen = obtail - ob;
505*6008Syy154373 
506*6008Syy154373 	return (ret_val);
507*6008Syy154373 }
508*6008Syy154373 
509*6008Syy154373 /*
510*6008Syy154373  * Encoding convertor from UTF-8 to EUC-CN.
511*6008Syy154373  */
512*6008Syy154373 size_t
kiconv_to_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)513*6008Syy154373 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
514*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
515*6008Syy154373 {
516*6008Syy154373 	return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
517*6008Syy154373 	    outbytesleft, errno, utf8_to_gb2312);
518*6008Syy154373 }
519*6008Syy154373 
520*6008Syy154373 /*
521*6008Syy154373  * String based encoding convertor from UTF-8 to EUC-CN.
522*6008Syy154373  */
523*6008Syy154373 size_t
kiconvstr_to_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)524*6008Syy154373 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
525*6008Syy154373 	size_t *outlen, int flag, int *errno)
526*6008Syy154373 {
527*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
528*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
529*6008Syy154373 }
530*6008Syy154373 
531*6008Syy154373 /*
532*6008Syy154373  * Encoding converto from EUC-CN to UTF-8 code.
533*6008Syy154373  */
534*6008Syy154373 size_t
kiconv_fr_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)535*6008Syy154373 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
536*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
537*6008Syy154373 {
538*6008Syy154373 	uchar_t		*ib;
539*6008Syy154373 	uchar_t		*ob;
540*6008Syy154373 	uchar_t		*ibtail;
541*6008Syy154373 	uchar_t		*obtail;
542*6008Syy154373 	size_t		ret_val;
543*6008Syy154373 	int8_t		sz;
544*6008Syy154373 
545*6008Syy154373 	/* Check on the kiconv code conversion descriptor. */
546*6008Syy154373 	if (kcd == NULL || kcd == (void *)-1) {
547*6008Syy154373 		*errno = EBADF;
548*6008Syy154373 		return ((size_t)-1);
549*6008Syy154373 	}
550*6008Syy154373 
551*6008Syy154373 	/* If this is a state reset request, process and return. */
552*6008Syy154373 	if (inbuf == NULL || *inbuf == NULL) {
553*6008Syy154373 		return (0);
554*6008Syy154373 	}
555*6008Syy154373 
556*6008Syy154373 	ret_val = 0;
557*6008Syy154373 	ib = (uchar_t *)*inbuf;
558*6008Syy154373 	ob = (uchar_t *)*outbuf;
559*6008Syy154373 	ibtail = ib + *inbytesleft;
560*6008Syy154373 	obtail = ob + *outbytesleft;
561*6008Syy154373 
562*6008Syy154373 	while (ib < ibtail) {
563*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
564*6008Syy154373 			if (ob >= obtail) {
565*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
566*6008Syy154373 			}
567*6008Syy154373 
568*6008Syy154373 			*ob++ = *ib++;
569*6008Syy154373 			continue;
570*6008Syy154373 		}
571*6008Syy154373 
572*6008Syy154373 		/*
573*6008Syy154373 		 * Issue EILSEQ error if the first byte is not a
574*6008Syy154373 		 * valid GB2312 leading byte.
575*6008Syy154373 		 */
576*6008Syy154373 		if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
577*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
578*6008Syy154373 		}
579*6008Syy154373 
580*6008Syy154373 		/*
581*6008Syy154373 		 * Issue EINVAL error if input buffer has an incomplete
582*6008Syy154373 		 * character at the end of the buffer.
583*6008Syy154373 		 */
584*6008Syy154373 		if (ibtail - ib < 2) {
585*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
586*6008Syy154373 		}
587*6008Syy154373 
588*6008Syy154373 		/*
589*6008Syy154373 		 * Issue EILSEQ error if the remaining byte is not
590*6008Syy154373 		 * a valid GB2312 byte.
591*6008Syy154373 		 */
592*6008Syy154373 		if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
593*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
594*6008Syy154373 		}
595*6008Syy154373 
596*6008Syy154373 		/* Now we have a valid GB2312 character */
597*6008Syy154373 		sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
598*6008Syy154373 		if (sz < 0) {
599*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
600*6008Syy154373 		}
601*6008Syy154373 
602*6008Syy154373 		ib += 2;
603*6008Syy154373 		ob += sz;
604*6008Syy154373 	}
605*6008Syy154373 
606*6008Syy154373 	*inbuf = (char *)ib;
607*6008Syy154373 	*inbytesleft = ibtail - ib;
608*6008Syy154373 	*outbuf = (char *)ob;
609*6008Syy154373 	*outbytesleft = obtail - ob;
610*6008Syy154373 
611*6008Syy154373 	return (ret_val);
612*6008Syy154373 }
613*6008Syy154373 
614*6008Syy154373 /*
615*6008Syy154373  * String based encoding convertor from EUC-CN to UTF-8.
616*6008Syy154373  */
617*6008Syy154373 size_t
kiconvstr_fr_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)618*6008Syy154373 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
619*6008Syy154373     size_t *outlen, int flag, int *errno)
620*6008Syy154373 {
621*6008Syy154373 	uchar_t		*ib;
622*6008Syy154373 	uchar_t		*ob;
623*6008Syy154373 	uchar_t		*ibtail;
624*6008Syy154373 	uchar_t		*obtail;
625*6008Syy154373 	uchar_t		*oldib;
626*6008Syy154373 	size_t		ret_val;
627*6008Syy154373 	int8_t		sz;
628*6008Syy154373 	boolean_t	do_not_ignore_null;
629*6008Syy154373 
630*6008Syy154373 	ret_val = 0;
631*6008Syy154373 	ib = (uchar_t *)inarray;
632*6008Syy154373 	ob = (uchar_t *)outarray;
633*6008Syy154373 	ibtail = ib + *inlen;
634*6008Syy154373 	obtail = ob + *outlen;
635*6008Syy154373 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
636*6008Syy154373 
637*6008Syy154373 	while (ib < ibtail) {
638*6008Syy154373 		if (*ib == '\0' && do_not_ignore_null)
639*6008Syy154373 			break;
640*6008Syy154373 
641*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
642*6008Syy154373 			if (ob >= obtail) {
643*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
644*6008Syy154373 			}
645*6008Syy154373 
646*6008Syy154373 			*ob++ = *ib++;
647*6008Syy154373 			continue;
648*6008Syy154373 		}
649*6008Syy154373 
650*6008Syy154373 		oldib = ib;
651*6008Syy154373 
652*6008Syy154373 		if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
653*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
654*6008Syy154373 		}
655*6008Syy154373 
656*6008Syy154373 		if (ibtail - ib < 2) {
657*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
658*6008Syy154373 		}
659*6008Syy154373 
660*6008Syy154373 		if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
661*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
662*6008Syy154373 		}
663*6008Syy154373 
664*6008Syy154373 		sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
665*6008Syy154373 		if (sz < 0) {
666*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
667*6008Syy154373 		}
668*6008Syy154373 
669*6008Syy154373 		ib += 2;
670*6008Syy154373 		ob += sz;
671*6008Syy154373 		continue;
672*6008Syy154373 
673*6008Syy154373 REPLACE_INVALID:
674*6008Syy154373 		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
675*6008Syy154373 			ib = oldib;
676*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
677*6008Syy154373 		}
678*6008Syy154373 
679*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
680*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
681*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
682*6008Syy154373 		ret_val++;
683*6008Syy154373 	}
684*6008Syy154373 
685*6008Syy154373 	*inlen = ibtail - ib;
686*6008Syy154373 	*outlen = obtail - ob;
687*6008Syy154373 
688*6008Syy154373 	return (ret_val);
689*6008Syy154373 }
690*6008Syy154373 
691*6008Syy154373 /*
692*6008Syy154373  * Convert single GB2312 character to UTF-8.
693*6008Syy154373  * Return: > 0  - Converted successfully
694*6008Syy154373  *         = -1 - E2BIG
695*6008Syy154373  */
696*6008Syy154373 static int8_t
gb2312_to_utf8(uchar_t b1,uchar_t b2,uchar_t * ob,uchar_t * obtail,size_t * ret_val)697*6008Syy154373 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
698*6008Syy154373 	size_t *ret_val)
699*6008Syy154373 {
700*6008Syy154373 	size_t	index;
701*6008Syy154373 	int8_t	sz;
702*6008Syy154373 	uchar_t	*u8;
703*6008Syy154373 
704*6008Syy154373 	/* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705*6008Syy154373 	index = b1 * 94 + b2 - 0x3BBF;
706*6008Syy154373 
707*6008Syy154373 	if (index >= KICONV_GB2312_UTF8_MAX)
708*6008Syy154373 		index = KICONV_GB2312_UTF8_MAX - 1;	/* Map to 0xEFBFBD */
709*6008Syy154373 
710*6008Syy154373 	u8 = kiconv_gb2312_utf8[index];
711*6008Syy154373 	sz = u8_number_of_bytes[u8[0]];
712*6008Syy154373 
713*6008Syy154373 	if (obtail - ob < sz) {
714*6008Syy154373 		*ret_val = (size_t)-1;
715*6008Syy154373 		return (-1);
716*6008Syy154373 	}
717*6008Syy154373 
718*6008Syy154373 	for (index = 0; index < sz; index++)
719*6008Syy154373 		*ob++ = u8[index];
720*6008Syy154373 
721*6008Syy154373 	/*
722*6008Syy154373 	 * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
723*6008Syy154373 	 * elements, so need to ckeck more.
724*6008Syy154373 	 */
725*6008Syy154373 	if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
726*6008Syy154373 	    u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
727*6008Syy154373 	    u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
728*6008Syy154373 	    u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
729*6008Syy154373 		(*ret_val)++;
730*6008Syy154373 
731*6008Syy154373 	return (sz);
732*6008Syy154373 }
733*6008Syy154373 
734*6008Syy154373 /*
735*6008Syy154373  * Convert single GB18030 or GBK character to UTF-8.
736*6008Syy154373  * Return: > 0  - Converted successfully
737*6008Syy154373  *         = -1 - E2BIG
738*6008Syy154373  */
739*6008Syy154373 static int8_t
gbk_to_utf8(uint32_t gbk_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,boolean_t isgbk4)740*6008Syy154373 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
741*6008Syy154373 	boolean_t isgbk4)
742*6008Syy154373 {
743*6008Syy154373 	size_t	index;
744*6008Syy154373 	int8_t	sz;
745*6008Syy154373 	uchar_t	u8array[4];
746*6008Syy154373 	uchar_t	*u8;
747*6008Syy154373 
748*6008Syy154373 	if (isgbk4) {
749*6008Syy154373 		if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
750*6008Syy154373 			uint32_t	u32;
751*6008Syy154373 
752*6008Syy154373 			/*
753*6008Syy154373 			 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754*6008Syy154373 			 *   (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755*6008Syy154373 			 *   (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756*6008Syy154373 			 *   (gbk_val & 0xFF - 0x30)+
757*6008Syy154373 			 *   KICONV_SC_PLANE1_UCS4_START;
758*6008Syy154373 			 */
759*6008Syy154373 			u32 = (gbk_val >> 24) * 12600 +
760*6008Syy154373 			    ((gbk_val & 0xFF0000) >> 16) * 1260 +
761*6008Syy154373 			    ((gbk_val & 0xFF00) >> 8) * 10 +
762*6008Syy154373 			    (gbk_val & 0xFF) - 0x1BA0FA;
763*6008Syy154373 			u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
764*6008Syy154373 			u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
765*6008Syy154373 			u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
766*6008Syy154373 			u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
767*6008Syy154373 			u8 = u8array;
768*6008Syy154373 			index = 1;
769*6008Syy154373 		} else {
770*6008Syy154373 			index = kiconv_binsearch(gbk_val,
771*6008Syy154373 			    kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
772*6008Syy154373 			u8 = kiconv_gbk4_utf8[index].u8;
773*6008Syy154373 		}
774*6008Syy154373 	} else {
775*6008Syy154373 		index = kiconv_binsearch(gbk_val,
776*6008Syy154373 		    kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
777*6008Syy154373 		u8 = kiconv_gbk_utf8[index].u8;
778*6008Syy154373 	}
779*6008Syy154373 
780*6008Syy154373 	sz = u8_number_of_bytes[u8[0]];
781*6008Syy154373 	if (obtail - ob < sz) {
782*6008Syy154373 		*ret_val = (size_t)-1;
783*6008Syy154373 		return (-1);
784*6008Syy154373 	}
785*6008Syy154373 
786*6008Syy154373 	if (index == 0)
787*6008Syy154373 		(*ret_val)++;	/* Non-identical conversion */
788*6008Syy154373 
789*6008Syy154373 	for (index = 0; index < sz; index++)
790*6008Syy154373 		*ob++ = u8[index];
791*6008Syy154373 
792*6008Syy154373 	return (sz);
793*6008Syy154373 }
794*6008Syy154373 
795*6008Syy154373 /*
796*6008Syy154373  * Convert single UTF-8 character to GB18030.
797*6008Syy154373  * Return: > 0  - Converted successfully
798*6008Syy154373  *         = -1 - E2BIG
799*6008Syy154373  */
800*6008Syy154373 /* ARGSUSED */
801*6008Syy154373 static int8_t
utf8_to_gb18030(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)802*6008Syy154373 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
803*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret)
804*6008Syy154373 {
805*6008Syy154373 	size_t 		index;
806*6008Syy154373 	int8_t		gbklen;
807*6008Syy154373 	uint32_t	gbkcode;
808*6008Syy154373 
809*6008Syy154373 	if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
810*6008Syy154373 		/* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
811*6008Syy154373 		uint32_t	u32;
812*6008Syy154373 
813*6008Syy154373 		u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
814*6008Syy154373 		    ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
815*6008Syy154373 		    KICONV_SC_PLANE1_UCS4_START;
816*6008Syy154373 		gbkcode = ((u32 / 12600 + 0x90) << 24) |
817*6008Syy154373 		    (((u32 % 12600) / 1260 + 0x30) << 16) |
818*6008Syy154373 		    (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
819*6008Syy154373 		gbklen = 4;
820*6008Syy154373 		index = 1;
821*6008Syy154373 	} else {
822*6008Syy154373 		index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
823*6008Syy154373 		    KICONV_UTF8_GB18030_MAX);
824*6008Syy154373 		gbkcode = kiconv_utf8_gb18030[index].value;
825*6008Syy154373 		KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
826*6008Syy154373 	}
827*6008Syy154373 
828*6008Syy154373 	if (obtail - ob < gbklen) {
829*6008Syy154373 		*ret = (size_t)-1;
830*6008Syy154373 		return (-1);
831*6008Syy154373 	}
832*6008Syy154373 
833*6008Syy154373 	if (index == 0)
834*6008Syy154373 		(*ret)++;		/* Non-identical conversion */
835*6008Syy154373 
836*6008Syy154373 	if (gbklen == 2) {
837*6008Syy154373 		*ob++ = (uchar_t)(gbkcode >> 8);
838*6008Syy154373 	} else if (gbklen == 4) {
839*6008Syy154373 		*ob++ = (uchar_t)(gbkcode >> 24);
840*6008Syy154373 		*ob++ = (uchar_t)(gbkcode >> 16);
841*6008Syy154373 		*ob++ = (uchar_t)(gbkcode >> 8);
842*6008Syy154373 	}
843*6008Syy154373 	*ob = (uchar_t)(gbkcode & 0xFF);
844*6008Syy154373 
845*6008Syy154373 	return (gbklen);
846*6008Syy154373 }
847*6008Syy154373 
848*6008Syy154373 /*
849*6008Syy154373  * Convert single UTF-8 character to GBK.
850*6008Syy154373  * Return: > 0  - Converted successfully
851*6008Syy154373  *         = -1 - E2BIG
852*6008Syy154373  */
853*6008Syy154373 /* ARGSUSED */
854*6008Syy154373 static int8_t
utf8_to_gbk(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)855*6008Syy154373 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
856*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret)
857*6008Syy154373 {
858*6008Syy154373 	size_t 		index;
859*6008Syy154373 	int8_t		gbklen;
860*6008Syy154373 	uint32_t	gbkcode;
861*6008Syy154373 
862*6008Syy154373 	index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
863*6008Syy154373 	    KICONV_UTF8_GB18030_MAX);
864*6008Syy154373 	gbkcode = kiconv_utf8_gb18030[index].value;
865*6008Syy154373 	KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
866*6008Syy154373 
867*6008Syy154373 	/* GBK and GB18030 share the same table, so check the length. */
868*6008Syy154373 	if (gbklen == 4) {
869*6008Syy154373 		index = 0;
870*6008Syy154373 		gbkcode = kiconv_utf8_gb18030[index].value;
871*6008Syy154373 		gbklen = 1;
872*6008Syy154373 	}
873*6008Syy154373 
874*6008Syy154373 	if (obtail - ob < gbklen) {
875*6008Syy154373 		*ret = (size_t)-1;
876*6008Syy154373 		return (-1);
877*6008Syy154373 	}
878*6008Syy154373 
879*6008Syy154373 	if (index == 0)
880*6008Syy154373 		(*ret)++;		/* Non-identical conversion */
881*6008Syy154373 
882*6008Syy154373 	if (gbklen > 1)
883*6008Syy154373 		*ob++ = (uchar_t)(gbkcode >> 8);
884*6008Syy154373 	*ob = (uchar_t)(gbkcode & 0xFF);
885*6008Syy154373 
886*6008Syy154373 	return (gbklen);
887*6008Syy154373 }
888*6008Syy154373 
889*6008Syy154373 /*
890*6008Syy154373  * Convert single UTF-8 character to GB2312.
891*6008Syy154373  * Return: > 0  - Converted successfully
892*6008Syy154373  *         = -1 - E2BIG
893*6008Syy154373  */
894*6008Syy154373 /* ARGSUSED */
895*6008Syy154373 static int8_t
utf8_to_gb2312(uint32_t utf8,uchar_t ** inbuf,uchar_t * intail,uchar_t * ob,uchar_t * obtail,size_t * ret)896*6008Syy154373 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
897*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret)
898*6008Syy154373 {
899*6008Syy154373 	size_t		index;
900*6008Syy154373 	int8_t		gblen;
901*6008Syy154373 	uint32_t	gbcode;
902*6008Syy154373 
903*6008Syy154373 	index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
904*6008Syy154373 	    KICONV_UTF8_GB2312_MAX);
905*6008Syy154373 	gbcode = kiconv_utf8_gb2312[index].value;
906*6008Syy154373 	gblen = (gbcode <= 0xFF) ? 1 : 2;
907*6008Syy154373 
908*6008Syy154373 	if (obtail - ob < gblen) {
909*6008Syy154373 		*ret = (size_t)-1;
910*6008Syy154373 		return (-1);
911*6008Syy154373 	}
912*6008Syy154373 
913*6008Syy154373 	if (index == 0)
914*6008Syy154373 		(*ret)++;
915*6008Syy154373 
916*6008Syy154373 	if (gblen > 1)
917*6008Syy154373 		*ob++ = (uchar_t)(gbcode >> 8);
918*6008Syy154373 	*ob = (uchar_t)(gbcode & 0xFF);
919*6008Syy154373 
920*6008Syy154373 	return (gblen);
921*6008Syy154373 }
922*6008Syy154373 
923*6008Syy154373 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
924*6008Syy154373 	{
925*6008Syy154373 		"gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
926*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_gb18030
927*6008Syy154373 	},
928*6008Syy154373 	{
929*6008Syy154373 		"utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
930*6008Syy154373 		close_fr_sc, kiconvstr_fr_gb18030
931*6008Syy154373 	},
932*6008Syy154373 	{
933*6008Syy154373 		"gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
934*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_gbk
935*6008Syy154373 	},
936*6008Syy154373 	{
937*6008Syy154373 		"utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
938*6008Syy154373 		close_fr_sc, kiconvstr_fr_gbk
939*6008Syy154373 	},
940*6008Syy154373 	{
941*6008Syy154373 		"euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
942*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_euccn
943*6008Syy154373 	},
944*6008Syy154373 	{
945*6008Syy154373 		"utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
946*6008Syy154373 		close_fr_sc, kiconvstr_fr_euccn
947*6008Syy154373 	},
948*6008Syy154373 };
949*6008Syy154373 
950*6008Syy154373 static kiconv_module_info_t kiconv_sc_info = {
951*6008Syy154373 	"kiconv_sc",		/* module name */
952*6008Syy154373 	sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
953*6008Syy154373 	kiconv_sc_ops_tbl,
954*6008Syy154373 	0,
955*6008Syy154373 	NULL,
956*6008Syy154373 	NULL,
957*6008Syy154373 	0
958*6008Syy154373 };
959*6008Syy154373 
960*6008Syy154373 static struct modlkiconv modlkiconv_sc = {
961*6008Syy154373 	&mod_kiconvops,
962*6008Syy154373 	"kiconv Simplified Chinese module 1.0",
963*6008Syy154373 	&kiconv_sc_info
964*6008Syy154373 };
965*6008Syy154373 
966*6008Syy154373 static struct modlinkage modlinkage = {
967*6008Syy154373 	MODREV_1,
968*6008Syy154373 	(void *)&modlkiconv_sc,
969*6008Syy154373 	NULL
970*6008Syy154373 };
971*6008Syy154373 
972*6008Syy154373 int
_init(void)973*6008Syy154373 _init(void)
974*6008Syy154373 {
975*6008Syy154373 	int err;
976*6008Syy154373 
977*6008Syy154373 	err = mod_install(&modlinkage);
978*6008Syy154373 	if (err)
979*6008Syy154373 		cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
980*6008Syy154373 
981*6008Syy154373 	return (err);
982*6008Syy154373 }
983*6008Syy154373 
984*6008Syy154373 int
_fini(void)985*6008Syy154373 _fini(void)
986*6008Syy154373 {
987*6008Syy154373 	int err;
988*6008Syy154373 
989*6008Syy154373 	/*
990*6008Syy154373 	 * If this module is being used, then, we cannot remove the module.
991*6008Syy154373 	 * The following checking will catch pretty much all usual cases.
992*6008Syy154373 	 *
993*6008Syy154373 	 * Any remaining will be catached by the kiconv_unregister_module()
994*6008Syy154373 	 * during mod_remove() at below.
995*6008Syy154373 	 */
996*6008Syy154373 	if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
997*6008Syy154373 		return (EBUSY);
998*6008Syy154373 
999*6008Syy154373 	err = mod_remove(&modlinkage);
1000*6008Syy154373 	if (err)
1001*6008Syy154373 		cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1002*6008Syy154373 
1003*6008Syy154373 	return (err);
1004*6008Syy154373 }
1005*6008Syy154373 
1006*6008Syy154373 int
_info(struct modinfo * modinfop)1007*6008Syy154373 _info(struct modinfo *modinfop)
1008*6008Syy154373 {
1009*6008Syy154373 	return (mod_info(&modlinkage, modinfop));
1010*6008Syy154373 }
1011