1*6008Syy154373 /*
2*6008Syy154373 * CDDL HEADER START
3*6008Syy154373 *
4*6008Syy154373 * The contents of this file are subject to the terms of the
5*6008Syy154373 * Common Development and Distribution License (the "License").
6*6008Syy154373 * You may not use this file except in compliance with the License.
7*6008Syy154373 *
8*6008Syy154373 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373 * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373 * See the License for the specific language governing permissions
11*6008Syy154373 * and limitations under the License.
12*6008Syy154373 *
13*6008Syy154373 * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373 * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373 * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373 * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373 *
19*6008Syy154373 * CDDL HEADER END
20*6008Syy154373 */
21*6008Syy154373 /*
22*6008Syy154373 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23*6008Syy154373 * Use is subject to license terms.
24*6008Syy154373 */
25*6008Syy154373
26*6008Syy154373 #pragma ident "%Z%%M% %I% %E% SMI"
27*6008Syy154373
28*6008Syy154373 #include <sys/types.h>
29*6008Syy154373 #include <sys/param.h>
30*6008Syy154373 #include <sys/sysmacros.h>
31*6008Syy154373 #include <sys/systm.h>
32*6008Syy154373 #include <sys/debug.h>
33*6008Syy154373 #include <sys/kmem.h>
34*6008Syy154373 #include <sys/sunddi.h>
35*6008Syy154373 #include <sys/byteorder.h>
36*6008Syy154373 #include <sys/errno.h>
37*6008Syy154373 #include <sys/modctl.h>
38*6008Syy154373 #include <sys/kiconv.h>
39*6008Syy154373 #include <sys/u8_textprep.h>
40*6008Syy154373 #include <sys/kiconv_cck_common.h>
41*6008Syy154373 #include <sys/kiconv_sc.h>
42*6008Syy154373 #include <sys/kiconv_gb18030_utf8.h>
43*6008Syy154373 #include <sys/kiconv_gb2312_utf8.h>
44*6008Syy154373 #include <sys/kiconv_utf8_gb18030.h>
45*6008Syy154373 #include <sys/kiconv_utf8_gb2312.h>
46*6008Syy154373
47*6008Syy154373 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
48*6008Syy154373 uchar_t *obtail, size_t *ret_val);
49*6008Syy154373 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
50*6008Syy154373 size_t *ret_val, boolean_t isgbk4);
51*6008Syy154373 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
52*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret);
53*6008Syy154373 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
54*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret);
55*6008Syy154373 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
56*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret);
57*6008Syy154373
/*
 * Magic values used as conversion descriptors by the open_fr_*() routines
 * in this file.  close_fr_sc() treats any descriptor value greater than
 * KICONV_SC_MAX_MAGIC_ID as invalid.
 */
#define	KICONV_SC_GB18030		(0x01)
#define	KICONV_SC_GBK			(0x02)
#define	KICONV_SC_EUCCN			(0x03)
/* Largest magic ID handed out; currently the EUC-CN one. */
#define	KICONV_SC_MAX_MAGIC_ID		KICONV_SC_EUCCN
62*6008Syy154373
63*6008Syy154373 static void *
open_fr_gb18030()64*6008Syy154373 open_fr_gb18030()
65*6008Syy154373 {
66*6008Syy154373 return ((void *)KICONV_SC_GB18030);
67*6008Syy154373 }
68*6008Syy154373
69*6008Syy154373 static void *
open_fr_gbk()70*6008Syy154373 open_fr_gbk()
71*6008Syy154373 {
72*6008Syy154373 return ((void *)KICONV_SC_GBK);
73*6008Syy154373 }
74*6008Syy154373
75*6008Syy154373 static void *
open_fr_euccn()76*6008Syy154373 open_fr_euccn()
77*6008Syy154373 {
78*6008Syy154373 return ((void *)KICONV_SC_EUCCN);
79*6008Syy154373 }
80*6008Syy154373
81*6008Syy154373 static int
close_fr_sc(void * s)82*6008Syy154373 close_fr_sc(void *s)
83*6008Syy154373 {
84*6008Syy154373 if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
85*6008Syy154373 return (EBADF);
86*6008Syy154373
87*6008Syy154373 return (0);
88*6008Syy154373 }
89*6008Syy154373
90*6008Syy154373 /*
91*6008Syy154373 * Encoding convertor from UTF-8 to GB18030.
92*6008Syy154373 */
93*6008Syy154373 size_t
kiconv_to_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)94*6008Syy154373 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
95*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
96*6008Syy154373 {
97*6008Syy154373
98*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
99*6008Syy154373 outbytesleft, errno, utf8_to_gb18030);
100*6008Syy154373 }
101*6008Syy154373
102*6008Syy154373 /*
103*6008Syy154373 * String based encoding convertor from UTF-8 to GB18030.
104*6008Syy154373 */
105*6008Syy154373 size_t
kiconvstr_to_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)106*6008Syy154373 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
107*6008Syy154373 size_t *outlen, int flag, int *errno)
108*6008Syy154373 {
109*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
110*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
111*6008Syy154373 }
112*6008Syy154373
113*6008Syy154373 /*
114*6008Syy154373 * Encoding convertor from GB18030 to UTF-8.
115*6008Syy154373 */
116*6008Syy154373 size_t
kiconv_fr_gb18030(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)117*6008Syy154373 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
118*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
119*6008Syy154373 {
120*6008Syy154373 uchar_t *ib;
121*6008Syy154373 uchar_t *ob;
122*6008Syy154373 uchar_t *ibtail;
123*6008Syy154373 uchar_t *obtail;
124*6008Syy154373 size_t ret_val;
125*6008Syy154373 int8_t sz;
126*6008Syy154373 uint32_t gb_val;
127*6008Syy154373 boolean_t isgbk4;
128*6008Syy154373
129*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
130*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
131*6008Syy154373 *errno = EBADF;
132*6008Syy154373 return ((size_t)-1);
133*6008Syy154373 }
134*6008Syy154373
135*6008Syy154373 /* If this is a state reset request, process and return. */
136*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
137*6008Syy154373 return (0);
138*6008Syy154373 }
139*6008Syy154373
140*6008Syy154373 ret_val = 0;
141*6008Syy154373 ib = (uchar_t *)*inbuf;
142*6008Syy154373 ob = (uchar_t *)*outbuf;
143*6008Syy154373 ibtail = ib + *inbytesleft;
144*6008Syy154373 obtail = ob + *outbytesleft;
145*6008Syy154373
146*6008Syy154373 while (ib < ibtail) {
147*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
148*6008Syy154373 if (ob >= obtail) {
149*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
150*6008Syy154373 }
151*6008Syy154373
152*6008Syy154373 *ob++ = *ib++;
153*6008Syy154373 continue;
154*6008Syy154373 }
155*6008Syy154373
156*6008Syy154373 /*
157*6008Syy154373 * Issue EILSEQ error if the first byte is not a
158*6008Syy154373 * valid GB18030 leading byte.
159*6008Syy154373 */
160*6008Syy154373 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
161*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
162*6008Syy154373 }
163*6008Syy154373
164*6008Syy154373 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
165*6008Syy154373 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
166*6008Syy154373
167*6008Syy154373 if (isgbk4) {
168*6008Syy154373 if (ibtail - ib < 4) {
169*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
170*6008Syy154373 }
171*6008Syy154373
172*6008Syy154373 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
173*6008Syy154373 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
174*6008Syy154373 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
175*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
176*6008Syy154373 }
177*6008Syy154373
178*6008Syy154373 gb_val = (uint32_t)(*ib) << 24 |
179*6008Syy154373 (uint32_t)(*(ib + 1)) << 16 |
180*6008Syy154373 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
181*6008Syy154373 } else {
182*6008Syy154373 if (ibtail - ib < 2) {
183*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
184*6008Syy154373 }
185*6008Syy154373
186*6008Syy154373 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
187*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
188*6008Syy154373 }
189*6008Syy154373
190*6008Syy154373 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
191*6008Syy154373 }
192*6008Syy154373
193*6008Syy154373 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
194*6008Syy154373 if (sz < 0) {
195*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
196*6008Syy154373 }
197*6008Syy154373
198*6008Syy154373 ib += isgbk4 ? 4 : 2;
199*6008Syy154373 ob += sz;
200*6008Syy154373 }
201*6008Syy154373
202*6008Syy154373 *inbuf = (char *)ib;
203*6008Syy154373 *inbytesleft = ibtail - ib;
204*6008Syy154373 *outbuf = (char *)ob;
205*6008Syy154373 *outbytesleft = obtail - ob;
206*6008Syy154373
207*6008Syy154373 return (ret_val);
208*6008Syy154373 }
209*6008Syy154373
210*6008Syy154373 /*
211*6008Syy154373 * String based encoding convertor from GB18030 to UTF-8.
212*6008Syy154373 */
213*6008Syy154373 size_t
kiconvstr_fr_gb18030(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)214*6008Syy154373 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
215*6008Syy154373 size_t *outlen, int flag, int *errno)
216*6008Syy154373 {
217*6008Syy154373 uchar_t *ib;
218*6008Syy154373 uchar_t *ob;
219*6008Syy154373 uchar_t *ibtail;
220*6008Syy154373 uchar_t *obtail;
221*6008Syy154373 uchar_t *oldib;
222*6008Syy154373 size_t ret_val;
223*6008Syy154373 int8_t sz;
224*6008Syy154373 uint32_t gb_val;
225*6008Syy154373 boolean_t isgbk4;
226*6008Syy154373 boolean_t do_not_ignore_null;
227*6008Syy154373
228*6008Syy154373 ret_val = 0;
229*6008Syy154373 ib = (uchar_t *)inarray;
230*6008Syy154373 ob = (uchar_t *)outarray;
231*6008Syy154373 ibtail = ib + *inlen;
232*6008Syy154373 obtail = ob + *outlen;
233*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
234*6008Syy154373
235*6008Syy154373 while (ib < ibtail) {
236*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
237*6008Syy154373 break;
238*6008Syy154373
239*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
240*6008Syy154373 if (ob >= obtail) {
241*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
242*6008Syy154373 }
243*6008Syy154373
244*6008Syy154373 *ob++ = *ib++;
245*6008Syy154373 continue;
246*6008Syy154373 }
247*6008Syy154373
248*6008Syy154373 oldib = ib;
249*6008Syy154373
250*6008Syy154373 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
251*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
252*6008Syy154373 }
253*6008Syy154373
254*6008Syy154373 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
255*6008Syy154373 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
256*6008Syy154373
257*6008Syy154373 if (isgbk4) {
258*6008Syy154373 if (ibtail - ib < 4) {
259*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
260*6008Syy154373 ib = ibtail;
261*6008Syy154373 goto REPLACE_INVALID;
262*6008Syy154373 }
263*6008Syy154373
264*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
265*6008Syy154373 }
266*6008Syy154373
267*6008Syy154373 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
268*6008Syy154373 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
269*6008Syy154373 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
270*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
271*6008Syy154373 }
272*6008Syy154373
273*6008Syy154373 gb_val = (uint32_t)(*ib) << 24 |
274*6008Syy154373 (uint32_t)(*(ib + 1)) << 16 |
275*6008Syy154373 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
276*6008Syy154373 } else {
277*6008Syy154373 if (ibtail - ib < 2) {
278*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
279*6008Syy154373 ib = ibtail;
280*6008Syy154373 goto REPLACE_INVALID;
281*6008Syy154373 }
282*6008Syy154373
283*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
284*6008Syy154373 }
285*6008Syy154373
286*6008Syy154373 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
287*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
288*6008Syy154373 }
289*6008Syy154373
290*6008Syy154373 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
291*6008Syy154373 }
292*6008Syy154373
293*6008Syy154373 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
294*6008Syy154373 if (sz < 0) {
295*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
296*6008Syy154373 }
297*6008Syy154373
298*6008Syy154373 ib += isgbk4 ? 4 : 2;
299*6008Syy154373 ob += sz;
300*6008Syy154373 continue;
301*6008Syy154373
302*6008Syy154373 REPLACE_INVALID:
303*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
304*6008Syy154373 ib = oldib;
305*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
306*6008Syy154373 }
307*6008Syy154373
308*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
309*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
310*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
311*6008Syy154373 ret_val++;
312*6008Syy154373 }
313*6008Syy154373
314*6008Syy154373 *inlen = ibtail - ib;
315*6008Syy154373 *outlen = obtail - ob;
316*6008Syy154373
317*6008Syy154373 return (ret_val);
318*6008Syy154373 }
319*6008Syy154373
320*6008Syy154373 /*
321*6008Syy154373 * Encoding convertor from UTF-8 to GBK.
322*6008Syy154373 */
323*6008Syy154373 size_t
kiconv_to_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)324*6008Syy154373 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
325*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
326*6008Syy154373 {
327*6008Syy154373
328*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
329*6008Syy154373 outbytesleft, errno, utf8_to_gbk);
330*6008Syy154373 }
331*6008Syy154373
332*6008Syy154373 /*
333*6008Syy154373 * String based encoding convertor from UTF-8 to GBK.
334*6008Syy154373 */
335*6008Syy154373 size_t
kiconvstr_to_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)336*6008Syy154373 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
337*6008Syy154373 size_t *outlen, int flag, int *errno)
338*6008Syy154373 {
339*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
340*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
341*6008Syy154373 }
342*6008Syy154373
343*6008Syy154373 /*
344*6008Syy154373 * Encoding convertor from GBK to UTF-8.
345*6008Syy154373 */
346*6008Syy154373 size_t
kiconv_fr_gbk(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)347*6008Syy154373 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
348*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
349*6008Syy154373 {
350*6008Syy154373 uchar_t *ib;
351*6008Syy154373 uchar_t *ob;
352*6008Syy154373 uchar_t *ibtail;
353*6008Syy154373 uchar_t *obtail;
354*6008Syy154373 size_t ret_val;
355*6008Syy154373 int8_t sz;
356*6008Syy154373 uint32_t gb_val;
357*6008Syy154373
358*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
359*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
360*6008Syy154373 *errno = EBADF;
361*6008Syy154373 return ((size_t)-1);
362*6008Syy154373 }
363*6008Syy154373
364*6008Syy154373 /* If this is a state reset request, process and return. */
365*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
366*6008Syy154373 return (0);
367*6008Syy154373 }
368*6008Syy154373
369*6008Syy154373 ret_val = 0;
370*6008Syy154373 ib = (uchar_t *)*inbuf;
371*6008Syy154373 ob = (uchar_t *)*outbuf;
372*6008Syy154373 ibtail = ib + *inbytesleft;
373*6008Syy154373 obtail = ob + *outbytesleft;
374*6008Syy154373
375*6008Syy154373 while (ib < ibtail) {
376*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
377*6008Syy154373 if (ob >= obtail) {
378*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
379*6008Syy154373 }
380*6008Syy154373
381*6008Syy154373 *ob++ = *ib++;
382*6008Syy154373 continue;
383*6008Syy154373 }
384*6008Syy154373
385*6008Syy154373 /*
386*6008Syy154373 * Issue EILSEQ error if the first byte is not a
387*6008Syy154373 * valid GBK leading byte.
388*6008Syy154373 */
389*6008Syy154373 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
390*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
391*6008Syy154373 }
392*6008Syy154373
393*6008Syy154373 /*
394*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
395*6008Syy154373 * character at the end of the buffer.
396*6008Syy154373 */
397*6008Syy154373 if (ibtail - ib < 2) {
398*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
399*6008Syy154373 }
400*6008Syy154373
401*6008Syy154373 /*
402*6008Syy154373 * Issue EILSEQ error if the remaining byte is not
403*6008Syy154373 * a valid GBK byte.
404*6008Syy154373 */
405*6008Syy154373 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
406*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
407*6008Syy154373 }
408*6008Syy154373
409*6008Syy154373 /* Now we have a valid GBK character. */
410*6008Syy154373 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
411*6008Syy154373 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
412*6008Syy154373
413*6008Syy154373 if (sz < 0) {
414*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
415*6008Syy154373 }
416*6008Syy154373
417*6008Syy154373 ib += 2;
418*6008Syy154373 ob += sz;
419*6008Syy154373 }
420*6008Syy154373
421*6008Syy154373 *inbuf = (char *)ib;
422*6008Syy154373 *inbytesleft = ibtail - ib;
423*6008Syy154373 *outbuf = (char *)ob;
424*6008Syy154373 *outbytesleft = obtail - ob;
425*6008Syy154373
426*6008Syy154373 return (ret_val);
427*6008Syy154373 }
428*6008Syy154373
429*6008Syy154373 /*
430*6008Syy154373 * String based encoding convertor from GBK to UTF-8.
431*6008Syy154373 */
432*6008Syy154373 size_t
kiconvstr_fr_gbk(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)433*6008Syy154373 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
434*6008Syy154373 size_t *outlen, int flag, int *errno)
435*6008Syy154373 {
436*6008Syy154373 uchar_t *ib;
437*6008Syy154373 uchar_t *ob;
438*6008Syy154373 uchar_t *ibtail;
439*6008Syy154373 uchar_t *obtail;
440*6008Syy154373 uchar_t *oldib;
441*6008Syy154373 size_t ret_val;
442*6008Syy154373 int8_t sz;
443*6008Syy154373 uint32_t gb_val;
444*6008Syy154373 boolean_t do_not_ignore_null;
445*6008Syy154373
446*6008Syy154373 ret_val = 0;
447*6008Syy154373 ib = (uchar_t *)inarray;
448*6008Syy154373 ob = (uchar_t *)outarray;
449*6008Syy154373 ibtail = ib + *inlen;
450*6008Syy154373 obtail = ob + *outlen;
451*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
452*6008Syy154373
453*6008Syy154373 while (ib < ibtail) {
454*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
455*6008Syy154373 break;
456*6008Syy154373
457*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
458*6008Syy154373 if (ob >= obtail) {
459*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
460*6008Syy154373 }
461*6008Syy154373
462*6008Syy154373 *ob++ = *ib++;
463*6008Syy154373 continue;
464*6008Syy154373 }
465*6008Syy154373
466*6008Syy154373 oldib = ib;
467*6008Syy154373
468*6008Syy154373 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
469*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
470*6008Syy154373 }
471*6008Syy154373
472*6008Syy154373 if (ibtail - ib < 2) {
473*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
474*6008Syy154373 }
475*6008Syy154373
476*6008Syy154373 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
477*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
478*6008Syy154373 }
479*6008Syy154373
480*6008Syy154373 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
481*6008Syy154373 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
482*6008Syy154373
483*6008Syy154373 if (sz < 0) {
484*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
485*6008Syy154373 }
486*6008Syy154373
487*6008Syy154373 ib += 2;
488*6008Syy154373 ob += sz;
489*6008Syy154373 continue;
490*6008Syy154373
491*6008Syy154373 REPLACE_INVALID:
492*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
493*6008Syy154373 ib = oldib;
494*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495*6008Syy154373 }
496*6008Syy154373
497*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
498*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
499*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
500*6008Syy154373 ret_val++;
501*6008Syy154373 }
502*6008Syy154373
503*6008Syy154373 *inlen = ibtail - ib;
504*6008Syy154373 *outlen = obtail - ob;
505*6008Syy154373
506*6008Syy154373 return (ret_val);
507*6008Syy154373 }
508*6008Syy154373
509*6008Syy154373 /*
510*6008Syy154373 * Encoding convertor from UTF-8 to EUC-CN.
511*6008Syy154373 */
512*6008Syy154373 size_t
kiconv_to_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)513*6008Syy154373 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
514*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
515*6008Syy154373 {
516*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
517*6008Syy154373 outbytesleft, errno, utf8_to_gb2312);
518*6008Syy154373 }
519*6008Syy154373
520*6008Syy154373 /*
521*6008Syy154373 * String based encoding convertor from UTF-8 to EUC-CN.
522*6008Syy154373 */
523*6008Syy154373 size_t
kiconvstr_to_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)524*6008Syy154373 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
525*6008Syy154373 size_t *outlen, int flag, int *errno)
526*6008Syy154373 {
527*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
528*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
529*6008Syy154373 }
530*6008Syy154373
531*6008Syy154373 /*
532*6008Syy154373 * Encoding converto from EUC-CN to UTF-8 code.
533*6008Syy154373 */
534*6008Syy154373 size_t
kiconv_fr_euccn(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)535*6008Syy154373 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
536*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
537*6008Syy154373 {
538*6008Syy154373 uchar_t *ib;
539*6008Syy154373 uchar_t *ob;
540*6008Syy154373 uchar_t *ibtail;
541*6008Syy154373 uchar_t *obtail;
542*6008Syy154373 size_t ret_val;
543*6008Syy154373 int8_t sz;
544*6008Syy154373
545*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
546*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
547*6008Syy154373 *errno = EBADF;
548*6008Syy154373 return ((size_t)-1);
549*6008Syy154373 }
550*6008Syy154373
551*6008Syy154373 /* If this is a state reset request, process and return. */
552*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
553*6008Syy154373 return (0);
554*6008Syy154373 }
555*6008Syy154373
556*6008Syy154373 ret_val = 0;
557*6008Syy154373 ib = (uchar_t *)*inbuf;
558*6008Syy154373 ob = (uchar_t *)*outbuf;
559*6008Syy154373 ibtail = ib + *inbytesleft;
560*6008Syy154373 obtail = ob + *outbytesleft;
561*6008Syy154373
562*6008Syy154373 while (ib < ibtail) {
563*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
564*6008Syy154373 if (ob >= obtail) {
565*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
566*6008Syy154373 }
567*6008Syy154373
568*6008Syy154373 *ob++ = *ib++;
569*6008Syy154373 continue;
570*6008Syy154373 }
571*6008Syy154373
572*6008Syy154373 /*
573*6008Syy154373 * Issue EILSEQ error if the first byte is not a
574*6008Syy154373 * valid GB2312 leading byte.
575*6008Syy154373 */
576*6008Syy154373 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
577*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
578*6008Syy154373 }
579*6008Syy154373
580*6008Syy154373 /*
581*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
582*6008Syy154373 * character at the end of the buffer.
583*6008Syy154373 */
584*6008Syy154373 if (ibtail - ib < 2) {
585*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
586*6008Syy154373 }
587*6008Syy154373
588*6008Syy154373 /*
589*6008Syy154373 * Issue EILSEQ error if the remaining byte is not
590*6008Syy154373 * a valid GB2312 byte.
591*6008Syy154373 */
592*6008Syy154373 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
593*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
594*6008Syy154373 }
595*6008Syy154373
596*6008Syy154373 /* Now we have a valid GB2312 character */
597*6008Syy154373 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
598*6008Syy154373 if (sz < 0) {
599*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
600*6008Syy154373 }
601*6008Syy154373
602*6008Syy154373 ib += 2;
603*6008Syy154373 ob += sz;
604*6008Syy154373 }
605*6008Syy154373
606*6008Syy154373 *inbuf = (char *)ib;
607*6008Syy154373 *inbytesleft = ibtail - ib;
608*6008Syy154373 *outbuf = (char *)ob;
609*6008Syy154373 *outbytesleft = obtail - ob;
610*6008Syy154373
611*6008Syy154373 return (ret_val);
612*6008Syy154373 }
613*6008Syy154373
614*6008Syy154373 /*
615*6008Syy154373 * String based encoding convertor from EUC-CN to UTF-8.
616*6008Syy154373 */
617*6008Syy154373 size_t
kiconvstr_fr_euccn(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)618*6008Syy154373 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
619*6008Syy154373 size_t *outlen, int flag, int *errno)
620*6008Syy154373 {
621*6008Syy154373 uchar_t *ib;
622*6008Syy154373 uchar_t *ob;
623*6008Syy154373 uchar_t *ibtail;
624*6008Syy154373 uchar_t *obtail;
625*6008Syy154373 uchar_t *oldib;
626*6008Syy154373 size_t ret_val;
627*6008Syy154373 int8_t sz;
628*6008Syy154373 boolean_t do_not_ignore_null;
629*6008Syy154373
630*6008Syy154373 ret_val = 0;
631*6008Syy154373 ib = (uchar_t *)inarray;
632*6008Syy154373 ob = (uchar_t *)outarray;
633*6008Syy154373 ibtail = ib + *inlen;
634*6008Syy154373 obtail = ob + *outlen;
635*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
636*6008Syy154373
637*6008Syy154373 while (ib < ibtail) {
638*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
639*6008Syy154373 break;
640*6008Syy154373
641*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
642*6008Syy154373 if (ob >= obtail) {
643*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
644*6008Syy154373 }
645*6008Syy154373
646*6008Syy154373 *ob++ = *ib++;
647*6008Syy154373 continue;
648*6008Syy154373 }
649*6008Syy154373
650*6008Syy154373 oldib = ib;
651*6008Syy154373
652*6008Syy154373 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
653*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
654*6008Syy154373 }
655*6008Syy154373
656*6008Syy154373 if (ibtail - ib < 2) {
657*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
658*6008Syy154373 }
659*6008Syy154373
660*6008Syy154373 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
661*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
662*6008Syy154373 }
663*6008Syy154373
664*6008Syy154373 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
665*6008Syy154373 if (sz < 0) {
666*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
667*6008Syy154373 }
668*6008Syy154373
669*6008Syy154373 ib += 2;
670*6008Syy154373 ob += sz;
671*6008Syy154373 continue;
672*6008Syy154373
673*6008Syy154373 REPLACE_INVALID:
674*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
675*6008Syy154373 ib = oldib;
676*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
677*6008Syy154373 }
678*6008Syy154373
679*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
680*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
681*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
682*6008Syy154373 ret_val++;
683*6008Syy154373 }
684*6008Syy154373
685*6008Syy154373 *inlen = ibtail - ib;
686*6008Syy154373 *outlen = obtail - ob;
687*6008Syy154373
688*6008Syy154373 return (ret_val);
689*6008Syy154373 }
690*6008Syy154373
691*6008Syy154373 /*
692*6008Syy154373 * Convert single GB2312 character to UTF-8.
693*6008Syy154373 * Return: > 0 - Converted successfully
694*6008Syy154373 * = -1 - E2BIG
695*6008Syy154373 */
696*6008Syy154373 static int8_t
gb2312_to_utf8(uchar_t b1,uchar_t b2,uchar_t * ob,uchar_t * obtail,size_t * ret_val)697*6008Syy154373 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
698*6008Syy154373 size_t *ret_val)
699*6008Syy154373 {
700*6008Syy154373 size_t index;
701*6008Syy154373 int8_t sz;
702*6008Syy154373 uchar_t *u8;
703*6008Syy154373
704*6008Syy154373 /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705*6008Syy154373 index = b1 * 94 + b2 - 0x3BBF;
706*6008Syy154373
707*6008Syy154373 if (index >= KICONV_GB2312_UTF8_MAX)
708*6008Syy154373 index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
709*6008Syy154373
710*6008Syy154373 u8 = kiconv_gb2312_utf8[index];
711*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
712*6008Syy154373
713*6008Syy154373 if (obtail - ob < sz) {
714*6008Syy154373 *ret_val = (size_t)-1;
715*6008Syy154373 return (-1);
716*6008Syy154373 }
717*6008Syy154373
718*6008Syy154373 for (index = 0; index < sz; index++)
719*6008Syy154373 *ob++ = u8[index];
720*6008Syy154373
721*6008Syy154373 /*
722*6008Syy154373 * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
723*6008Syy154373 * elements, so need to ckeck more.
724*6008Syy154373 */
725*6008Syy154373 if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
726*6008Syy154373 u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
727*6008Syy154373 u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
728*6008Syy154373 u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
729*6008Syy154373 (*ret_val)++;
730*6008Syy154373
731*6008Syy154373 return (sz);
732*6008Syy154373 }
733*6008Syy154373
734*6008Syy154373 /*
735*6008Syy154373 * Convert single GB18030 or GBK character to UTF-8.
736*6008Syy154373 * Return: > 0 - Converted successfully
737*6008Syy154373 * = -1 - E2BIG
738*6008Syy154373 */
739*6008Syy154373 static int8_t
gbk_to_utf8(uint32_t gbk_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,boolean_t isgbk4)740*6008Syy154373 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
741*6008Syy154373 boolean_t isgbk4)
742*6008Syy154373 {
743*6008Syy154373 size_t index;
744*6008Syy154373 int8_t sz;
745*6008Syy154373 uchar_t u8array[4];
746*6008Syy154373 uchar_t *u8;
747*6008Syy154373
748*6008Syy154373 if (isgbk4) {
749*6008Syy154373 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
750*6008Syy154373 uint32_t u32;
751*6008Syy154373
752*6008Syy154373 /*
753*6008Syy154373 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754*6008Syy154373 * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755*6008Syy154373 * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756*6008Syy154373 * (gbk_val & 0xFF - 0x30)+
757*6008Syy154373 * KICONV_SC_PLANE1_UCS4_START;
758*6008Syy154373 */
759*6008Syy154373 u32 = (gbk_val >> 24) * 12600 +
760*6008Syy154373 ((gbk_val & 0xFF0000) >> 16) * 1260 +
761*6008Syy154373 ((gbk_val & 0xFF00) >> 8) * 10 +
762*6008Syy154373 (gbk_val & 0xFF) - 0x1BA0FA;
763*6008Syy154373 u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
764*6008Syy154373 u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
765*6008Syy154373 u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
766*6008Syy154373 u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
767*6008Syy154373 u8 = u8array;
768*6008Syy154373 index = 1;
769*6008Syy154373 } else {
770*6008Syy154373 index = kiconv_binsearch(gbk_val,
771*6008Syy154373 kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
772*6008Syy154373 u8 = kiconv_gbk4_utf8[index].u8;
773*6008Syy154373 }
774*6008Syy154373 } else {
775*6008Syy154373 index = kiconv_binsearch(gbk_val,
776*6008Syy154373 kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
777*6008Syy154373 u8 = kiconv_gbk_utf8[index].u8;
778*6008Syy154373 }
779*6008Syy154373
780*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
781*6008Syy154373 if (obtail - ob < sz) {
782*6008Syy154373 *ret_val = (size_t)-1;
783*6008Syy154373 return (-1);
784*6008Syy154373 }
785*6008Syy154373
786*6008Syy154373 if (index == 0)
787*6008Syy154373 (*ret_val)++; /* Non-identical conversion */
788*6008Syy154373
789*6008Syy154373 for (index = 0; index < sz; index++)
790*6008Syy154373 *ob++ = u8[index];
791*6008Syy154373
792*6008Syy154373 return (sz);
793*6008Syy154373 }
794*6008Syy154373
795*6008Syy154373 /*
796*6008Syy154373 * Convert single UTF-8 character to GB18030.
797*6008Syy154373 * Return: > 0 - Converted successfully
798*6008Syy154373 * = -1 - E2BIG
799*6008Syy154373 */
800*6008Syy154373 /* ARGSUSED */
801*6008Syy154373 static int8_t
utf8_to_gb18030(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)802*6008Syy154373 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
803*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret)
804*6008Syy154373 {
805*6008Syy154373 size_t index;
806*6008Syy154373 int8_t gbklen;
807*6008Syy154373 uint32_t gbkcode;
808*6008Syy154373
809*6008Syy154373 if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
810*6008Syy154373 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
811*6008Syy154373 uint32_t u32;
812*6008Syy154373
813*6008Syy154373 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
814*6008Syy154373 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
815*6008Syy154373 KICONV_SC_PLANE1_UCS4_START;
816*6008Syy154373 gbkcode = ((u32 / 12600 + 0x90) << 24) |
817*6008Syy154373 (((u32 % 12600) / 1260 + 0x30) << 16) |
818*6008Syy154373 (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
819*6008Syy154373 gbklen = 4;
820*6008Syy154373 index = 1;
821*6008Syy154373 } else {
822*6008Syy154373 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
823*6008Syy154373 KICONV_UTF8_GB18030_MAX);
824*6008Syy154373 gbkcode = kiconv_utf8_gb18030[index].value;
825*6008Syy154373 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
826*6008Syy154373 }
827*6008Syy154373
828*6008Syy154373 if (obtail - ob < gbklen) {
829*6008Syy154373 *ret = (size_t)-1;
830*6008Syy154373 return (-1);
831*6008Syy154373 }
832*6008Syy154373
833*6008Syy154373 if (index == 0)
834*6008Syy154373 (*ret)++; /* Non-identical conversion */
835*6008Syy154373
836*6008Syy154373 if (gbklen == 2) {
837*6008Syy154373 *ob++ = (uchar_t)(gbkcode >> 8);
838*6008Syy154373 } else if (gbklen == 4) {
839*6008Syy154373 *ob++ = (uchar_t)(gbkcode >> 24);
840*6008Syy154373 *ob++ = (uchar_t)(gbkcode >> 16);
841*6008Syy154373 *ob++ = (uchar_t)(gbkcode >> 8);
842*6008Syy154373 }
843*6008Syy154373 *ob = (uchar_t)(gbkcode & 0xFF);
844*6008Syy154373
845*6008Syy154373 return (gbklen);
846*6008Syy154373 }
847*6008Syy154373
848*6008Syy154373 /*
849*6008Syy154373 * Convert single UTF-8 character to GBK.
850*6008Syy154373 * Return: > 0 - Converted successfully
851*6008Syy154373 * = -1 - E2BIG
852*6008Syy154373 */
853*6008Syy154373 /* ARGSUSED */
854*6008Syy154373 static int8_t
utf8_to_gbk(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret)855*6008Syy154373 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
856*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret)
857*6008Syy154373 {
858*6008Syy154373 size_t index;
859*6008Syy154373 int8_t gbklen;
860*6008Syy154373 uint32_t gbkcode;
861*6008Syy154373
862*6008Syy154373 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
863*6008Syy154373 KICONV_UTF8_GB18030_MAX);
864*6008Syy154373 gbkcode = kiconv_utf8_gb18030[index].value;
865*6008Syy154373 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
866*6008Syy154373
867*6008Syy154373 /* GBK and GB18030 share the same table, so check the length. */
868*6008Syy154373 if (gbklen == 4) {
869*6008Syy154373 index = 0;
870*6008Syy154373 gbkcode = kiconv_utf8_gb18030[index].value;
871*6008Syy154373 gbklen = 1;
872*6008Syy154373 }
873*6008Syy154373
874*6008Syy154373 if (obtail - ob < gbklen) {
875*6008Syy154373 *ret = (size_t)-1;
876*6008Syy154373 return (-1);
877*6008Syy154373 }
878*6008Syy154373
879*6008Syy154373 if (index == 0)
880*6008Syy154373 (*ret)++; /* Non-identical conversion */
881*6008Syy154373
882*6008Syy154373 if (gbklen > 1)
883*6008Syy154373 *ob++ = (uchar_t)(gbkcode >> 8);
884*6008Syy154373 *ob = (uchar_t)(gbkcode & 0xFF);
885*6008Syy154373
886*6008Syy154373 return (gbklen);
887*6008Syy154373 }
888*6008Syy154373
889*6008Syy154373 /*
890*6008Syy154373 * Convert single UTF-8 character to GB2312.
891*6008Syy154373 * Return: > 0 - Converted successfully
892*6008Syy154373 * = -1 - E2BIG
893*6008Syy154373 */
894*6008Syy154373 /* ARGSUSED */
895*6008Syy154373 static int8_t
utf8_to_gb2312(uint32_t utf8,uchar_t ** inbuf,uchar_t * intail,uchar_t * ob,uchar_t * obtail,size_t * ret)896*6008Syy154373 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
897*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret)
898*6008Syy154373 {
899*6008Syy154373 size_t index;
900*6008Syy154373 int8_t gblen;
901*6008Syy154373 uint32_t gbcode;
902*6008Syy154373
903*6008Syy154373 index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
904*6008Syy154373 KICONV_UTF8_GB2312_MAX);
905*6008Syy154373 gbcode = kiconv_utf8_gb2312[index].value;
906*6008Syy154373 gblen = (gbcode <= 0xFF) ? 1 : 2;
907*6008Syy154373
908*6008Syy154373 if (obtail - ob < gblen) {
909*6008Syy154373 *ret = (size_t)-1;
910*6008Syy154373 return (-1);
911*6008Syy154373 }
912*6008Syy154373
913*6008Syy154373 if (index == 0)
914*6008Syy154373 (*ret)++;
915*6008Syy154373
916*6008Syy154373 if (gblen > 1)
917*6008Syy154373 *ob++ = (uchar_t)(gbcode >> 8);
918*6008Syy154373 *ob = (uchar_t)(gbcode & 0xFF);
919*6008Syy154373
920*6008Syy154373 return (gblen);
921*6008Syy154373 }
922*6008Syy154373
923*6008Syy154373 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
924*6008Syy154373 {
925*6008Syy154373 "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
926*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_gb18030
927*6008Syy154373 },
928*6008Syy154373 {
929*6008Syy154373 "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
930*6008Syy154373 close_fr_sc, kiconvstr_fr_gb18030
931*6008Syy154373 },
932*6008Syy154373 {
933*6008Syy154373 "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
934*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_gbk
935*6008Syy154373 },
936*6008Syy154373 {
937*6008Syy154373 "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
938*6008Syy154373 close_fr_sc, kiconvstr_fr_gbk
939*6008Syy154373 },
940*6008Syy154373 {
941*6008Syy154373 "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
942*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_euccn
943*6008Syy154373 },
944*6008Syy154373 {
945*6008Syy154373 "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
946*6008Syy154373 close_fr_sc, kiconvstr_fr_euccn
947*6008Syy154373 },
948*6008Syy154373 };
949*6008Syy154373
950*6008Syy154373 static kiconv_module_info_t kiconv_sc_info = {
951*6008Syy154373 "kiconv_sc", /* module name */
952*6008Syy154373 sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
953*6008Syy154373 kiconv_sc_ops_tbl,
954*6008Syy154373 0,
955*6008Syy154373 NULL,
956*6008Syy154373 NULL,
957*6008Syy154373 0
958*6008Syy154373 };
959*6008Syy154373
960*6008Syy154373 static struct modlkiconv modlkiconv_sc = {
961*6008Syy154373 &mod_kiconvops,
962*6008Syy154373 "kiconv Simplified Chinese module 1.0",
963*6008Syy154373 &kiconv_sc_info
964*6008Syy154373 };
965*6008Syy154373
966*6008Syy154373 static struct modlinkage modlinkage = {
967*6008Syy154373 MODREV_1,
968*6008Syy154373 (void *)&modlkiconv_sc,
969*6008Syy154373 NULL
970*6008Syy154373 };
971*6008Syy154373
972*6008Syy154373 int
_init(void)973*6008Syy154373 _init(void)
974*6008Syy154373 {
975*6008Syy154373 int err;
976*6008Syy154373
977*6008Syy154373 err = mod_install(&modlinkage);
978*6008Syy154373 if (err)
979*6008Syy154373 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
980*6008Syy154373
981*6008Syy154373 return (err);
982*6008Syy154373 }
983*6008Syy154373
984*6008Syy154373 int
_fini(void)985*6008Syy154373 _fini(void)
986*6008Syy154373 {
987*6008Syy154373 int err;
988*6008Syy154373
989*6008Syy154373 /*
990*6008Syy154373 * If this module is being used, then, we cannot remove the module.
991*6008Syy154373 * The following checking will catch pretty much all usual cases.
992*6008Syy154373 *
993*6008Syy154373 * Any remaining will be catached by the kiconv_unregister_module()
994*6008Syy154373 * during mod_remove() at below.
995*6008Syy154373 */
996*6008Syy154373 if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
997*6008Syy154373 return (EBUSY);
998*6008Syy154373
999*6008Syy154373 err = mod_remove(&modlinkage);
1000*6008Syy154373 if (err)
1001*6008Syy154373 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1002*6008Syy154373
1003*6008Syy154373 return (err);
1004*6008Syy154373 }
1005*6008Syy154373
1006*6008Syy154373 int
_info(struct modinfo * modinfop)1007*6008Syy154373 _info(struct modinfo *modinfop)
1008*6008Syy154373 {
1009*6008Syy154373 return (mod_info(&modlinkage, modinfop));
1010*6008Syy154373 }
1011