/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25*6008Syy154373
26*6008Syy154373 #pragma ident "%Z%%M% %I% %E% SMI"
27*6008Syy154373
28*6008Syy154373 #include <sys/types.h>
29*6008Syy154373 #include <sys/param.h>
30*6008Syy154373 #include <sys/sysmacros.h>
31*6008Syy154373 #include <sys/systm.h>
32*6008Syy154373 #include <sys/debug.h>
33*6008Syy154373 #include <sys/kmem.h>
34*6008Syy154373 #include <sys/sunddi.h>
35*6008Syy154373 #include <sys/byteorder.h>
36*6008Syy154373 #include <sys/errno.h>
37*6008Syy154373 #include <sys/modctl.h>
38*6008Syy154373 #include <sys/u8_textprep.h>
39*6008Syy154373 #include <sys/kiconv.h>
40*6008Syy154373 #include <sys/kiconv_cck_common.h>
41*6008Syy154373 #include <sys/kiconv_tc.h>
42*6008Syy154373 #include <sys/kiconv_big5_utf8.h>
43*6008Syy154373 #include <sys/kiconv_euctw_utf8.h>
44*6008Syy154373 #include <sys/kiconv_hkscs_utf8.h>
45*6008Syy154373 #include <sys/kiconv_cp950hkscs_utf8.h>
46*6008Syy154373 #include <sys/kiconv_utf8_big5.h>
47*6008Syy154373 #include <sys/kiconv_utf8_euctw.h>
48*6008Syy154373 #include <sys/kiconv_utf8_cp950hkscs.h>
49*6008Syy154373 #include <sys/kiconv_utf8_hkscs.h>
50*6008Syy154373
/*
 * UTF-8 byte sequences for the four HKSCS-2004 code points that each map to
 * two Unicode code points.  Every row holds two 2-byte UTF-8 characters;
 * rows are ordered by HKSCS code: 0x8862, 0x8864, 0x88a3, 0x88a5.
 */
static uchar_t hkscs_special_sequence[][4] = {
	{ 0xC3, 0x8A, 0xCC, 0x84 },	/* 0x8862 */
	{ 0xC3, 0x8A, 0xCC, 0x8C },	/* 0x8864 */
	{ 0xC3, 0xAA, 0xCC, 0x84 },	/* 0x88a3 */
	{ 0xC3, 0xAA, 0xCC, 0x8C }	/* 0x88a5 */
};
58*6008Syy154373
/*
 * HKSCS-2004 codes for U+00CA and U+00EA, each taken alone and followed by
 * the combining marks U+0304 and U+030C.  The four two-code-point pairs each
 * collapse to a single HKSCS-2004 code point.
 */
static uint32_t ucs_special_sequence[] = {
	0x8866,		/* U+00CA */
	0x8862,		/* U+00CA U+0304 */
	0x8864,		/* U+00CA U+030C */
	0x88A7,		/* U+00EA */
	0x88A3,		/* U+00EA U+0304 */
	0x88A5		/* U+00EA U+030C */
};
68*6008Syy154373
69*6008Syy154373 typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
70*6008Syy154373 uchar_t *obtail, size_t *ret_val);
71*6008Syy154373
72*6008Syy154373 static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
73*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
74*6008Syy154373 static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
75*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
76*6008Syy154373 static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
77*6008Syy154373 uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
78*6008Syy154373 static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
79*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
80*6008Syy154373 static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
81*6008Syy154373 size_t *ret_val);
82*6008Syy154373 static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
83*6008Syy154373 uchar_t *obtail, size_t *ret_val);
84*6008Syy154373 static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
85*6008Syy154373 uchar_t *obtail, size_t *ret_val);
86*6008Syy154373 static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
87*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
88*6008Syy154373 static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
89*6008Syy154373 uchar_t byte2);
90*6008Syy154373
/*
 * Magic ids that the open_fr_*() routines return, cast to a pointer, as the
 * conversion descriptor.  KICONV_TC_MAX_MAGIC_ID is the largest id in use;
 * close_fr_tc() treats anything above it as an invalid descriptor.
 */
#define	KICONV_TC_BIG5		(0x01)
#define	KICONV_TC_BIG5HKSCS	(0x02)
#define	KICONV_TC_CP950HKSCS	(0x03)
#define	KICONV_TC_EUCTW		(0x04)
#define	KICONV_TC_MAX_MAGIC_ID	KICONV_TC_EUCTW
96*6008Syy154373
97*6008Syy154373 static void *
open_fr_big5()98*6008Syy154373 open_fr_big5()
99*6008Syy154373 {
100*6008Syy154373 return ((void *)KICONV_TC_BIG5);
101*6008Syy154373 }
102*6008Syy154373
103*6008Syy154373 static void *
open_fr_big5hkscs()104*6008Syy154373 open_fr_big5hkscs()
105*6008Syy154373 {
106*6008Syy154373 return ((void *)KICONV_TC_BIG5HKSCS);
107*6008Syy154373 }
108*6008Syy154373
109*6008Syy154373 static void *
open_fr_cp950hkscs()110*6008Syy154373 open_fr_cp950hkscs()
111*6008Syy154373 {
112*6008Syy154373 return ((void *)KICONV_TC_CP950HKSCS);
113*6008Syy154373 }
114*6008Syy154373
115*6008Syy154373 static void *
open_fr_euctw()116*6008Syy154373 open_fr_euctw()
117*6008Syy154373 {
118*6008Syy154373 return ((void *)KICONV_TC_EUCTW);
119*6008Syy154373 }
120*6008Syy154373
121*6008Syy154373 static int
close_fr_tc(void * s)122*6008Syy154373 close_fr_tc(void *s)
123*6008Syy154373 {
124*6008Syy154373 if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
125*6008Syy154373 return (EBADF);
126*6008Syy154373
127*6008Syy154373 return (0);
128*6008Syy154373 }
129*6008Syy154373
130*6008Syy154373 /*
131*6008Syy154373 * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
132*6008Syy154373 */
133*6008Syy154373 static size_t
kiconv_fr_big5_common(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno,kiconv_big5toutf8_t ptr_big5touf8)134*6008Syy154373 kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
135*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno,
136*6008Syy154373 kiconv_big5toutf8_t ptr_big5touf8)
137*6008Syy154373 {
138*6008Syy154373 uchar_t *ib;
139*6008Syy154373 uchar_t *ob;
140*6008Syy154373 uchar_t *ibtail;
141*6008Syy154373 uchar_t *obtail;
142*6008Syy154373 size_t ret_val;
143*6008Syy154373 int8_t sz;
144*6008Syy154373 uint32_t big5_val;
145*6008Syy154373
146*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
147*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
148*6008Syy154373 *errno = EBADF;
149*6008Syy154373 return ((size_t)-1);
150*6008Syy154373 }
151*6008Syy154373
152*6008Syy154373 /* If this is a state reset request, process and return. */
153*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
154*6008Syy154373 return (0);
155*6008Syy154373 }
156*6008Syy154373
157*6008Syy154373 ret_val = 0;
158*6008Syy154373 ib = (uchar_t *)*inbuf;
159*6008Syy154373 ob = (uchar_t *)*outbuf;
160*6008Syy154373 ibtail = ib + *inbytesleft;
161*6008Syy154373 obtail = ob + *outbytesleft;
162*6008Syy154373
163*6008Syy154373 while (ib < ibtail) {
164*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
165*6008Syy154373 if (ob >= obtail) {
166*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
167*6008Syy154373 }
168*6008Syy154373
169*6008Syy154373 *ob++ = *ib++;
170*6008Syy154373 continue;
171*6008Syy154373 }
172*6008Syy154373
173*6008Syy154373 /*
174*6008Syy154373 * Issue EILSEQ error if the first byte is not a
175*6008Syy154373 * valid BIG5/HKSCS leading byte.
176*6008Syy154373 */
177*6008Syy154373 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
178*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
179*6008Syy154373 }
180*6008Syy154373
181*6008Syy154373 /*
182*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
183*6008Syy154373 * character at the end of the buffer.
184*6008Syy154373 */
185*6008Syy154373 if (ibtail - ib < 2) {
186*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
187*6008Syy154373 }
188*6008Syy154373
189*6008Syy154373 /*
190*6008Syy154373 * Issue EILSEQ error if the remaining bytes is not
191*6008Syy154373 * a valid BIG5/HKSCS byte.
192*6008Syy154373 */
193*6008Syy154373 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
194*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
195*6008Syy154373 }
196*6008Syy154373
197*6008Syy154373 /* Now we have a valid BIG5/HKSCS character. */
198*6008Syy154373 big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
199*6008Syy154373 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
200*6008Syy154373
201*6008Syy154373 if (sz < 0) {
202*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
203*6008Syy154373 }
204*6008Syy154373
205*6008Syy154373 ib += 2;
206*6008Syy154373 ob += sz;
207*6008Syy154373 }
208*6008Syy154373
209*6008Syy154373 *inbuf = (char *)ib;
210*6008Syy154373 *inbytesleft = ibtail - ib;
211*6008Syy154373 *outbuf = (char *)ob;
212*6008Syy154373 *outbytesleft = obtail - ob;
213*6008Syy154373
214*6008Syy154373 return (ret_val);
215*6008Syy154373 }
216*6008Syy154373
217*6008Syy154373 /*
218*6008Syy154373 * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
219*6008Syy154373 * to UTF-8.
220*6008Syy154373 */
221*6008Syy154373 static size_t
kiconvstr_fr_big5_common(uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno,kiconv_big5toutf8_t ptr_big5touf8)222*6008Syy154373 kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
223*6008Syy154373 size_t *outlen, int flag, int *errno,
224*6008Syy154373 kiconv_big5toutf8_t ptr_big5touf8)
225*6008Syy154373 {
226*6008Syy154373 uchar_t *oldib;
227*6008Syy154373 uchar_t *ibtail;
228*6008Syy154373 uchar_t *obtail;
229*6008Syy154373 size_t ret_val;
230*6008Syy154373 int8_t sz;
231*6008Syy154373 uint32_t big5_val;
232*6008Syy154373 boolean_t do_not_ignore_null;
233*6008Syy154373
234*6008Syy154373 ret_val = 0;
235*6008Syy154373 ibtail = ib + *inlen;
236*6008Syy154373 obtail = ob + *outlen;
237*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
238*6008Syy154373
239*6008Syy154373 while (ib < ibtail) {
240*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
241*6008Syy154373 break;
242*6008Syy154373
243*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
244*6008Syy154373 if (ob >= obtail) {
245*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
246*6008Syy154373 }
247*6008Syy154373
248*6008Syy154373 *ob++ = *ib++;
249*6008Syy154373 continue;
250*6008Syy154373 }
251*6008Syy154373
252*6008Syy154373 oldib = ib;
253*6008Syy154373
254*6008Syy154373 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
255*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
256*6008Syy154373 }
257*6008Syy154373
258*6008Syy154373 if (ibtail - ib < 2) {
259*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
260*6008Syy154373 }
261*6008Syy154373
262*6008Syy154373 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
263*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
264*6008Syy154373 }
265*6008Syy154373
266*6008Syy154373 big5_val = *ib++;
267*6008Syy154373 big5_val = (big5_val << 8) | *ib++;
268*6008Syy154373 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
269*6008Syy154373
270*6008Syy154373 if (sz < 0) {
271*6008Syy154373 ib = oldib;
272*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
273*6008Syy154373 }
274*6008Syy154373
275*6008Syy154373 ob += sz;
276*6008Syy154373 continue;
277*6008Syy154373
278*6008Syy154373 REPLACE_INVALID:
279*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
280*6008Syy154373 ib = oldib;
281*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
282*6008Syy154373 }
283*6008Syy154373
284*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
285*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
286*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
287*6008Syy154373 ret_val++;
288*6008Syy154373 }
289*6008Syy154373
290*6008Syy154373 *inlen = ibtail - ib;
291*6008Syy154373 *outlen = obtail - ob;
292*6008Syy154373
293*6008Syy154373 return (ret_val);
294*6008Syy154373 }
295*6008Syy154373
296*6008Syy154373 /*
297*6008Syy154373 * Encoding convertor from BIG5 to UTF-8.
298*6008Syy154373 */
299*6008Syy154373 static size_t
kiconv_fr_big5(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)300*6008Syy154373 kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
301*6008Syy154373 size_t *outbytesleft, int *errno)
302*6008Syy154373 {
303*6008Syy154373 return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
304*6008Syy154373 outbytesleft, errno, big5_to_utf8));
305*6008Syy154373 }
306*6008Syy154373
307*6008Syy154373 /*
308*6008Syy154373 * String based encoding convertor from BIG5 to UTF-8.
309*6008Syy154373 */
310*6008Syy154373 static size_t
kiconvstr_fr_big5(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)311*6008Syy154373 kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
312*6008Syy154373 size_t *outlen, int flag, int *errno)
313*6008Syy154373 {
314*6008Syy154373 return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
315*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno,
316*6008Syy154373 big5_to_utf8));
317*6008Syy154373 }
318*6008Syy154373
319*6008Syy154373 /*
320*6008Syy154373 * Encoding convertor from BIG5-HKSCS to UTF-8.
321*6008Syy154373 */
322*6008Syy154373 static size_t
kiconv_fr_big5hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)323*6008Syy154373 kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
324*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
325*6008Syy154373 {
326*6008Syy154373 return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
327*6008Syy154373 outbytesleft, errno, big5hkscs_to_utf8);
328*6008Syy154373 }
329*6008Syy154373
330*6008Syy154373 /*
331*6008Syy154373 * String based encoding convertor from BIG5-HKSCS to UTF-8.
332*6008Syy154373 */
333*6008Syy154373 static size_t
kiconvstr_fr_big5hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)334*6008Syy154373 kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
335*6008Syy154373 size_t *outlen, int flag, int *errno)
336*6008Syy154373 {
337*6008Syy154373 return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
338*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
339*6008Syy154373 }
340*6008Syy154373
341*6008Syy154373 /*
342*6008Syy154373 * Encoding convertor from CP950-HKSCS to UTF-8.
343*6008Syy154373 */
344*6008Syy154373 static size_t
kiconv_fr_cp950hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)345*6008Syy154373 kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
346*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
347*6008Syy154373 {
348*6008Syy154373 return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
349*6008Syy154373 outbytesleft, errno, cp950hkscs_to_utf8);
350*6008Syy154373 }
351*6008Syy154373
352*6008Syy154373 /*
353*6008Syy154373 * String based encoding convertor from CP950-HKSCS to UTF-8.
354*6008Syy154373 */
355*6008Syy154373 static size_t
kiconvstr_fr_cp950hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)356*6008Syy154373 kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
357*6008Syy154373 size_t *outlen, int flag, int *errno)
358*6008Syy154373 {
359*6008Syy154373 return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
360*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
361*6008Syy154373 }
362*6008Syy154373
363*6008Syy154373 /*
364*6008Syy154373 * Encoding convertor from EUC-TW to UTF-8.
365*6008Syy154373 */
366*6008Syy154373 static size_t
kiconv_fr_euctw(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)367*6008Syy154373 kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
368*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
369*6008Syy154373 {
370*6008Syy154373 uchar_t *ib;
371*6008Syy154373 uchar_t *ob;
372*6008Syy154373 uchar_t *ibtail;
373*6008Syy154373 uchar_t *obtail;
374*6008Syy154373 uchar_t *oldib;
375*6008Syy154373 size_t ret_val;
376*6008Syy154373 size_t plane_no;
377*6008Syy154373 int8_t sz;
378*6008Syy154373 uint32_t euctw_val;
379*6008Syy154373 boolean_t isplane1;
380*6008Syy154373
381*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
382*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
383*6008Syy154373 *errno = EBADF;
384*6008Syy154373 return ((size_t)-1);
385*6008Syy154373 }
386*6008Syy154373
387*6008Syy154373 /* If this is a state reset request, process and return. */
388*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
389*6008Syy154373 return (0);
390*6008Syy154373 }
391*6008Syy154373
392*6008Syy154373 ret_val = 0;
393*6008Syy154373 ib = (uchar_t *)*inbuf;
394*6008Syy154373 ob = (uchar_t *)*outbuf;
395*6008Syy154373 ibtail = ib + *inbytesleft;
396*6008Syy154373 obtail = ob + *outbytesleft;
397*6008Syy154373
398*6008Syy154373 while (ib < ibtail) {
399*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
400*6008Syy154373 if (ob >= obtail) {
401*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
402*6008Syy154373 }
403*6008Syy154373
404*6008Syy154373 *ob++ = *ib++;
405*6008Syy154373 continue;
406*6008Syy154373 }
407*6008Syy154373
408*6008Syy154373 /*
409*6008Syy154373 * Issue EILSEQ error if the first byte is not a
410*6008Syy154373 * valid EUC-TW leading byte.
411*6008Syy154373 */
412*6008Syy154373 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
413*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
414*6008Syy154373 }
415*6008Syy154373
416*6008Syy154373 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
417*6008Syy154373 B_FALSE : B_TRUE;
418*6008Syy154373
419*6008Syy154373 /*
420*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
421*6008Syy154373 * character at the end of the buffer.
422*6008Syy154373 */
423*6008Syy154373 if (ibtail - ib < (isplane1 ? 2 : 4)) {
424*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
425*6008Syy154373 }
426*6008Syy154373
427*6008Syy154373 oldib = ib;
428*6008Syy154373 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
429*6008Syy154373
430*6008Syy154373 /*
431*6008Syy154373 * Issue EILSEQ error if the remaining bytes are not
432*6008Syy154373 * valid EUC-TW bytes.
433*6008Syy154373 */
434*6008Syy154373 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
435*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
436*6008Syy154373 }
437*6008Syy154373
438*6008Syy154373 if (! isplane1)
439*6008Syy154373 ib += 2;
440*6008Syy154373
441*6008Syy154373 /* Now we have a valid EUC-TW character. */
442*6008Syy154373 euctw_val = *ib++;
443*6008Syy154373 euctw_val = (euctw_val << 8) | *ib++;
444*6008Syy154373 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
445*6008Syy154373
446*6008Syy154373 if (sz < 0) {
447*6008Syy154373 ib = oldib;
448*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
449*6008Syy154373 }
450*6008Syy154373
451*6008Syy154373 ob += sz;
452*6008Syy154373 }
453*6008Syy154373
454*6008Syy154373 *inbuf = (char *)ib;
455*6008Syy154373 *inbytesleft = ibtail - ib;
456*6008Syy154373 *outbuf = (char *)ob;
457*6008Syy154373 *outbytesleft = obtail - ob;
458*6008Syy154373
459*6008Syy154373 return (ret_val);
460*6008Syy154373 }
461*6008Syy154373
462*6008Syy154373 /*
463*6008Syy154373 * String based encoding convertor from EUC-TW to UTF-8.
464*6008Syy154373 */
465*6008Syy154373 static size_t
kiconvstr_fr_euctw(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)466*6008Syy154373 kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
467*6008Syy154373 size_t *outlen, int flag, int *errno)
468*6008Syy154373 {
469*6008Syy154373 uchar_t *ib;
470*6008Syy154373 uchar_t *ob;
471*6008Syy154373 uchar_t *ibtail;
472*6008Syy154373 uchar_t *obtail;
473*6008Syy154373 uchar_t *oldib;
474*6008Syy154373 size_t ret_val;
475*6008Syy154373 size_t plane_no;
476*6008Syy154373 int8_t sz;
477*6008Syy154373 uint32_t euctw_val;
478*6008Syy154373 boolean_t isplane1;
479*6008Syy154373 boolean_t do_not_ignore_null;
480*6008Syy154373
481*6008Syy154373 ret_val = 0;
482*6008Syy154373 ib = (uchar_t *)inarray;
483*6008Syy154373 ob = (uchar_t *)outarray;
484*6008Syy154373 ibtail = ib + *inlen;
485*6008Syy154373 obtail = ob + *outlen;
486*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
487*6008Syy154373
488*6008Syy154373 while (ib < ibtail) {
489*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
490*6008Syy154373 break;
491*6008Syy154373
492*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
493*6008Syy154373 if (ob >= obtail) {
494*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495*6008Syy154373 }
496*6008Syy154373
497*6008Syy154373 *ob++ = *ib++;
498*6008Syy154373 continue;
499*6008Syy154373 }
500*6008Syy154373
501*6008Syy154373 oldib = ib;
502*6008Syy154373
503*6008Syy154373 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
504*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
505*6008Syy154373 }
506*6008Syy154373
507*6008Syy154373 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
508*6008Syy154373 B_FALSE : B_TRUE;
509*6008Syy154373
510*6008Syy154373 if (ibtail - ib < (isplane1 ? 2 : 4)) {
511*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
512*6008Syy154373 ib = ibtail;
513*6008Syy154373 goto REPLACE_INVALID;
514*6008Syy154373 }
515*6008Syy154373
516*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
517*6008Syy154373 }
518*6008Syy154373
519*6008Syy154373 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
520*6008Syy154373
521*6008Syy154373 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
522*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
523*6008Syy154373 }
524*6008Syy154373
525*6008Syy154373 if (! isplane1)
526*6008Syy154373 ib += 2;
527*6008Syy154373
528*6008Syy154373 euctw_val = *ib++;
529*6008Syy154373 euctw_val = (euctw_val << 8) | *ib++;
530*6008Syy154373 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
531*6008Syy154373
532*6008Syy154373 if (sz < 0) {
533*6008Syy154373 ib = oldib;
534*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
535*6008Syy154373 }
536*6008Syy154373
537*6008Syy154373 ob += sz;
538*6008Syy154373 continue;
539*6008Syy154373
540*6008Syy154373 REPLACE_INVALID:
541*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
542*6008Syy154373 ib = oldib;
543*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
544*6008Syy154373 }
545*6008Syy154373
546*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
547*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
548*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
549*6008Syy154373 ret_val++;
550*6008Syy154373 }
551*6008Syy154373
552*6008Syy154373 *inlen = ibtail - ib;
553*6008Syy154373 *outlen = obtail - ob;
554*6008Syy154373
555*6008Syy154373 return (ret_val);
556*6008Syy154373 }
557*6008Syy154373
558*6008Syy154373 /*
559*6008Syy154373 * Encoding convertor from UTF-8 to BIG5.
560*6008Syy154373 */
561*6008Syy154373 static size_t
kiconv_to_big5(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)562*6008Syy154373 kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
563*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
564*6008Syy154373 {
565*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
566*6008Syy154373 outbytesleft, errno, utf8_to_big5);
567*6008Syy154373 }
568*6008Syy154373
569*6008Syy154373 /*
570*6008Syy154373 * String based encoding convertor from UTF-8 to BIG5.
571*6008Syy154373 */
572*6008Syy154373 static size_t
kiconvstr_to_big5(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)573*6008Syy154373 kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
574*6008Syy154373 size_t *outlen, int flag, int *errno)
575*6008Syy154373 {
576*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
577*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
578*6008Syy154373 }
579*6008Syy154373
580*6008Syy154373 /*
581*6008Syy154373 * Encoding convertor from UTF-8 to EUC-TW.
582*6008Syy154373 */
583*6008Syy154373 static size_t
kiconv_to_euctw(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)584*6008Syy154373 kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
585*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
586*6008Syy154373 {
587*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
588*6008Syy154373 outbytesleft, errno, utf8_to_euctw);
589*6008Syy154373 }
590*6008Syy154373
591*6008Syy154373 /*
592*6008Syy154373 * String based encoding convertor from UTF-8 to EUC-TW.
593*6008Syy154373 */
594*6008Syy154373 static size_t
kiconvstr_to_euctw(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)595*6008Syy154373 kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
596*6008Syy154373 size_t *outlen, int flag, int *errno)
597*6008Syy154373 {
598*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
599*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
600*6008Syy154373 }
601*6008Syy154373
602*6008Syy154373 /*
603*6008Syy154373 * Encoding convertor from UTF-8 to CP950HKSCS.
604*6008Syy154373 */
605*6008Syy154373 static size_t
kiconv_to_cp950hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)606*6008Syy154373 kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
607*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
608*6008Syy154373 {
609*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
610*6008Syy154373 outbytesleft, errno, utf8_to_cp950hkscs);
611*6008Syy154373 }
612*6008Syy154373
613*6008Syy154373 /*
614*6008Syy154373 * String based encoding convertor from UTF-8 to CP950HKSCS.
615*6008Syy154373 */
616*6008Syy154373 static size_t
kiconvstr_to_cp950hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)617*6008Syy154373 kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
618*6008Syy154373 size_t *outlen, int flag, int *errno)
619*6008Syy154373 {
620*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
621*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
622*6008Syy154373 }
623*6008Syy154373
624*6008Syy154373 /*
625*6008Syy154373 * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
626*6008Syy154373 */
627*6008Syy154373 static size_t
kiconv_to_big5hkscs(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)628*6008Syy154373 kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
629*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
630*6008Syy154373 {
631*6008Syy154373 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
632*6008Syy154373 outbytesleft, errno, utf8_to_big5hkscs);
633*6008Syy154373 }
634*6008Syy154373
635*6008Syy154373 /*
636*6008Syy154373 * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
637*6008Syy154373 */
638*6008Syy154373 static size_t
kiconvstr_to_big5hkscs(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)639*6008Syy154373 kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
640*6008Syy154373 size_t *outlen, int flag, int *errno)
641*6008Syy154373 {
642*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
643*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
644*6008Syy154373 }
645*6008Syy154373
646*6008Syy154373 /*
647*6008Syy154373 * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
648*6008Syy154373 * Return: > 0 - Converted successfully
649*6008Syy154373 * = -1 - E2BIG
650*6008Syy154373 */
651*6008Syy154373 static int8_t
big5_to_utf8_common(uint32_t big5_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_array_t * table,size_t nitems)652*6008Syy154373 big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
653*6008Syy154373 size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
654*6008Syy154373 {
655*6008Syy154373 size_t index;
656*6008Syy154373 int8_t sz;
657*6008Syy154373 uchar_t *u8;
658*6008Syy154373
659*6008Syy154373 index = kiconv_binsearch(big5_val, table, nitems);
660*6008Syy154373 u8 = table[index].u8;
661*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
662*6008Syy154373
663*6008Syy154373 if (obtail - ob < sz) {
664*6008Syy154373 *ret_val = (size_t)-1;
665*6008Syy154373 return (-1);
666*6008Syy154373 }
667*6008Syy154373
668*6008Syy154373 if (index == 0)
669*6008Syy154373 (*ret_val)++; /* Non-identical conversion */
670*6008Syy154373
671*6008Syy154373 for (index = 0; index < sz; index++)
672*6008Syy154373 *ob++ = u8[index];
673*6008Syy154373
674*6008Syy154373 return (sz);
675*6008Syy154373 }
676*6008Syy154373
677*6008Syy154373 /*
678*6008Syy154373 * Convert single BIG5 character to UTF-8.
679*6008Syy154373 */
680*6008Syy154373 static int8_t
big5_to_utf8(uint32_t big5_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)681*6008Syy154373 big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
682*6008Syy154373 {
683*6008Syy154373 return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
684*6008Syy154373 kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
685*6008Syy154373 }
686*6008Syy154373
687*6008Syy154373 /*
688*6008Syy154373 * Convert single CP950-HKSCS character to UTF-8.
689*6008Syy154373 */
690*6008Syy154373 static int8_t
cp950hkscs_to_utf8(uint32_t hkscs_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)691*6008Syy154373 cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
692*6008Syy154373 size_t *ret_val)
693*6008Syy154373 {
694*6008Syy154373 return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
695*6008Syy154373 kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
696*6008Syy154373 }
697*6008Syy154373
/*
 * Compute the Unicode value for a character in one of the CNS planes that
 * fall in the Unicode UDA range.
 *
 * plane_no	- CNS plane number; plane 16 is folded onto 15 because plane
 *		  15 is pre-allocated
 * b1, b2	- first and second code bytes of the character
 *
 * The value returned is
 *	0xF0000 + (plane - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1)
 * with every constant term folded into 0xD2611.
 */
static uint32_t
get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
{
	size_t plane = (plane_no == 16) ? 15 : plane_no;

	return (8836 * plane + 94 * b1 + b2 + 0xD2611);
}
715*6008Syy154373
716*6008Syy154373 /*
717*6008Syy154373 * Convert single EUC-TW character to UTF-8.
718*6008Syy154373 * Return: > 0 - Converted successfully
719*6008Syy154373 * = -1 - E2BIG
720*6008Syy154373 */
721*6008Syy154373 static int8_t
euctw_to_utf8(size_t plane_no,uint32_t euctw_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)722*6008Syy154373 euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
723*6008Syy154373 uchar_t *obtail, size_t *ret_val)
724*6008Syy154373 {
725*6008Syy154373 uint32_t u32;
726*6008Syy154373 size_t index;
727*6008Syy154373 int8_t sz;
728*6008Syy154373 uchar_t udc[4];
729*6008Syy154373 uchar_t *u8;
730*6008Syy154373
731*6008Syy154373 switch (plane_no) {
732*6008Syy154373 case 1:
733*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
734*6008Syy154373 KICONV_CNS1_UTF8_MAX);
735*6008Syy154373 u8 = kiconv_cns1_utf8[index].u8;
736*6008Syy154373 break;
737*6008Syy154373 case 2:
738*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
739*6008Syy154373 KICONV_CNS2_UTF8_MAX);
740*6008Syy154373 u8 = kiconv_cns2_utf8[index].u8;
741*6008Syy154373 break;
742*6008Syy154373 case 3:
743*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
744*6008Syy154373 KICONV_CNS3_UTF8_MAX);
745*6008Syy154373 u8 = kiconv_cns3_utf8[index].u8;
746*6008Syy154373 break;
747*6008Syy154373 case 4:
748*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
749*6008Syy154373 KICONV_CNS4_UTF8_MAX);
750*6008Syy154373 u8 = kiconv_cns4_utf8[index].u8;
751*6008Syy154373 break;
752*6008Syy154373 case 5:
753*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
754*6008Syy154373 KICONV_CNS5_UTF8_MAX);
755*6008Syy154373 u8 = kiconv_cns5_utf8[index].u8;
756*6008Syy154373 break;
757*6008Syy154373 case 6:
758*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
759*6008Syy154373 KICONV_CNS6_UTF8_MAX);
760*6008Syy154373 u8 = kiconv_cns6_utf8[index].u8;
761*6008Syy154373 break;
762*6008Syy154373 case 7:
763*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
764*6008Syy154373 KICONV_CNS7_UTF8_MAX);
765*6008Syy154373 u8 = kiconv_cns7_utf8[index].u8;
766*6008Syy154373 break;
767*6008Syy154373 case 12:
768*6008Syy154373 case 13:
769*6008Syy154373 case 14:
770*6008Syy154373 case 16:
771*6008Syy154373 u32 = get_unicode_from_UDA(plane_no,
772*6008Syy154373 (euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
773*6008Syy154373 /*
774*6008Syy154373 * As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
775*6008Syy154373 * will occupy 4 bytes.
776*6008Syy154373 */
777*6008Syy154373 udc[0] = 0xF3;
778*6008Syy154373 udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
779*6008Syy154373 udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
780*6008Syy154373 udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
781*6008Syy154373 u8 = udc;
782*6008Syy154373 index = 1;
783*6008Syy154373 break;
784*6008Syy154373 case 15:
785*6008Syy154373 index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
786*6008Syy154373 KICONV_CNS15_UTF8_MAX);
787*6008Syy154373 u8 = kiconv_cns15_utf8[index].u8;
788*6008Syy154373 break;
789*6008Syy154373 default:
790*6008Syy154373 index = 0;
791*6008Syy154373 u8 = kiconv_cns1_utf8[index].u8;
792*6008Syy154373 }
793*6008Syy154373
794*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
795*6008Syy154373 if (obtail - ob < sz) {
796*6008Syy154373 *ret_val = (size_t)-1;
797*6008Syy154373 return (-1);
798*6008Syy154373 }
799*6008Syy154373
800*6008Syy154373 if (index == 0)
801*6008Syy154373 (*ret_val)++;
802*6008Syy154373
803*6008Syy154373 for (index = 0; index < sz; index++)
804*6008Syy154373 *ob++ = u8[index];
805*6008Syy154373
806*6008Syy154373 return (sz);
807*6008Syy154373 }
808*6008Syy154373
809*6008Syy154373 /*
810*6008Syy154373 * Convert single HKSCS character to UTF-8.
811*6008Syy154373 * Return: > 0 - Converted successfully
812*6008Syy154373 * = -1 - E2BIG
813*6008Syy154373 */
814*6008Syy154373 static int8_t
big5hkscs_to_utf8(uint32_t hkscs_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val)815*6008Syy154373 big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
816*6008Syy154373 size_t *ret_val)
817*6008Syy154373 {
818*6008Syy154373 size_t index;
819*6008Syy154373 int8_t sz;
820*6008Syy154373 uchar_t *u8;
821*6008Syy154373
822*6008Syy154373 index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
823*6008Syy154373 KICONV_HKSCS_UTF8_MAX);
824*6008Syy154373 u8 = kiconv_hkscs_utf8[index].u8;
825*6008Syy154373
826*6008Syy154373 /*
827*6008Syy154373 * Single HKSCS-2004 character may map to 2 Unicode
828*6008Syy154373 * code points.
829*6008Syy154373 */
830*6008Syy154373 if (u8[0] == 0xFF) {
831*6008Syy154373 u8 = hkscs_special_sequence[u8[1]];
832*6008Syy154373 sz = 4;
833*6008Syy154373 } else {
834*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
835*6008Syy154373 }
836*6008Syy154373
837*6008Syy154373 if (obtail - ob < sz) {
838*6008Syy154373 *ret_val = (size_t)-1;
839*6008Syy154373 return (-1);
840*6008Syy154373 }
841*6008Syy154373
842*6008Syy154373 if (index == 0)
843*6008Syy154373 (*ret_val)++; /* Non-identical conversion. */
844*6008Syy154373
845*6008Syy154373 for (index = 0; index < sz; index++)
846*6008Syy154373 *ob++ = u8[index];
847*6008Syy154373
848*6008Syy154373 return (sz);
849*6008Syy154373 }
850*6008Syy154373
851*6008Syy154373 /*
852*6008Syy154373 * Convert single UTF-8 character to EUC-TW.
853*6008Syy154373 * Return: > 0 - Converted successfully
854*6008Syy154373 * = -1 - E2BIG
855*6008Syy154373 */
856*6008Syy154373 /* ARGSUSED */
857*6008Syy154373 static int8_t
utf8_to_euctw(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)858*6008Syy154373 utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
859*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
860*6008Syy154373 {
861*6008Syy154373 size_t index;
862*6008Syy154373 size_t plane_no;
863*6008Syy154373 uchar_t byte1;
864*6008Syy154373 uchar_t byte2;
865*6008Syy154373
866*6008Syy154373 if (utf8 >= KICONV_TC_UDA_UTF8_START &&
867*6008Syy154373 utf8 <= KICONV_TC_UDA_UTF8_END) {
868*6008Syy154373 /*
869*6008Syy154373 * Calculate EUC-TW code if utf8 is in Unicode
870*6008Syy154373 * Private Plane 15.
871*6008Syy154373 */
872*6008Syy154373 index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
873*6008Syy154373 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
874*6008Syy154373 KICONV_TC_UDA_UCS4_START;
875*6008Syy154373 plane_no = 12 + index / 8836;
876*6008Syy154373 byte1 = 0xA1 + (index % 8836) / 94;
877*6008Syy154373 byte2 = 0xA1 + index % 94;
878*6008Syy154373
879*6008Syy154373 /* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
880*6008Syy154373 if (plane_no == 15)
881*6008Syy154373 plane_no = 16;
882*6008Syy154373 } else {
883*6008Syy154373 uint32_t euctw_val;
884*6008Syy154373
885*6008Syy154373 index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
886*6008Syy154373 KICONV_UTF8_EUCTW_MAX);
887*6008Syy154373
888*6008Syy154373 if (index == 0) {
889*6008Syy154373 if (ob >= obtail) {
890*6008Syy154373 *ret_val = (size_t)-1;
891*6008Syy154373 return (-1);
892*6008Syy154373 }
893*6008Syy154373
894*6008Syy154373 *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
895*6008Syy154373 (*ret_val)++;
896*6008Syy154373
897*6008Syy154373 return (1);
898*6008Syy154373 }
899*6008Syy154373
900*6008Syy154373 euctw_val = kiconv_utf8_euctw[index].value;
901*6008Syy154373 byte1 = (euctw_val & 0xFF00) >> 8;
902*6008Syy154373 byte2 = euctw_val & 0xFF;
903*6008Syy154373 plane_no = euctw_val >> 16;
904*6008Syy154373 }
905*6008Syy154373
906*6008Syy154373 if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
907*6008Syy154373 *ret_val = (size_t)-1;
908*6008Syy154373 return (-1);
909*6008Syy154373 }
910*6008Syy154373
911*6008Syy154373 if (plane_no != 1) {
912*6008Syy154373 *ob++ = KICONV_TC_EUCTW_MBYTE;
913*6008Syy154373 *ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
914*6008Syy154373 }
915*6008Syy154373
916*6008Syy154373 *ob++ = byte1;
917*6008Syy154373 *ob = byte2;
918*6008Syy154373
919*6008Syy154373 return (plane_no == 1 ? 2 : 4);
920*6008Syy154373 }
921*6008Syy154373
922*6008Syy154373 /*
923*6008Syy154373 * Convert single UTF-8 character to BIG5-HKSCS
924*6008Syy154373 * Return: > 0 - Converted successfully
925*6008Syy154373 * = -1 - E2BIG
926*6008Syy154373 */
927*6008Syy154373 static int8_t
utf8_to_big5hkscs(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)928*6008Syy154373 utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
929*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
930*6008Syy154373 {
931*6008Syy154373 size_t index;
932*6008Syy154373 int8_t hkscslen;
933*6008Syy154373 uint32_t hkscscode;
934*6008Syy154373 boolean_t special_sequence = B_FALSE;
935*6008Syy154373
936*6008Syy154373 index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
937*6008Syy154373 KICONV_UTF8_HKSCS_MAX);
938*6008Syy154373 hkscscode = kiconv_utf8_hkscs[index].value;
939*6008Syy154373
940*6008Syy154373 /*
941*6008Syy154373 * There are 4 special code points in HKSCS-2004 which mapped
942*6008Syy154373 * to 2 UNICODE code points.
943*6008Syy154373 */
944*6008Syy154373 if ((int32_t)hkscscode < 0) {
945*6008Syy154373 size_t special_index = (-(int32_t)hkscscode - 1) * 3;
946*6008Syy154373
947*6008Syy154373 /* Check the following 2 bytes. */
948*6008Syy154373 if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
949*6008Syy154373 (*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
950*6008Syy154373 special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
951*6008Syy154373 special_sequence = B_TRUE;
952*6008Syy154373 }
953*6008Syy154373
954*6008Syy154373 hkscscode = ucs_special_sequence[special_index];
955*6008Syy154373 }
956*6008Syy154373
957*6008Syy154373 hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
958*6008Syy154373 if (obtail - ob < hkscslen) {
959*6008Syy154373 *ret_val = (size_t)-1;
960*6008Syy154373 return (-1);
961*6008Syy154373 }
962*6008Syy154373
963*6008Syy154373 if (index == 0)
964*6008Syy154373 (*ret_val)++;
965*6008Syy154373
966*6008Syy154373 if (hkscslen > 1)
967*6008Syy154373 *ob++ = (uchar_t)(hkscscode >> 8);
968*6008Syy154373 *ob = (uchar_t)(hkscscode & 0xFF);
969*6008Syy154373
970*6008Syy154373 if (special_sequence) { /* Advance for special sequence */
971*6008Syy154373 (*inbuf) += 2;
972*6008Syy154373 }
973*6008Syy154373
974*6008Syy154373 return (hkscslen);
975*6008Syy154373 }
976*6008Syy154373
977*6008Syy154373 /*
978*6008Syy154373 * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
979*6008Syy154373 * Return: > 0 - Converted successfully
980*6008Syy154373 * = -1 - E2BIG
981*6008Syy154373 */
982*6008Syy154373 static int8_t
utf8_to_big5_common(uint32_t utf8,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_t * table,size_t nitems)983*6008Syy154373 utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
984*6008Syy154373 size_t *ret_val, kiconv_table_t *table, size_t nitems)
985*6008Syy154373 {
986*6008Syy154373 size_t index;
987*6008Syy154373 int8_t big5len;
988*6008Syy154373 uint32_t big5code;
989*6008Syy154373
990*6008Syy154373 index = kiconv_binsearch(utf8, table, nitems);
991*6008Syy154373 big5code = table[index].value;
992*6008Syy154373 big5len = (big5code <= 0xFF) ? 1 : 2;
993*6008Syy154373
994*6008Syy154373 if (obtail - ob < big5len) {
995*6008Syy154373 *ret_val = (size_t)-1;
996*6008Syy154373 return (-1);
997*6008Syy154373 }
998*6008Syy154373
999*6008Syy154373 if (index == 0)
1000*6008Syy154373 (*ret_val)++;
1001*6008Syy154373
1002*6008Syy154373 if (big5len > 1)
1003*6008Syy154373 *ob++ = (uchar_t)(big5code >> 8);
1004*6008Syy154373 *ob = (uchar_t)(big5code & 0xFF);
1005*6008Syy154373
1006*6008Syy154373 return (big5len);
1007*6008Syy154373 }
1008*6008Syy154373
1009*6008Syy154373 /*
1010*6008Syy154373 * Convert single UTF-8 character to BIG5.
1011*6008Syy154373 */
1012*6008Syy154373 /* ARGSUSED */
1013*6008Syy154373 static int8_t
utf8_to_big5(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)1014*6008Syy154373 utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1015*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1016*6008Syy154373 {
1017*6008Syy154373 return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1018*6008Syy154373 kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
1019*6008Syy154373 }
1020*6008Syy154373
1021*6008Syy154373 /*
1022*6008Syy154373 * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1023*6008Syy154373 */
1024*6008Syy154373 /* ARGSUSED */
1025*6008Syy154373 static int8_t
utf8_to_cp950hkscs(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)1026*6008Syy154373 utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1027*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1028*6008Syy154373 {
1029*6008Syy154373 return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1030*6008Syy154373 kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
1031*6008Syy154373 }
1032*6008Syy154373
1033*6008Syy154373 static kiconv_ops_t kiconv_tc_ops_tbl[] = {
1034*6008Syy154373 {
1035*6008Syy154373 "big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
1036*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_big5
1037*6008Syy154373 },
1038*6008Syy154373 {
1039*6008Syy154373 "utf-8", "big5", open_fr_big5, kiconv_fr_big5,
1040*6008Syy154373 close_fr_tc, kiconvstr_fr_big5
1041*6008Syy154373 },
1042*6008Syy154373
1043*6008Syy154373 {
1044*6008Syy154373 "big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
1045*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_big5hkscs
1046*6008Syy154373 },
1047*6008Syy154373 {
1048*6008Syy154373 "utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
1049*6008Syy154373 close_fr_tc, kiconvstr_fr_big5hkscs
1050*6008Syy154373 },
1051*6008Syy154373
1052*6008Syy154373 {
1053*6008Syy154373 "euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
1054*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_euctw
1055*6008Syy154373 },
1056*6008Syy154373 {
1057*6008Syy154373 "utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
1058*6008Syy154373 close_fr_tc, kiconvstr_fr_euctw
1059*6008Syy154373 },
1060*6008Syy154373
1061*6008Syy154373 {
1062*6008Syy154373 "cp950-hkscs", "utf-8", kiconv_open_to_cck,
1063*6008Syy154373 kiconv_to_cp950hkscs, kiconv_close_to_cck,
1064*6008Syy154373 kiconvstr_to_cp950hkscs
1065*6008Syy154373 },
1066*6008Syy154373 {
1067*6008Syy154373 "utf-8", "cp950-hkscs", open_fr_cp950hkscs,
1068*6008Syy154373 kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
1069*6008Syy154373 },
1070*6008Syy154373 };
1071*6008Syy154373
1072*6008Syy154373 static kiconv_module_info_t kiconv_tc_info = {
1073*6008Syy154373 "kiconv_tc", /* module name */
1074*6008Syy154373 sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
1075*6008Syy154373 kiconv_tc_ops_tbl,
1076*6008Syy154373 0,
1077*6008Syy154373 NULL,
1078*6008Syy154373 NULL,
1079*6008Syy154373 0
1080*6008Syy154373 };
1081*6008Syy154373
1082*6008Syy154373 static struct modlkiconv modlkiconv_tc = {
1083*6008Syy154373 &mod_kiconvops,
1084*6008Syy154373 "kiconv Traditional Chinese module 1.0",
1085*6008Syy154373 &kiconv_tc_info
1086*6008Syy154373 };
1087*6008Syy154373
1088*6008Syy154373 static struct modlinkage modlinkage = {
1089*6008Syy154373 MODREV_1,
1090*6008Syy154373 (void *)&modlkiconv_tc,
1091*6008Syy154373 NULL
1092*6008Syy154373 };
1093*6008Syy154373
1094*6008Syy154373 int
_init(void)1095*6008Syy154373 _init(void)
1096*6008Syy154373 {
1097*6008Syy154373 int err;
1098*6008Syy154373
1099*6008Syy154373 err = mod_install(&modlinkage);
1100*6008Syy154373 if (err)
1101*6008Syy154373 cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
1102*6008Syy154373
1103*6008Syy154373 return (err);
1104*6008Syy154373 }
1105*6008Syy154373
1106*6008Syy154373 int
_fini(void)1107*6008Syy154373 _fini(void)
1108*6008Syy154373 {
1109*6008Syy154373 int err;
1110*6008Syy154373
1111*6008Syy154373 /*
1112*6008Syy154373 * If this module is being used, then, we cannot remove the module.
1113*6008Syy154373 * The following checking will catch pretty much all usual cases.
1114*6008Syy154373 *
1115*6008Syy154373 * Any remaining will be catached by the kiconv_unregister_module()
1116*6008Syy154373 * during mod_remove() at below.
1117*6008Syy154373 */
1118*6008Syy154373 if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
1119*6008Syy154373 return (EBUSY);
1120*6008Syy154373
1121*6008Syy154373 err = mod_remove(&modlinkage);
1122*6008Syy154373 if (err)
1123*6008Syy154373 cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
1124*6008Syy154373
1125*6008Syy154373 return (err);
1126*6008Syy154373 }
1127*6008Syy154373
1128*6008Syy154373 int
_info(struct modinfo * modinfop)1129*6008Syy154373 _info(struct modinfo *modinfop)
1130*6008Syy154373 {
1131*6008Syy154373 return (mod_info(&modlinkage, modinfop));
1132*6008Syy154373 }
1133