1*6008Syy154373 /*
2*6008Syy154373 * CDDL HEADER START
3*6008Syy154373 *
4*6008Syy154373 * The contents of this file are subject to the terms of the
5*6008Syy154373 * Common Development and Distribution License (the "License").
6*6008Syy154373 * You may not use this file except in compliance with the License.
7*6008Syy154373 *
8*6008Syy154373 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373 * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373 * See the License for the specific language governing permissions
11*6008Syy154373 * and limitations under the License.
12*6008Syy154373 *
13*6008Syy154373 * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373 * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373 * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373 * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373 *
19*6008Syy154373 * CDDL HEADER END
20*6008Syy154373 */
21*6008Syy154373
22*6008Syy154373 /*
23*6008Syy154373 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24*6008Syy154373 * Use is subject to license terms.
25*6008Syy154373 */
26*6008Syy154373
27*6008Syy154373 #pragma ident "%Z%%M% %I% %E% SMI"
28*6008Syy154373
29*6008Syy154373 #include <sys/types.h>
30*6008Syy154373 #include <sys/param.h>
31*6008Syy154373 #include <sys/sysmacros.h>
32*6008Syy154373 #include <sys/systm.h>
33*6008Syy154373 #include <sys/debug.h>
34*6008Syy154373 #include <sys/kmem.h>
35*6008Syy154373 #include <sys/sunddi.h>
36*6008Syy154373 #include <sys/byteorder.h>
37*6008Syy154373 #include <sys/errno.h>
38*6008Syy154373 #include <sys/modctl.h>
39*6008Syy154373 #include <sys/u8_textprep.h>
40*6008Syy154373 #include <sys/kiconv.h>
41*6008Syy154373 #include <sys/kiconv_cck_common.h>
42*6008Syy154373 #include <sys/kiconv_ko.h>
43*6008Syy154373 #include <sys/kiconv_uhc_utf8.h>
44*6008Syy154373 #include <sys/kiconv_utf8_uhc.h>
45*6008Syy154373 #include <sys/kiconv_euckr_utf8.h>
46*6008Syy154373 #include <sys/kiconv_utf8_euckr.h>
47*6008Syy154373
48*6008Syy154373 static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
49*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
50*6008Syy154373 static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
51*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
52*6008Syy154373 static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
53*6008Syy154373 size_t *ret_val, kiconv_table_array_t *table, size_t nitems);
54*6008Syy154373
55*6008Syy154373
/*
 * Magic values used as conversion descriptors for the "from EUC-KR" and
 * "from UHC" directions.  open_fr_euckr() and open_fr_uhc() return them
 * cast to a pointer, and close_fr_ko() rejects any descriptor whose value
 * is greater than KICONV_KO_MAX_MAGIC_ID.
 */
#define	KICONV_KO_EUCKR		(0x01)
#define	KICONV_KO_UHC		(0x02)
#define	KICONV_KO_MAX_MAGIC_ID	(0x02)
59*6008Syy154373
60*6008Syy154373 static void *
open_fr_euckr()61*6008Syy154373 open_fr_euckr()
62*6008Syy154373 {
63*6008Syy154373 return ((void *)KICONV_KO_EUCKR);
64*6008Syy154373 }
65*6008Syy154373
66*6008Syy154373 static void *
open_fr_uhc()67*6008Syy154373 open_fr_uhc()
68*6008Syy154373 {
69*6008Syy154373 return ((void *)KICONV_KO_UHC);
70*6008Syy154373 }
71*6008Syy154373
72*6008Syy154373 static int
close_fr_ko(void * s)73*6008Syy154373 close_fr_ko(void *s)
74*6008Syy154373 {
75*6008Syy154373 if ((uintptr_t)s > KICONV_KO_MAX_MAGIC_ID)
76*6008Syy154373 return (EBADF);
77*6008Syy154373
78*6008Syy154373 return (0);
79*6008Syy154373 }
80*6008Syy154373
81*6008Syy154373 /*
82*6008Syy154373 * Encoding convertor from EUC-KR to UTF-8.
83*6008Syy154373 */
84*6008Syy154373 static size_t
kiconv_fr_euckr(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)85*6008Syy154373 kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
86*6008Syy154373 char **outbuf, size_t *outbufleft, int *errno)
87*6008Syy154373 {
88*6008Syy154373 uchar_t *ib;
89*6008Syy154373 uchar_t *ob;
90*6008Syy154373 uchar_t *ibtail;
91*6008Syy154373 uchar_t *obtail;
92*6008Syy154373 size_t ret_val;
93*6008Syy154373 int8_t sz;
94*6008Syy154373 uint32_t euckr_val;
95*6008Syy154373
96*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
97*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
98*6008Syy154373 *errno = EBADF;
99*6008Syy154373 return ((size_t)-1);
100*6008Syy154373 }
101*6008Syy154373
102*6008Syy154373 /* If this is a state reset request, process and return. */
103*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
104*6008Syy154373 return (0);
105*6008Syy154373 }
106*6008Syy154373
107*6008Syy154373 ret_val = 0;
108*6008Syy154373 ib = (uchar_t *)*inbuf;
109*6008Syy154373 ob = (uchar_t *)*outbuf;
110*6008Syy154373 ibtail = ib + *inbufleft;
111*6008Syy154373 obtail = ob + *outbufleft;
112*6008Syy154373
113*6008Syy154373 while (ib < ibtail) {
114*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
115*6008Syy154373 if (ob >= obtail) {
116*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
117*6008Syy154373 }
118*6008Syy154373
119*6008Syy154373 *ob++ = *ib++;
120*6008Syy154373 continue;
121*6008Syy154373 }
122*6008Syy154373
123*6008Syy154373 /*
124*6008Syy154373 * Issue EILSEQ error if the first byte is not a
125*6008Syy154373 * valid EUC-KR leading byte.
126*6008Syy154373 */
127*6008Syy154373 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
128*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
129*6008Syy154373 }
130*6008Syy154373
131*6008Syy154373 /*
132*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
133*6008Syy154373 * character at the end of the buffer.
134*6008Syy154373 */
135*6008Syy154373 if (ibtail - ib < 2) {
136*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
137*6008Syy154373 }
138*6008Syy154373
139*6008Syy154373 /*
140*6008Syy154373 * Issue EILSEQ error if the remaining byte is not
141*6008Syy154373 * a valid EUC-KR byte.
142*6008Syy154373 */
143*6008Syy154373 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
144*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
145*6008Syy154373 }
146*6008Syy154373
147*6008Syy154373 euckr_val = (uint32_t)(*ib) << 8 | *(ib + 1);
148*6008Syy154373 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
149*6008Syy154373 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
150*6008Syy154373
151*6008Syy154373 if (sz < 0) {
152*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
153*6008Syy154373 }
154*6008Syy154373
155*6008Syy154373 ib += 2;
156*6008Syy154373 ob += sz;
157*6008Syy154373 }
158*6008Syy154373
159*6008Syy154373 *inbuf = (char *)ib;
160*6008Syy154373 *inbufleft = ibtail - ib;
161*6008Syy154373 *outbuf = (char *)ob;
162*6008Syy154373 *outbufleft = obtail - ob;
163*6008Syy154373
164*6008Syy154373 return (ret_val);
165*6008Syy154373 }
166*6008Syy154373
167*6008Syy154373 /*
168*6008Syy154373 * String based encoding convertor from EUC-KR to UTF-8.
169*6008Syy154373 */
170*6008Syy154373 static size_t
kiconvstr_fr_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)171*6008Syy154373 kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray,
172*6008Syy154373 size_t *outlen, int flag, int *errno)
173*6008Syy154373 {
174*6008Syy154373 uchar_t *ib;
175*6008Syy154373 uchar_t *ob;
176*6008Syy154373 uchar_t *ibtail;
177*6008Syy154373 uchar_t *obtail;
178*6008Syy154373 uchar_t *oldib;
179*6008Syy154373 size_t ret_val;
180*6008Syy154373 int8_t sz;
181*6008Syy154373 uint32_t euckr_val;
182*6008Syy154373 boolean_t do_not_ignore_null;
183*6008Syy154373
184*6008Syy154373 ret_val = 0;
185*6008Syy154373 ib = (uchar_t *)inarray;
186*6008Syy154373 ob = (uchar_t *)outarray;
187*6008Syy154373 ibtail = ib + *inlen;
188*6008Syy154373 obtail = ob + *outlen;
189*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
190*6008Syy154373
191*6008Syy154373 while (ib < ibtail) {
192*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
193*6008Syy154373 break;
194*6008Syy154373
195*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
196*6008Syy154373 if (ob >= obtail) {
197*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
198*6008Syy154373 }
199*6008Syy154373
200*6008Syy154373 *ob++ = *ib++;
201*6008Syy154373 continue;
202*6008Syy154373 }
203*6008Syy154373
204*6008Syy154373 oldib = ib;
205*6008Syy154373
206*6008Syy154373 if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
207*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
208*6008Syy154373 }
209*6008Syy154373
210*6008Syy154373 if (ibtail - ib < 2) {
211*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
212*6008Syy154373 }
213*6008Syy154373
214*6008Syy154373 if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
215*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
216*6008Syy154373 }
217*6008Syy154373
218*6008Syy154373 euckr_val = *ib++;
219*6008Syy154373 euckr_val = (euckr_val << 8) | *ib++;
220*6008Syy154373 sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
221*6008Syy154373 kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
222*6008Syy154373
223*6008Syy154373 if (sz < 0) {
224*6008Syy154373 ib = oldib;
225*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
226*6008Syy154373 }
227*6008Syy154373
228*6008Syy154373 ob += sz;
229*6008Syy154373 continue;
230*6008Syy154373
231*6008Syy154373 REPLACE_INVALID:
232*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
233*6008Syy154373 ib = oldib;
234*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
235*6008Syy154373 }
236*6008Syy154373
237*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
238*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
239*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
240*6008Syy154373 ret_val++;
241*6008Syy154373 }
242*6008Syy154373
243*6008Syy154373 *inlen = ibtail - ib;
244*6008Syy154373 *outlen = obtail - ob;
245*6008Syy154373
246*6008Syy154373 return (ret_val);
247*6008Syy154373 }
248*6008Syy154373
249*6008Syy154373 /*
250*6008Syy154373 * Encoding convertor from Unified Hangul Code to UTF-8.
251*6008Syy154373 */
252*6008Syy154373 static size_t
kiconv_fr_uhc(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)253*6008Syy154373 kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft,
254*6008Syy154373 char **outbuf, size_t *outbufleft, int *errno)
255*6008Syy154373 {
256*6008Syy154373 uchar_t *ib;
257*6008Syy154373 uchar_t *ob;
258*6008Syy154373 uchar_t *ibtail;
259*6008Syy154373 uchar_t *obtail;
260*6008Syy154373 size_t ret_val;
261*6008Syy154373 int8_t sz;
262*6008Syy154373 uint32_t uhc_val;
263*6008Syy154373
264*6008Syy154373 /* Check on the kiconv code conversion descriptor. */
265*6008Syy154373 if (kcd == NULL || kcd == (void *)-1) {
266*6008Syy154373 *errno = EBADF;
267*6008Syy154373 return ((size_t)-1);
268*6008Syy154373 }
269*6008Syy154373
270*6008Syy154373 /* If this is a state reset request, process and return. */
271*6008Syy154373 if (inbuf == NULL || *inbuf == NULL) {
272*6008Syy154373 return (0);
273*6008Syy154373 }
274*6008Syy154373
275*6008Syy154373 ret_val = 0;
276*6008Syy154373 ib = (uchar_t *)*inbuf;
277*6008Syy154373 ob = (uchar_t *)*outbuf;
278*6008Syy154373 ibtail = ib + *inbufleft;
279*6008Syy154373 obtail = ob + *outbufleft;
280*6008Syy154373
281*6008Syy154373 while (ib < ibtail) {
282*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
283*6008Syy154373 if (ob >= obtail) {
284*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
285*6008Syy154373 }
286*6008Syy154373
287*6008Syy154373 *ob++ = *ib++;
288*6008Syy154373 continue;
289*6008Syy154373 }
290*6008Syy154373
291*6008Syy154373 /*
292*6008Syy154373 * Issue EILSEQ error if the first byte is not a
293*6008Syy154373 * valid UHC leading byte.
294*6008Syy154373 */
295*6008Syy154373 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
296*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
297*6008Syy154373 }
298*6008Syy154373
299*6008Syy154373 /*
300*6008Syy154373 * Issue EINVAL error if input buffer has an incomplete
301*6008Syy154373 * character at the end of the buffer.
302*6008Syy154373 */
303*6008Syy154373 if (ibtail - ib < 2) {
304*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
305*6008Syy154373 }
306*6008Syy154373
307*6008Syy154373 /*
308*6008Syy154373 * Issue EILSEQ error if the remaining byte is not
309*6008Syy154373 * a valid UHC byte.
310*6008Syy154373 */
311*6008Syy154373 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
312*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
313*6008Syy154373 }
314*6008Syy154373
315*6008Syy154373 uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1);
316*6008Syy154373 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
317*6008Syy154373 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
318*6008Syy154373
319*6008Syy154373 if (sz < 0) {
320*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
321*6008Syy154373 }
322*6008Syy154373
323*6008Syy154373 ib += 2;
324*6008Syy154373 ob += sz;
325*6008Syy154373 }
326*6008Syy154373
327*6008Syy154373 *inbuf = (char *)ib;
328*6008Syy154373 *inbufleft = ibtail - ib;
329*6008Syy154373 *outbuf = (char *)ob;
330*6008Syy154373 *outbufleft = obtail - ob;
331*6008Syy154373
332*6008Syy154373 return (ret_val);
333*6008Syy154373 }
334*6008Syy154373
335*6008Syy154373 /*
336*6008Syy154373 * String based encoding convertor from Unified Hangul Code to UTF-8.
337*6008Syy154373 */
338*6008Syy154373 static size_t
kiconvstr_fr_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)339*6008Syy154373 kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray,
340*6008Syy154373 size_t *outlen, int flag, int *errno)
341*6008Syy154373 {
342*6008Syy154373 uchar_t *ib;
343*6008Syy154373 uchar_t *ob;
344*6008Syy154373 uchar_t *ibtail;
345*6008Syy154373 uchar_t *obtail;
346*6008Syy154373 uchar_t *oldib;
347*6008Syy154373 size_t ret_val;
348*6008Syy154373 int8_t sz;
349*6008Syy154373 uint32_t uhc_val;
350*6008Syy154373 boolean_t do_not_ignore_null;
351*6008Syy154373
352*6008Syy154373 ret_val = 0;
353*6008Syy154373 ib = (uchar_t *)inarray;
354*6008Syy154373 ob = (uchar_t *)outarray;
355*6008Syy154373 ibtail = ib + *inlen;
356*6008Syy154373 obtail = ob + *outlen;
357*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
358*6008Syy154373
359*6008Syy154373 while (ib < ibtail) {
360*6008Syy154373 if (*ib == '\0' && do_not_ignore_null)
361*6008Syy154373 break;
362*6008Syy154373
363*6008Syy154373 if (KICONV_IS_ASCII(*ib)) {
364*6008Syy154373 if (ob >= obtail) {
365*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
366*6008Syy154373 }
367*6008Syy154373
368*6008Syy154373 *ob++ = *ib++;
369*6008Syy154373 continue;
370*6008Syy154373 }
371*6008Syy154373
372*6008Syy154373 oldib = ib;
373*6008Syy154373
374*6008Syy154373 if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
375*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
376*6008Syy154373 }
377*6008Syy154373
378*6008Syy154373 if (ibtail - ib < 2) {
379*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
380*6008Syy154373 }
381*6008Syy154373
382*6008Syy154373 if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
383*6008Syy154373 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
384*6008Syy154373 }
385*6008Syy154373
386*6008Syy154373 uhc_val = *ib++;
387*6008Syy154373 uhc_val = (uhc_val << 8) | *ib++;
388*6008Syy154373 sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
389*6008Syy154373 kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
390*6008Syy154373
391*6008Syy154373 if (sz < 0) {
392*6008Syy154373 ib = oldib;
393*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
394*6008Syy154373 }
395*6008Syy154373
396*6008Syy154373 ob += sz;
397*6008Syy154373 continue;
398*6008Syy154373
399*6008Syy154373 REPLACE_INVALID:
400*6008Syy154373 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
401*6008Syy154373 ib = oldib;
402*6008Syy154373 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
403*6008Syy154373 }
404*6008Syy154373
405*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
406*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
407*6008Syy154373 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
408*6008Syy154373 ret_val++;
409*6008Syy154373 }
410*6008Syy154373
411*6008Syy154373 *inlen = ibtail - ib;
412*6008Syy154373 *outlen = obtail - ob;
413*6008Syy154373
414*6008Syy154373 return (ret_val);
415*6008Syy154373 }
416*6008Syy154373
417*6008Syy154373 /*
418*6008Syy154373 * Encoding convertor from UTF-8 to EUC-KR.
419*6008Syy154373 */
420*6008Syy154373 static size_t
kiconv_to_euckr(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)421*6008Syy154373 kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft,
422*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
423*6008Syy154373 {
424*6008Syy154373 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
425*6008Syy154373 outbytesleft, errno, utf8_to_euckr));
426*6008Syy154373 }
427*6008Syy154373
428*6008Syy154373 /*
429*6008Syy154373 * Encoding convertor from UTF-8 to Unified Hangul Code.
430*6008Syy154373 */
431*6008Syy154373 static size_t
kiconv_to_uhc(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)432*6008Syy154373 kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft,
433*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
434*6008Syy154373 {
435*6008Syy154373 return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
436*6008Syy154373 outbytesleft, errno, utf8_to_uhc));
437*6008Syy154373 }
438*6008Syy154373
439*6008Syy154373 /*
440*6008Syy154373 * String based encoding convertor from UTF-8 to EUC-KR.
441*6008Syy154373 */
442*6008Syy154373 static size_t
kiconvstr_to_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)443*6008Syy154373 kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray,
444*6008Syy154373 size_t *outlen, int flag, int *errno)
445*6008Syy154373 {
446*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
447*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr);
448*6008Syy154373 }
449*6008Syy154373
450*6008Syy154373 /*
451*6008Syy154373 * String based encoding convertor from UTF-8 to Unified Hangul Code.
452*6008Syy154373 */
453*6008Syy154373 static size_t
kiconvstr_to_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)454*6008Syy154373 kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray,
455*6008Syy154373 size_t *outlen, int flag, int *errno)
456*6008Syy154373 {
457*6008Syy154373 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
458*6008Syy154373 (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc);
459*6008Syy154373 }
460*6008Syy154373
461*6008Syy154373 /*
462*6008Syy154373 * Convert an UTF-8 character to a character of ko encodings
463*6008Syy154373 * (EUC-KR or UHC).
464*6008Syy154373 */
465*6008Syy154373 static int8_t
utf8_to_ko(uint32_t utf8,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_t * table,size_t nitems)466*6008Syy154373 utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
467*6008Syy154373 kiconv_table_t *table, size_t nitems)
468*6008Syy154373 {
469*6008Syy154373 size_t index;
470*6008Syy154373 size_t kocode;
471*6008Syy154373 int8_t kolen;
472*6008Syy154373
473*6008Syy154373 if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) {
474*6008Syy154373 /* User Definable Area handing. */
475*6008Syy154373 kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) |
476*6008Syy154373 (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START;
477*6008Syy154373 if (kocode < KICONV_KO_UDA_RANGE) {
478*6008Syy154373 kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) |
479*6008Syy154373 (kocode + KICONV_KO_UDA_OFFSET_START);
480*6008Syy154373 } else {
481*6008Syy154373 /* 0x43 = 0xA1 - 0x5E */
482*6008Syy154373 kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) |
483*6008Syy154373 (kocode + 0x43);
484*6008Syy154373 }
485*6008Syy154373
486*6008Syy154373 index = 1;
487*6008Syy154373 } else {
488*6008Syy154373 index = kiconv_binsearch(utf8, table, nitems);
489*6008Syy154373 kocode = table[index].value;
490*6008Syy154373 }
491*6008Syy154373
492*6008Syy154373 kolen = (kocode <= 0xFF) ? 1 : 2;
493*6008Syy154373
494*6008Syy154373 if (obtail - ob < kolen) {
495*6008Syy154373 *ret_val = (size_t)-1;
496*6008Syy154373 return (-1);
497*6008Syy154373 }
498*6008Syy154373
499*6008Syy154373 if (index == 0)
500*6008Syy154373 (*ret_val)++;
501*6008Syy154373
502*6008Syy154373 if (kolen > 1)
503*6008Syy154373 *ob++ = (uchar_t)(kocode >> 8);
504*6008Syy154373 *ob = (uchar_t)(kocode & 0xFF);
505*6008Syy154373
506*6008Syy154373 return (kolen);
507*6008Syy154373 }
508*6008Syy154373
509*6008Syy154373 /*
510*6008Syy154373 * Convert an UTF-8 character to Unified Hangual Code.
511*6008Syy154373 */
512*6008Syy154373 /* ARGSUSED */
513*6008Syy154373 static int8_t
utf8_to_uhc(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)514*6008Syy154373 utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
515*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
516*6008Syy154373 {
517*6008Syy154373 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc,
518*6008Syy154373 KICONV_UTF8_UHC_MAX));
519*6008Syy154373 }
520*6008Syy154373
521*6008Syy154373 /*
522*6008Syy154373 * Convert an UTF-8 character to EUC-KR.
523*6008Syy154373 */
524*6008Syy154373 /* ARGSUSED */
525*6008Syy154373 static int8_t
utf8_to_euckr(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)526*6008Syy154373 utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
527*6008Syy154373 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
528*6008Syy154373 {
529*6008Syy154373 return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr,
530*6008Syy154373 KICONV_UTF8_EUCKR_MAX));
531*6008Syy154373 }
532*6008Syy154373
533*6008Syy154373 /*
534*6008Syy154373 * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
535*6008Syy154373 */
536*6008Syy154373 static int8_t
ko_to_utf8(uint32_t ko_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_array_t * table,size_t nitems)537*6008Syy154373 ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
538*6008Syy154373 kiconv_table_array_t *table, size_t nitems)
539*6008Syy154373 {
540*6008Syy154373 size_t index;
541*6008Syy154373 int8_t sz;
542*6008Syy154373 uchar_t udc[3];
543*6008Syy154373 uchar_t *u8;
544*6008Syy154373
545*6008Syy154373 if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) {
546*6008Syy154373 /* UDA(User Definable Area) handling. */
547*6008Syy154373 uint32_t u32;
548*6008Syy154373
549*6008Syy154373 u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ?
550*6008Syy154373 KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2);
551*6008Syy154373 udc[0] = 0xEF;
552*6008Syy154373 udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6);
553*6008Syy154373 udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F));
554*6008Syy154373 u8 = udc;
555*6008Syy154373 index = 1;
556*6008Syy154373 } else {
557*6008Syy154373 index = kiconv_binsearch(ko_val, table, nitems);
558*6008Syy154373 u8 = table[index].u8;
559*6008Syy154373 }
560*6008Syy154373
561*6008Syy154373 sz = u8_number_of_bytes[u8[0]];
562*6008Syy154373
563*6008Syy154373 if (obtail - ob < sz) {
564*6008Syy154373 *ret_val = (size_t)-1;
565*6008Syy154373 return (-1);
566*6008Syy154373 }
567*6008Syy154373
568*6008Syy154373 if (index == 0)
569*6008Syy154373 (*ret_val)++; /* Non-identical conversion */
570*6008Syy154373
571*6008Syy154373 for (index = 0; index < sz; index++)
572*6008Syy154373 *ob++ = u8[index];
573*6008Syy154373
574*6008Syy154373 return (sz);
575*6008Syy154373 }
576*6008Syy154373
577*6008Syy154373 static kiconv_ops_t kiconv_ko_ops_tbl[] = {
578*6008Syy154373 {
579*6008Syy154373 "euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr,
580*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_euckr
581*6008Syy154373 },
582*6008Syy154373 {
583*6008Syy154373 "utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr,
584*6008Syy154373 close_fr_ko, kiconvstr_fr_euckr
585*6008Syy154373 },
586*6008Syy154373 {
587*6008Syy154373 "unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc,
588*6008Syy154373 kiconv_close_to_cck, kiconvstr_to_uhc
589*6008Syy154373 },
590*6008Syy154373 {
591*6008Syy154373 "utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc,
592*6008Syy154373 close_fr_ko, kiconvstr_fr_uhc
593*6008Syy154373 }
594*6008Syy154373 };
595*6008Syy154373
596*6008Syy154373 static kiconv_module_info_t kiconv_ko_info = {
597*6008Syy154373 "kiconv_ko", /* module name */
598*6008Syy154373 sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]),
599*6008Syy154373 kiconv_ko_ops_tbl,
600*6008Syy154373 0,
601*6008Syy154373 NULL,
602*6008Syy154373 NULL,
603*6008Syy154373 0
604*6008Syy154373 };
605*6008Syy154373
606*6008Syy154373 static struct modlkiconv modlkiconv_ko = {
607*6008Syy154373 &mod_kiconvops,
608*6008Syy154373 "kiconv korean module 1.0",
609*6008Syy154373 &kiconv_ko_info
610*6008Syy154373 };
611*6008Syy154373
612*6008Syy154373 static struct modlinkage modlinkage = {
613*6008Syy154373 MODREV_1,
614*6008Syy154373 (void *)&modlkiconv_ko,
615*6008Syy154373 NULL
616*6008Syy154373 };
617*6008Syy154373
618*6008Syy154373 int
_init(void)619*6008Syy154373 _init(void)
620*6008Syy154373 {
621*6008Syy154373 int err;
622*6008Syy154373
623*6008Syy154373 err = mod_install(&modlinkage);
624*6008Syy154373 if (err)
625*6008Syy154373 cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module");
626*6008Syy154373
627*6008Syy154373 return (err);
628*6008Syy154373 }
629*6008Syy154373
630*6008Syy154373 int
_fini(void)631*6008Syy154373 _fini(void)
632*6008Syy154373 {
633*6008Syy154373 int err;
634*6008Syy154373
635*6008Syy154373 /*
636*6008Syy154373 * If this module is being used, then, we cannot remove the module.
637*6008Syy154373 * The following checking will catch pretty much all usual cases.
638*6008Syy154373 *
639*6008Syy154373 * Any remaining will be catached by the kiconv_unregister_module()
640*6008Syy154373 * during mod_remove() at below.
641*6008Syy154373 */
642*6008Syy154373 if (kiconv_module_ref_count(KICONV_MODULE_ID_KO))
643*6008Syy154373 return (EBUSY);
644*6008Syy154373
645*6008Syy154373 err = mod_remove(&modlinkage);
646*6008Syy154373 if (err)
647*6008Syy154373 cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module");
648*6008Syy154373
649*6008Syy154373 return (err);
650*6008Syy154373 }
651*6008Syy154373
652*6008Syy154373 int
_info(struct modinfo * modinfop)653*6008Syy154373 _info(struct modinfo *modinfop)
654*6008Syy154373 {
655*6008Syy154373 return (mod_info(&modlinkage, modinfop));
656*6008Syy154373 }
657