xref: /onnv-gate/usr/src/uts/common/kiconv/kiconv_ko/kiconv_ko.c (revision 6008:3a1c10482cf2)
1*6008Syy154373 /*
2*6008Syy154373  * CDDL HEADER START
3*6008Syy154373  *
4*6008Syy154373  * The contents of this file are subject to the terms of the
5*6008Syy154373  * Common Development and Distribution License (the "License").
6*6008Syy154373  * You may not use this file except in compliance with the License.
7*6008Syy154373  *
8*6008Syy154373  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373  * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373  * See the License for the specific language governing permissions
11*6008Syy154373  * and limitations under the License.
12*6008Syy154373  *
13*6008Syy154373  * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373  * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373  * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373  * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373  *
19*6008Syy154373  * CDDL HEADER END
20*6008Syy154373  */
21*6008Syy154373 
22*6008Syy154373 /*
23*6008Syy154373  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24*6008Syy154373  * Use is subject to license terms.
25*6008Syy154373  */
26*6008Syy154373 
27*6008Syy154373 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*6008Syy154373 
29*6008Syy154373 #include <sys/types.h>
30*6008Syy154373 #include <sys/param.h>
31*6008Syy154373 #include <sys/sysmacros.h>
32*6008Syy154373 #include <sys/systm.h>
33*6008Syy154373 #include <sys/debug.h>
34*6008Syy154373 #include <sys/kmem.h>
35*6008Syy154373 #include <sys/sunddi.h>
36*6008Syy154373 #include <sys/byteorder.h>
37*6008Syy154373 #include <sys/errno.h>
38*6008Syy154373 #include <sys/modctl.h>
39*6008Syy154373 #include <sys/u8_textprep.h>
40*6008Syy154373 #include <sys/kiconv.h>
41*6008Syy154373 #include <sys/kiconv_cck_common.h>
42*6008Syy154373 #include <sys/kiconv_ko.h>
43*6008Syy154373 #include <sys/kiconv_uhc_utf8.h>
44*6008Syy154373 #include <sys/kiconv_utf8_uhc.h>
45*6008Syy154373 #include <sys/kiconv_euckr_utf8.h>
46*6008Syy154373 #include <sys/kiconv_utf8_euckr.h>
47*6008Syy154373 
48*6008Syy154373 static int8_t utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
49*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
50*6008Syy154373 static int8_t utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
51*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val);
52*6008Syy154373 static int8_t ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail,
53*6008Syy154373 	size_t *ret_val, kiconv_table_array_t *table, size_t nitems);
54*6008Syy154373 
55*6008Syy154373 
/*
 * Magic conversion descriptor values.  The "from Korean" converters keep
 * no per-conversion state, so their open routines hand back one of these
 * small integers cast to a pointer instead of allocating anything.
 * KICONV_KO_MAX_MAGIC_ID is the largest value close_fr_ko() will accept.
 */
#define	KICONV_KO_EUCKR		(0x01)
#define	KICONV_KO_UHC		(0x02)
#define	KICONV_KO_MAX_MAGIC_ID	(0x02)

/*
 * Open routine for the EUC-KR to UTF-8 conversion.
 * Returns the EUC-KR magic ID as the conversion descriptor.
 */
static void *
open_fr_euckr(void)
{
	return ((void *)KICONV_KO_EUCKR);
}

/*
 * Open routine for the Unified Hangul Code to UTF-8 conversion.
 * Returns the UHC magic ID as the conversion descriptor.
 */
static void *
open_fr_uhc(void)
{
	return ((void *)KICONV_KO_UHC);
}

/*
 * Close routine shared by both "from Korean" conversions.
 *
 * There is nothing to release; the descriptor is only range checked.
 * Returns EBADF when the descriptor value is larger than
 * KICONV_KO_MAX_MAGIC_ID and 0 otherwise (a NULL descriptor is therefore
 * accepted here).
 */
static int
close_fr_ko(void *s)
{
	if ((uintptr_t)s > KICONV_KO_MAX_MAGIC_ID)
		return (EBADF);

	return (0);
}
80*6008Syy154373 
81*6008Syy154373 /*
82*6008Syy154373  * Encoding convertor from EUC-KR to UTF-8.
83*6008Syy154373  */
84*6008Syy154373 static size_t
kiconv_fr_euckr(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)85*6008Syy154373 kiconv_fr_euckr(void *kcd, char **inbuf, size_t *inbufleft,
86*6008Syy154373 	char **outbuf, size_t *outbufleft, int *errno)
87*6008Syy154373 {
88*6008Syy154373 	uchar_t		*ib;
89*6008Syy154373 	uchar_t		*ob;
90*6008Syy154373 	uchar_t		*ibtail;
91*6008Syy154373 	uchar_t		*obtail;
92*6008Syy154373 	size_t		ret_val;
93*6008Syy154373 	int8_t		sz;
94*6008Syy154373 	uint32_t	euckr_val;
95*6008Syy154373 
96*6008Syy154373 	/* Check on the kiconv code conversion descriptor. */
97*6008Syy154373 	if (kcd == NULL || kcd == (void *)-1) {
98*6008Syy154373 		*errno = EBADF;
99*6008Syy154373 		return ((size_t)-1);
100*6008Syy154373 	}
101*6008Syy154373 
102*6008Syy154373 	/* If this is a state reset request, process and return. */
103*6008Syy154373 	if (inbuf == NULL || *inbuf == NULL) {
104*6008Syy154373 		return (0);
105*6008Syy154373 	}
106*6008Syy154373 
107*6008Syy154373 	ret_val = 0;
108*6008Syy154373 	ib = (uchar_t *)*inbuf;
109*6008Syy154373 	ob = (uchar_t *)*outbuf;
110*6008Syy154373 	ibtail = ib + *inbufleft;
111*6008Syy154373 	obtail = ob + *outbufleft;
112*6008Syy154373 
113*6008Syy154373 	while (ib < ibtail) {
114*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
115*6008Syy154373 			if (ob >= obtail) {
116*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
117*6008Syy154373 			}
118*6008Syy154373 
119*6008Syy154373 			*ob++ = *ib++;
120*6008Syy154373 			continue;
121*6008Syy154373 		}
122*6008Syy154373 
123*6008Syy154373 		/*
124*6008Syy154373 		 * Issue EILSEQ error if the first byte is not a
125*6008Syy154373 		 * valid EUC-KR leading byte.
126*6008Syy154373 		 */
127*6008Syy154373 		if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
128*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
129*6008Syy154373 		}
130*6008Syy154373 
131*6008Syy154373 		/*
132*6008Syy154373 		 * Issue EINVAL error if input buffer has an incomplete
133*6008Syy154373 		 * character at the end of the buffer.
134*6008Syy154373 		 */
135*6008Syy154373 		if (ibtail - ib < 2) {
136*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
137*6008Syy154373 		}
138*6008Syy154373 
139*6008Syy154373 		/*
140*6008Syy154373 		 * Issue EILSEQ error if the remaining byte is not
141*6008Syy154373 		 * a valid EUC-KR byte.
142*6008Syy154373 		 */
143*6008Syy154373 		if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
144*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
145*6008Syy154373 		}
146*6008Syy154373 
147*6008Syy154373 		euckr_val = (uint32_t)(*ib) << 8 | *(ib + 1);
148*6008Syy154373 		sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
149*6008Syy154373 		    kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
150*6008Syy154373 
151*6008Syy154373 		if (sz < 0) {
152*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
153*6008Syy154373 		}
154*6008Syy154373 
155*6008Syy154373 		ib += 2;
156*6008Syy154373 		ob += sz;
157*6008Syy154373 	}
158*6008Syy154373 
159*6008Syy154373 	*inbuf = (char *)ib;
160*6008Syy154373 	*inbufleft = ibtail - ib;
161*6008Syy154373 	*outbuf = (char *)ob;
162*6008Syy154373 	*outbufleft = obtail - ob;
163*6008Syy154373 
164*6008Syy154373 	return (ret_val);
165*6008Syy154373 }
166*6008Syy154373 
167*6008Syy154373 /*
168*6008Syy154373  * String based encoding convertor from EUC-KR to UTF-8.
169*6008Syy154373  */
170*6008Syy154373 static size_t
kiconvstr_fr_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)171*6008Syy154373 kiconvstr_fr_euckr(char *inarray, size_t *inlen, char *outarray,
172*6008Syy154373 	size_t *outlen, int flag, int *errno)
173*6008Syy154373 {
174*6008Syy154373 	uchar_t		*ib;
175*6008Syy154373 	uchar_t		*ob;
176*6008Syy154373 	uchar_t		*ibtail;
177*6008Syy154373 	uchar_t		*obtail;
178*6008Syy154373 	uchar_t		*oldib;
179*6008Syy154373 	size_t		ret_val;
180*6008Syy154373 	int8_t		sz;
181*6008Syy154373 	uint32_t	euckr_val;
182*6008Syy154373 	boolean_t	do_not_ignore_null;
183*6008Syy154373 
184*6008Syy154373 	ret_val = 0;
185*6008Syy154373 	ib = (uchar_t *)inarray;
186*6008Syy154373 	ob = (uchar_t *)outarray;
187*6008Syy154373 	ibtail = ib + *inlen;
188*6008Syy154373 	obtail = ob + *outlen;
189*6008Syy154373 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
190*6008Syy154373 
191*6008Syy154373 	while (ib < ibtail) {
192*6008Syy154373 		if (*ib == '\0' && do_not_ignore_null)
193*6008Syy154373 			break;
194*6008Syy154373 
195*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
196*6008Syy154373 			if (ob >= obtail) {
197*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
198*6008Syy154373 			}
199*6008Syy154373 
200*6008Syy154373 			*ob++ = *ib++;
201*6008Syy154373 			continue;
202*6008Syy154373 		}
203*6008Syy154373 
204*6008Syy154373 		oldib = ib;
205*6008Syy154373 
206*6008Syy154373 		if (! KICONV_KO_IS_EUCKR_BYTE(*ib)) {
207*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
208*6008Syy154373 		}
209*6008Syy154373 
210*6008Syy154373 		if (ibtail - ib < 2) {
211*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
212*6008Syy154373 		}
213*6008Syy154373 
214*6008Syy154373 		if (! KICONV_KO_IS_EUCKR_BYTE(*(ib + 1))) {
215*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
216*6008Syy154373 		}
217*6008Syy154373 
218*6008Syy154373 		euckr_val = *ib++;
219*6008Syy154373 		euckr_val = (euckr_val << 8) | *ib++;
220*6008Syy154373 		sz = ko_to_utf8(euckr_val, ob, obtail, &ret_val,
221*6008Syy154373 		    kiconv_euckr_utf8, KICONV_EUCKR_UTF8_MAX);
222*6008Syy154373 
223*6008Syy154373 		if (sz < 0) {
224*6008Syy154373 			ib = oldib;
225*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
226*6008Syy154373 		}
227*6008Syy154373 
228*6008Syy154373 		ob += sz;
229*6008Syy154373 		continue;
230*6008Syy154373 
231*6008Syy154373 REPLACE_INVALID:
232*6008Syy154373 		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
233*6008Syy154373 			ib = oldib;
234*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
235*6008Syy154373 		}
236*6008Syy154373 
237*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
238*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
239*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
240*6008Syy154373 		ret_val++;
241*6008Syy154373 	}
242*6008Syy154373 
243*6008Syy154373 	*inlen = ibtail - ib;
244*6008Syy154373 	*outlen = obtail - ob;
245*6008Syy154373 
246*6008Syy154373 	return (ret_val);
247*6008Syy154373 }
248*6008Syy154373 
249*6008Syy154373 /*
250*6008Syy154373  * Encoding convertor from Unified Hangul Code to UTF-8.
251*6008Syy154373  */
252*6008Syy154373 static size_t
kiconv_fr_uhc(void * kcd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft,int * errno)253*6008Syy154373 kiconv_fr_uhc(void *kcd, char **inbuf, size_t *inbufleft,
254*6008Syy154373 	char **outbuf, size_t *outbufleft, int *errno)
255*6008Syy154373 {
256*6008Syy154373 	uchar_t		*ib;
257*6008Syy154373 	uchar_t		*ob;
258*6008Syy154373 	uchar_t		*ibtail;
259*6008Syy154373 	uchar_t		*obtail;
260*6008Syy154373 	size_t		ret_val;
261*6008Syy154373 	int8_t		sz;
262*6008Syy154373 	uint32_t	uhc_val;
263*6008Syy154373 
264*6008Syy154373 	/* Check on the kiconv code conversion descriptor. */
265*6008Syy154373 	if (kcd == NULL || kcd == (void *)-1) {
266*6008Syy154373 		*errno = EBADF;
267*6008Syy154373 		return ((size_t)-1);
268*6008Syy154373 	}
269*6008Syy154373 
270*6008Syy154373 	/* If this is a state reset request, process and return. */
271*6008Syy154373 	if (inbuf == NULL || *inbuf == NULL) {
272*6008Syy154373 		return (0);
273*6008Syy154373 	}
274*6008Syy154373 
275*6008Syy154373 	ret_val = 0;
276*6008Syy154373 	ib = (uchar_t *)*inbuf;
277*6008Syy154373 	ob = (uchar_t *)*outbuf;
278*6008Syy154373 	ibtail = ib + *inbufleft;
279*6008Syy154373 	obtail = ob + *outbufleft;
280*6008Syy154373 
281*6008Syy154373 	while (ib < ibtail) {
282*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
283*6008Syy154373 			if (ob >= obtail) {
284*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
285*6008Syy154373 			}
286*6008Syy154373 
287*6008Syy154373 			*ob++ = *ib++;
288*6008Syy154373 			continue;
289*6008Syy154373 		}
290*6008Syy154373 
291*6008Syy154373 		/*
292*6008Syy154373 		 * Issue EILSEQ error if the first byte is not a
293*6008Syy154373 		 * valid UHC leading byte.
294*6008Syy154373 		 */
295*6008Syy154373 		if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
296*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
297*6008Syy154373 		}
298*6008Syy154373 
299*6008Syy154373 		/*
300*6008Syy154373 		 * Issue EINVAL error if input buffer has an incomplete
301*6008Syy154373 		 * character at the end of the buffer.
302*6008Syy154373 		 */
303*6008Syy154373 		if (ibtail - ib < 2) {
304*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EINVAL);
305*6008Syy154373 		}
306*6008Syy154373 
307*6008Syy154373 		/*
308*6008Syy154373 		 * Issue EILSEQ error if the remaining byte is not
309*6008Syy154373 		 * a valid UHC byte.
310*6008Syy154373 		 */
311*6008Syy154373 		if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
312*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
313*6008Syy154373 		}
314*6008Syy154373 
315*6008Syy154373 		uhc_val = (uint32_t)(*ib) << 8 | *(ib + 1);
316*6008Syy154373 		sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
317*6008Syy154373 		    kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
318*6008Syy154373 
319*6008Syy154373 		if (sz < 0) {
320*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
321*6008Syy154373 		}
322*6008Syy154373 
323*6008Syy154373 		ib += 2;
324*6008Syy154373 		ob += sz;
325*6008Syy154373 	}
326*6008Syy154373 
327*6008Syy154373 	*inbuf = (char *)ib;
328*6008Syy154373 	*inbufleft = ibtail - ib;
329*6008Syy154373 	*outbuf = (char *)ob;
330*6008Syy154373 	*outbufleft = obtail - ob;
331*6008Syy154373 
332*6008Syy154373 	return (ret_val);
333*6008Syy154373 }
334*6008Syy154373 
335*6008Syy154373 /*
336*6008Syy154373  * String based encoding convertor from Unified Hangul Code to UTF-8.
337*6008Syy154373  */
338*6008Syy154373 static size_t
kiconvstr_fr_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)339*6008Syy154373 kiconvstr_fr_uhc(char *inarray, size_t *inlen, char *outarray,
340*6008Syy154373 	size_t *outlen, int flag, int *errno)
341*6008Syy154373 {
342*6008Syy154373 	uchar_t		*ib;
343*6008Syy154373 	uchar_t		*ob;
344*6008Syy154373 	uchar_t		*ibtail;
345*6008Syy154373 	uchar_t		*obtail;
346*6008Syy154373 	uchar_t		*oldib;
347*6008Syy154373 	size_t		ret_val;
348*6008Syy154373 	int8_t		sz;
349*6008Syy154373 	uint32_t	uhc_val;
350*6008Syy154373 	boolean_t	do_not_ignore_null;
351*6008Syy154373 
352*6008Syy154373 	ret_val = 0;
353*6008Syy154373 	ib = (uchar_t *)inarray;
354*6008Syy154373 	ob = (uchar_t *)outarray;
355*6008Syy154373 	ibtail = ib + *inlen;
356*6008Syy154373 	obtail = ob + *outlen;
357*6008Syy154373 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
358*6008Syy154373 
359*6008Syy154373 	while (ib < ibtail) {
360*6008Syy154373 		if (*ib == '\0' && do_not_ignore_null)
361*6008Syy154373 			break;
362*6008Syy154373 
363*6008Syy154373 		if (KICONV_IS_ASCII(*ib)) {
364*6008Syy154373 			if (ob >= obtail) {
365*6008Syy154373 				KICONV_SET_ERRNO_AND_BREAK(E2BIG);
366*6008Syy154373 			}
367*6008Syy154373 
368*6008Syy154373 			*ob++ = *ib++;
369*6008Syy154373 			continue;
370*6008Syy154373 		}
371*6008Syy154373 
372*6008Syy154373 		oldib = ib;
373*6008Syy154373 
374*6008Syy154373 		if (! KICONV_KO_IS_UHC_1st_BYTE(*ib)) {
375*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
376*6008Syy154373 		}
377*6008Syy154373 
378*6008Syy154373 		if (ibtail - ib < 2) {
379*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
380*6008Syy154373 		}
381*6008Syy154373 
382*6008Syy154373 		if (! KICONV_KO_IS_UHC_2nd_BYTE(*(ib + 1))) {
383*6008Syy154373 			KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
384*6008Syy154373 		}
385*6008Syy154373 
386*6008Syy154373 		uhc_val = *ib++;
387*6008Syy154373 		uhc_val = (uhc_val << 8) | *ib++;
388*6008Syy154373 		sz = ko_to_utf8(uhc_val, ob, obtail, &ret_val,
389*6008Syy154373 		    kiconv_uhc_utf8, KICONV_UHC_UTF8_MAX);
390*6008Syy154373 
391*6008Syy154373 		if (sz < 0) {
392*6008Syy154373 			ib = oldib;
393*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
394*6008Syy154373 		}
395*6008Syy154373 
396*6008Syy154373 		ob += sz;
397*6008Syy154373 		continue;
398*6008Syy154373 
399*6008Syy154373 REPLACE_INVALID:
400*6008Syy154373 		if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
401*6008Syy154373 			ib = oldib;
402*6008Syy154373 			KICONV_SET_ERRNO_AND_BREAK(E2BIG);
403*6008Syy154373 		}
404*6008Syy154373 
405*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
406*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
407*6008Syy154373 		*ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
408*6008Syy154373 		ret_val++;
409*6008Syy154373 	}
410*6008Syy154373 
411*6008Syy154373 	*inlen = ibtail - ib;
412*6008Syy154373 	*outlen = obtail - ob;
413*6008Syy154373 
414*6008Syy154373 	return (ret_val);
415*6008Syy154373 }
416*6008Syy154373 
417*6008Syy154373 /*
418*6008Syy154373  * Encoding convertor from UTF-8 to EUC-KR.
419*6008Syy154373  */
420*6008Syy154373 static size_t
kiconv_to_euckr(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)421*6008Syy154373 kiconv_to_euckr(void *kcd, char **inbuf, size_t *inbytesleft,
422*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
423*6008Syy154373 {
424*6008Syy154373 	return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
425*6008Syy154373 	    outbytesleft, errno, utf8_to_euckr));
426*6008Syy154373 }
427*6008Syy154373 
428*6008Syy154373 /*
429*6008Syy154373  * Encoding convertor from UTF-8 to Unified Hangul Code.
430*6008Syy154373  */
431*6008Syy154373 static size_t
kiconv_to_uhc(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)432*6008Syy154373 kiconv_to_uhc(void *kcd, char **inbuf, size_t *inbytesleft,
433*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno)
434*6008Syy154373 {
435*6008Syy154373 	return (kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
436*6008Syy154373 	    outbytesleft, errno, utf8_to_uhc));
437*6008Syy154373 }
438*6008Syy154373 
439*6008Syy154373 /*
440*6008Syy154373  * String based encoding convertor from UTF-8 to EUC-KR.
441*6008Syy154373  */
442*6008Syy154373 static size_t
kiconvstr_to_euckr(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)443*6008Syy154373 kiconvstr_to_euckr(char *inarray, size_t *inlen, char *outarray,
444*6008Syy154373 	size_t *outlen, int flag, int *errno)
445*6008Syy154373 {
446*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
447*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_euckr);
448*6008Syy154373 }
449*6008Syy154373 
450*6008Syy154373 /*
451*6008Syy154373  * String based encoding convertor from UTF-8 to Unified Hangul Code.
452*6008Syy154373  */
453*6008Syy154373 static size_t
kiconvstr_to_uhc(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)454*6008Syy154373 kiconvstr_to_uhc(char *inarray, size_t *inlen, char *outarray,
455*6008Syy154373 	size_t *outlen, int flag, int *errno)
456*6008Syy154373 {
457*6008Syy154373 	return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
458*6008Syy154373 	    (uchar_t *)outarray, outlen, flag, errno, utf8_to_uhc);
459*6008Syy154373 }
460*6008Syy154373 
461*6008Syy154373 /*
462*6008Syy154373  * Convert an UTF-8 character to a character of ko encodings
463*6008Syy154373  * (EUC-KR or UHC).
464*6008Syy154373  */
465*6008Syy154373 static int8_t
utf8_to_ko(uint32_t utf8,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_t * table,size_t nitems)466*6008Syy154373 utf8_to_ko(uint32_t utf8, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
467*6008Syy154373 	kiconv_table_t *table, size_t nitems)
468*6008Syy154373 {
469*6008Syy154373 	size_t	index;
470*6008Syy154373 	size_t	kocode;
471*6008Syy154373 	int8_t  kolen;
472*6008Syy154373 
473*6008Syy154373 	if (KICONV_KO_IS_UDC_IN_UTF8(utf8)) {
474*6008Syy154373 		/* User Definable Area handing. */
475*6008Syy154373 		kocode = (((utf8 & 0xF0000) >> 4) | ((utf8 & 0x3F00) >> 2) |
476*6008Syy154373 		    (utf8 & 0x3F)) - KICONV_KO_UDA_UCS4_START;
477*6008Syy154373 		if (kocode < KICONV_KO_UDA_RANGE) {
478*6008Syy154373 			kocode = (KICONV_KO_UDA_EUC_SEG1 << 8) |
479*6008Syy154373 			    (kocode + KICONV_KO_UDA_OFFSET_START);
480*6008Syy154373 		} else {
481*6008Syy154373 			/* 0x43 = 0xA1 - 0x5E */
482*6008Syy154373 			kocode = (KICONV_KO_UDA_EUC_SEG2 << 8) |
483*6008Syy154373 			    (kocode + 0x43);
484*6008Syy154373 		}
485*6008Syy154373 
486*6008Syy154373 		index = 1;
487*6008Syy154373 	} else {
488*6008Syy154373 		index = kiconv_binsearch(utf8, table, nitems);
489*6008Syy154373 		kocode = table[index].value;
490*6008Syy154373 	}
491*6008Syy154373 
492*6008Syy154373 	kolen = (kocode <= 0xFF) ? 1 : 2;
493*6008Syy154373 
494*6008Syy154373 	if (obtail - ob < kolen) {
495*6008Syy154373 		*ret_val = (size_t)-1;
496*6008Syy154373 		return (-1);
497*6008Syy154373 	}
498*6008Syy154373 
499*6008Syy154373 	if (index == 0)
500*6008Syy154373 		(*ret_val)++;
501*6008Syy154373 
502*6008Syy154373 	if (kolen > 1)
503*6008Syy154373 		*ob++ = (uchar_t)(kocode >> 8);
504*6008Syy154373 	*ob = (uchar_t)(kocode & 0xFF);
505*6008Syy154373 
506*6008Syy154373 	return (kolen);
507*6008Syy154373 }
508*6008Syy154373 
509*6008Syy154373 /*
510*6008Syy154373  * Convert an UTF-8 character to Unified Hangual Code.
511*6008Syy154373  */
512*6008Syy154373 /* ARGSUSED */
513*6008Syy154373 static int8_t
utf8_to_uhc(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)514*6008Syy154373 utf8_to_uhc(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
515*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val)
516*6008Syy154373 {
517*6008Syy154373 	return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_uhc,
518*6008Syy154373 	    KICONV_UTF8_UHC_MAX));
519*6008Syy154373 }
520*6008Syy154373 
521*6008Syy154373 /*
522*6008Syy154373  * Convert an UTF-8 character to EUC-KR.
523*6008Syy154373  */
524*6008Syy154373 /* ARGSUSED */
525*6008Syy154373 static int8_t
utf8_to_euckr(uint32_t utf8,uchar_t ** inbuf,uchar_t * ibtail,uchar_t * ob,uchar_t * obtail,size_t * ret_val)526*6008Syy154373 utf8_to_euckr(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
527*6008Syy154373 	uchar_t *ob, uchar_t *obtail, size_t *ret_val)
528*6008Syy154373 {
529*6008Syy154373 	return (utf8_to_ko(utf8, ob, obtail, ret_val, kiconv_utf8_euckr,
530*6008Syy154373 	    KICONV_UTF8_EUCKR_MAX));
531*6008Syy154373 }
532*6008Syy154373 
533*6008Syy154373 /*
534*6008Syy154373  * Convert a single ko encoding (EUC-KR or UHC) character to UTF-8.
535*6008Syy154373  */
536*6008Syy154373 static int8_t
ko_to_utf8(uint32_t ko_val,uchar_t * ob,uchar_t * obtail,size_t * ret_val,kiconv_table_array_t * table,size_t nitems)537*6008Syy154373 ko_to_utf8(uint32_t ko_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
538*6008Syy154373 	kiconv_table_array_t *table, size_t nitems)
539*6008Syy154373 {
540*6008Syy154373 	size_t	index;
541*6008Syy154373 	int8_t	sz;
542*6008Syy154373 	uchar_t	udc[3];
543*6008Syy154373 	uchar_t	*u8;
544*6008Syy154373 
545*6008Syy154373 	if (KICONV_KO_IS_UDC_IN_EUC(ko_val)) {
546*6008Syy154373 		/* UDA(User Definable Area) handling. */
547*6008Syy154373 		uint32_t u32;
548*6008Syy154373 
549*6008Syy154373 		u32 = (ko_val & 0xFF) + (((ko_val & 0xFF00) == 0xC900) ?
550*6008Syy154373 		    KICONV_KO_UDA_OFFSET_1 : KICONV_KO_UDA_OFFSET_2);
551*6008Syy154373 		udc[0] = 0xEF;
552*6008Syy154373 		udc[1] = (uchar_t)(0x80 | (u32 & 0x00000FC0) >> 6);
553*6008Syy154373 		udc[2] = (uchar_t)(0x80 | (u32 & 0x0000003F));
554*6008Syy154373 		u8 = udc;
555*6008Syy154373 		index = 1;
556*6008Syy154373 	} else {
557*6008Syy154373 		index = kiconv_binsearch(ko_val, table, nitems);
558*6008Syy154373 		u8 = table[index].u8;
559*6008Syy154373 	}
560*6008Syy154373 
561*6008Syy154373 	sz = u8_number_of_bytes[u8[0]];
562*6008Syy154373 
563*6008Syy154373 	if (obtail - ob < sz) {
564*6008Syy154373 		*ret_val = (size_t)-1;
565*6008Syy154373 		return (-1);
566*6008Syy154373 	}
567*6008Syy154373 
568*6008Syy154373 	if (index == 0)
569*6008Syy154373 		(*ret_val)++;	/* Non-identical conversion */
570*6008Syy154373 
571*6008Syy154373 	for (index = 0; index < sz; index++)
572*6008Syy154373 		*ob++ = u8[index];
573*6008Syy154373 
574*6008Syy154373 	return (sz);
575*6008Syy154373 }
576*6008Syy154373 
577*6008Syy154373 static kiconv_ops_t kiconv_ko_ops_tbl[] = {
578*6008Syy154373 	{
579*6008Syy154373 		"euc-kr", "utf-8", kiconv_open_to_cck, kiconv_to_euckr,
580*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_euckr
581*6008Syy154373 	},
582*6008Syy154373 	{
583*6008Syy154373 		"utf-8", "euc-kr", open_fr_euckr, kiconv_fr_euckr,
584*6008Syy154373 		close_fr_ko, kiconvstr_fr_euckr
585*6008Syy154373 	},
586*6008Syy154373 	{
587*6008Syy154373 		"unifiedhangul", "utf-8", kiconv_open_to_cck, kiconv_to_uhc,
588*6008Syy154373 		kiconv_close_to_cck, kiconvstr_to_uhc
589*6008Syy154373 	},
590*6008Syy154373 	{
591*6008Syy154373 		"utf-8", "unifiedhangul", open_fr_uhc, kiconv_fr_uhc,
592*6008Syy154373 		close_fr_ko, kiconvstr_fr_uhc
593*6008Syy154373 	}
594*6008Syy154373 };
595*6008Syy154373 
596*6008Syy154373 static kiconv_module_info_t kiconv_ko_info = {
597*6008Syy154373 	"kiconv_ko",		/* module name */
598*6008Syy154373 	sizeof (kiconv_ko_ops_tbl) / sizeof (kiconv_ko_ops_tbl[0]),
599*6008Syy154373 	kiconv_ko_ops_tbl,
600*6008Syy154373 	0,
601*6008Syy154373 	NULL,
602*6008Syy154373 	NULL,
603*6008Syy154373 	0
604*6008Syy154373 };
605*6008Syy154373 
606*6008Syy154373 static struct modlkiconv modlkiconv_ko = {
607*6008Syy154373 	&mod_kiconvops,
608*6008Syy154373 	"kiconv korean module 1.0",
609*6008Syy154373 	&kiconv_ko_info
610*6008Syy154373 };
611*6008Syy154373 
612*6008Syy154373 static struct modlinkage modlinkage = {
613*6008Syy154373 	MODREV_1,
614*6008Syy154373 	(void *)&modlkiconv_ko,
615*6008Syy154373 	NULL
616*6008Syy154373 };
617*6008Syy154373 
618*6008Syy154373 int
_init(void)619*6008Syy154373 _init(void)
620*6008Syy154373 {
621*6008Syy154373 	int err;
622*6008Syy154373 
623*6008Syy154373 	err = mod_install(&modlinkage);
624*6008Syy154373 	if (err)
625*6008Syy154373 		cmn_err(CE_WARN, "kiconv_ko: failed to load kernel module");
626*6008Syy154373 
627*6008Syy154373 	return (err);
628*6008Syy154373 }
629*6008Syy154373 
630*6008Syy154373 int
_fini(void)631*6008Syy154373 _fini(void)
632*6008Syy154373 {
633*6008Syy154373 	int err;
634*6008Syy154373 
635*6008Syy154373 	/*
636*6008Syy154373 	 * If this module is being used, then, we cannot remove the module.
637*6008Syy154373 	 * The following checking will catch pretty much all usual cases.
638*6008Syy154373 	 *
639*6008Syy154373 	 * Any remaining will be catached by the kiconv_unregister_module()
640*6008Syy154373 	 * during mod_remove() at below.
641*6008Syy154373 	 */
642*6008Syy154373 	if (kiconv_module_ref_count(KICONV_MODULE_ID_KO))
643*6008Syy154373 		return (EBUSY);
644*6008Syy154373 
645*6008Syy154373 	err = mod_remove(&modlinkage);
646*6008Syy154373 	if (err)
647*6008Syy154373 		cmn_err(CE_WARN, "kiconv_ko: failed to remove kernel module");
648*6008Syy154373 
649*6008Syy154373 	return (err);
650*6008Syy154373 }
651*6008Syy154373 
652*6008Syy154373 int
_info(struct modinfo * modinfop)653*6008Syy154373 _info(struct modinfo *modinfop)
654*6008Syy154373 {
655*6008Syy154373 	return (mod_info(&modlinkage, modinfop));
656*6008Syy154373 }
657