xref: /onnv-gate/usr/src/uts/common/sys/kiconv_cck_common.h (revision 6008:3a1c10482cf2)
1*6008Syy154373 /*
2*6008Syy154373  * CDDL HEADER START
3*6008Syy154373  *
4*6008Syy154373  * The contents of this file are subject to the terms of the
5*6008Syy154373  * Common Development and Distribution License (the "License").
6*6008Syy154373  * You may not use this file except in compliance with the License.
7*6008Syy154373  *
8*6008Syy154373  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373  * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373  * See the License for the specific language governing permissions
11*6008Syy154373  * and limitations under the License.
12*6008Syy154373  *
13*6008Syy154373  * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373  * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373  * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373  * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373  *
19*6008Syy154373  * CDDL HEADER END
20*6008Syy154373  */
21*6008Syy154373 /*
22*6008Syy154373  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23*6008Syy154373  * Use is subject to license terms.
24*6008Syy154373  */
25*6008Syy154373 
26*6008Syy154373 #ifndef _SYS_KICONV_CCK_COMMON_H
27*6008Syy154373 #define	_SYS_KICONV_CCK_COMMON_H
28*6008Syy154373 
29*6008Syy154373 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30*6008Syy154373 
31*6008Syy154373 #ifdef __cplusplus
32*6008Syy154373 extern "C" {
33*6008Syy154373 #endif
34*6008Syy154373 
35*6008Syy154373 #ifdef	_KERNEL
36*6008Syy154373 
/* Lowest value a leading byte of an EUC encoded character can take. */
#define	KICONV_EUC_START	(0xa1)
39*6008Syy154373 
/*
 * Non-zero when v lies inside the EUC byte range 0xA1 - 0xFE (inclusive).
 * Note that v may be evaluated twice, so it must be free of side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	(!((v) < 0xA1 || (v) > 0xFE))
42*6008Syy154373 
/*
 * Non-zero when c, converted to uchar_t, is an ASCII character, i.e. its
 * value is in the range 0x00 - 0x7F.
 */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) < 0x80)
45*6008Syy154373 
/*
 * The UTF-8 replacement character that is used for non-identicals:
 * its three individual bytes, the same three bytes packed into a single
 * value, and its length in bytes.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xEFBFBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)
52*6008Syy154373 
/*
 * Non-zero when the 2nd byte of a 3 or 4 byte UTF-8 character is invalid,
 * that is, when it falls outside of the closed range
 * [u8_valid_min_2nd_byte[first], u8_valid_max_2nd_byte[first]] that is
 * looked up with the first byte of the character.
 *
 * Both arguments may be evaluated more than once.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	    (!((second) >= u8_valid_min_2nd_byte[(first)] &&		\
	    (second) <= u8_valid_max_2nd_byte[(first)]))
59*6008Syy154373 
/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * ib is advanced by 3 when bom_processed is still 0, at least three bytes
 * remain before ibtail and they are 0xEF 0xBB 0xBF. bom_processed is set
 * to 1 in every case, so the check happens at most once per state.
 *
 * The macro relies on a variable named kcd being in scope at the point of
 * use; it is cast to kiconv_state_t and its bom_processed member is read
 * and written. ib must be a modifiable lvalue and should point at unsigned
 * bytes, otherwise the comparisons against 0xef/0xbb/0xbf cannot match
 * where plain char is signed.
 *
 * The body is wrapped in do { } while (0) so that the whole macro is a
 * single statement: it can be used as the body of an unbraced if/else
 * without the bom_processed assignment escaping the condition.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)
72*6008Syy154373 
/*
 * Check BOM of UTF-8 without state information.
 *
 * When at least three bytes remain before ibtail and they are
 * 0xEF 0xBB 0xBF, ib is advanced past them; otherwise ib is untouched.
 * ib must be a modifiable lvalue and should point at unsigned bytes.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms exactly one statement and can safely be combined with
 * if/else at the point of use.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	do {								\
		if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
	} while (0)
81*6008Syy154373 
/*
 * Set errno and break.
 *
 * Stores err through the pointer named errno, sets ret_val to (size_t)-1
 * and leaves the enclosing loop or switch with break. Both errno (a
 * pointer) and ret_val must be in scope at the point of use, and the macro
 * must be used inside a loop or switch.
 *
 * A do { } while (0) wrapper cannot be used here because it would capture
 * the break. The "if (1) { ... } else (void) 0" form gives the same
 * single-statement behaviour: all three actions stay together when the
 * macro is the body of an unbraced if, and the trailing semicolon written
 * by the caller is consumed by the else branch.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	if (1) {							\
		*errno = (err);						\
		ret_val = (size_t)-1;					\
		break;							\
	} else								\
		(void) 0
89*6008Syy154373 
/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * When KICONV_REPLACE_INVALID is set in flag, ib is advanced by advance
 * and control jumps to the REPLACE_INVALID label. Otherwise the error err
 * is reported through KICONV_SET_ERRNO_AND_BREAK(), which also leaves the
 * enclosing loop or switch.
 *
 * The point of use must provide flag, ib, a REPLACE_INVALID label and
 * whatever KICONV_SET_ERRNO_AND_BREAK() needs (errno, ret_val and an
 * enclosing loop or switch).
 *
 * The whole macro is one if/else statement so that the error path cannot
 * escape an unbraced if at the point of use; the final else consumes the
 * semicolon written after the macro. A do { } while (0) wrapper is not an
 * option because it would capture the break.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	} else if (1) {							\
		KICONV_SET_ERRNO_AND_BREAK((err));			\
	} else								\
		(void) 0
99*6008Syy154373 
/*
 * Entry type of the conversion tables for UTF-8 -> CCK encoding: a 32-bit
 * lookup key paired with a 32-bit value.
 * NOTE(review): presumably key is the UTF-8 character and value the
 * corresponding CCK code -- confirm against the table definitions.
 */
typedef struct {
	uint32_t	key;
	uint32_t	value;
} kiconv_table_t;
105*6008Syy154373 
106*6008Syy154373 /* Conversion table for CCK encoding -> utf8. */
107*6008Syy154373 typedef struct {
108*6008Syy154373 	uint32_t key;
109*6008Syy154373 	uchar_t u8[4];
110*6008Syy154373 } kiconv_table_array_t;
111*6008Syy154373 
112*6008Syy154373 /*
113*6008Syy154373  * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
114*6008Syy154373  * Currently parameter ib/ibtail are used by BIG5HKSCS only.
115*6008Syy154373  */
116*6008Syy154373 typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
117*6008Syy154373 	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
118*6008Syy154373 
/*
 * Open and close functions shared by the UTF-8 to CCK conversions.
 */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *);
122*6008Syy154373 
/*
 * Binary search function: looks for key in the table tbl that holds
 * nitems entries.
 */
size_t	kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);
125*6008Syy154373 
126*6008Syy154373 /* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
127*6008Syy154373 size_t 	kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
128*6008Syy154373 	char **outbuf, size_t *outbytesleft, int *errno,
129*6008Syy154373 	kiconv_utf8tocck_t ptr_utf8tocck);
130*6008Syy154373 
131*6008Syy154373 /*
132*6008Syy154373  * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
133*6008Syy154373  */
134*6008Syy154373 size_t 	kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
135*6008Syy154373 	uchar_t *outarray, size_t *outlen, int flag, int *errno,
136*6008Syy154373 	kiconv_utf8tocck_t ptr_utf8tocck);
137*6008Syy154373 
/*
 * Tables defined in u8_textprep.c. They are used here to check the
 * validity of UTF-8 characters and of their individual bytes.
 */
extern const int8_t	u8_number_of_bytes[];
extern const uint8_t	u8_valid_min_2nd_byte[];
extern const uint8_t	u8_valid_max_2nd_byte[];
145*6008Syy154373 
146*6008Syy154373 #endif	/* _KERNEL */
147*6008Syy154373 
148*6008Syy154373 #ifdef __cplusplus
149*6008Syy154373 }
150*6008Syy154373 #endif
151*6008Syy154373 
152*6008Syy154373 #endif	/* _SYS_KICONV_CCK_COMMON_H */
153