/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_SYS_KICONV_CCK_COMMON_H
#define	_SYS_KICONV_CCK_COMMON_H

#pragma ident "%Z%%M% %I% %E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

#ifdef	_KERNEL

/*
 * Definitions shared by the kernel iconv converters between UTF-8 and the
 * CCK encodings (GB18030/BIG5/EUC-TW/UHC...).
 *
 * Several macros below are not hygienic: they reference variables and
 * labels that must exist in the scope where the macro is expanded.  Each
 * such dependency is listed in the comment above the macro.
 */

/* The start value of leading byte of EUC encoding. */
#define	KICONV_EUC_START		(0xA1)

/*
 * Valid EUC range or not: KICONV_EUC_START (0xA1) - 0xFE.
 * The argument is evaluated twice; do not pass an expression with
 * side effects.
 */
#define	KICONV_IS_VALID_EUC_BYTE(v)	\
	((v) >= KICONV_EUC_START && (v) <= 0xFE)

/* Is ASCII character or not: 0x00 - 0x7F. */
#define	KICONV_IS_ASCII(c)		(((uchar_t)(c)) <= 0x7F)

/*
 * UTF-8 replacement character for non-identicals and its length.
 * CHAR1..CHAR3 are the individual bytes of the three byte sequence and
 * KICONV_UTF8_REPLACEMENT_CHAR is the same sequence packed in one integer.
 */
#define	KICONV_UTF8_REPLACEMENT_CHAR1		(0xEF)
#define	KICONV_UTF8_REPLACEMENT_CHAR2		(0xBF)
#define	KICONV_UTF8_REPLACEMENT_CHAR3		(0xBD)
#define	KICONV_UTF8_REPLACEMENT_CHAR		(0xefbfbd)
#define	KICONV_UTF8_REPLACEMENT_CHAR_LEN	(3)

/*
 * Whether the 2nd byte of 3 or 4 bytes UTF-8 character is invalid or not.
 * "first" is used as an index into the u8_valid_{min,max}_2nd_byte tables
 * declared at the end of this header; both arguments are evaluated twice.
 */
#define	KICONV_IS_INVALID_UTF8_SECOND_BYTE(second, first)		\
	((second) < u8_valid_min_2nd_byte[(first)] ||			\
	(second) > u8_valid_max_2nd_byte[(first)])

/*
 * If we haven't checked on the UTF-8 signature BOM character in
 * the beginning of the conversion data stream, we check it and if
 * find one, we skip it since we have no use for it.
 *
 * Requires a variable named "kcd", convertible to kiconv_state_t, in the
 * expanding scope.  "ib" is advanced in place and so must be an lvalue.
 *
 * The body is wrapped in do { } while (0) so that the whole expansion is
 * a single statement: the bom_processed store can no longer escape an
 * unbraced if/else that guards the macro.  As before, the caller supplies
 * the terminating semicolon.
 */
#define	KICONV_CHECK_UTF8_BOM(ib, ibtail)				\
	do {								\
		if (((kiconv_state_t)kcd)->bom_processed == 0 &&	\
		    ((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&		\
		    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {	\
			(ib) += 3;					\
		}							\
		((kiconv_state_t)kcd)->bom_processed = 1;		\
	} while (0)

/*
 * Check BOM of UTF-8 without state information.
 *
 * This expands to a bare "if (...) { ... }" block, so it must not be used
 * as the unbraced body of an if statement that has an else branch.
 * "ib" is advanced in place and so must be an lvalue.
 */
#define	KICONV_CHECK_UTF8_BOM_WITHOUT_STATE(ib, ibtail)			\
	if (((ibtail) - (ib)) >= 3 && *(ib) == 0xef &&			\
	    *((ib) + 1) == 0xbb && *((ib) + 2) == 0xbf) {		\
		(ib) += 3;						\
	}

/*
 * Set errno and break.
 *
 * Requires "errno" (a pointer that is written through) and "ret_val" in
 * the expanding scope.  The expansion is three statements ending in a
 * "break" that has to act on the caller's enclosing loop or switch, which
 * is why it cannot be wrapped in do { } while (0).  Always expand it
 * inside a braced block and follow it with a semicolon.
 */
#define	KICONV_SET_ERRNO_AND_BREAK(err)					\
	*errno = (err);							\
	ret_val = (size_t)-1;						\
	break

/*
 * Handling flag, advance input buffer, set errno and break.
 *
 * Requires "flag", "ib", "errno", "ret_val" and a label named
 * REPLACE_INVALID in the expanding scope.  When KICONV_REPLACE_INVALID is
 * set in flag, the input pointer is advanced and control jumps to the
 * label; otherwise this behaves as KICONV_SET_ERRNO_AND_BREAK(err), with
 * the same braced-block requirement.
 */
#define	KICONV_SET_ERRNO_WITH_FLAG(advance, err)			\
	if (flag & KICONV_REPLACE_INVALID) {				\
		ib += (advance);					\
		goto REPLACE_INVALID;					\
	}								\
	KICONV_SET_ERRNO_AND_BREAK((err))

/* Conversion table for UTF-8 -> CCK encoding. */
typedef struct {
	uint32_t key;
	uint32_t value;
} kiconv_table_t;

/* Conversion table for CCK encoding -> utf8. */
typedef struct {
	uint32_t key;
	uchar_t u8[4];
} kiconv_table_array_t;

/*
 * Function prototype for UTF-8 -> GB18030/BIG5/EUC-TW/UHC...
 * Currently parameter ib/ibtail are used by BIG5HKSCS only.
 */
typedef int8_t (*kiconv_utf8tocck_t)(uint32_t utf8, uchar_t **ib,
	uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);

/* Common open and close function for UTF-8 to CCK conversion. */
void	*kiconv_open_to_cck(void);
int	kiconv_close_to_cck(void *);

/* Binary search function. */
size_t	kiconv_binsearch(uint32_t key, void *tbl, size_t nitems);

/* Wrapper for conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC... */
size_t	kiconv_utf8_to_cck(void *kcd, char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * Wrapper for string based conversion from UTF-8 to GB18030/BIG5/EUC-TW/UHC...
 */
size_t	kiconvstr_utf8_to_cck(uchar_t *inarray, size_t *inlen,
	uchar_t *outarray, size_t *outlen, int flag, int *errno,
	kiconv_utf8tocck_t ptr_utf8tocck);

/*
 * The following tables are coming from u8_textprep.c. We use them to
 * check on validity of UTF-8 characters and their bytes.
 */
extern const int8_t u8_number_of_bytes[];
extern const uint8_t u8_valid_min_2nd_byte[];
extern const uint8_t u8_valid_max_2nd_byte[];

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_KICONV_CCK_COMMON_H */