/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Common definitions shared by the Unicode-related conversion code:
 * byte order marks, error type codes, replacement characters, conversion
 * state structures and UTF-8 validation tables.
 */

#ifndef	COMMON_DEFS_H
#define	COMMON_DEFS_H



#define	MAGIC_NUMBER			201513


/* ISO/IEC 10646-1/Unicode Byte Order Mark */
#define	ICV_BOM_IN_BIG_ENDIAN		0x00feff
#define	ICV_BOM_IN_LITTLE_ENDIAN_UCS4	0xfffe0000
/*
 * The byte-swapped BOM is 16 bits wide for the UCS-2/UTF-16 family and
 * 32 bits wide otherwise.
 */
#if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) || \
	defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
#define	ICV_BOM_IN_LITTLE_ENDIAN	0x00fffe
#else
#define	ICV_BOM_IN_LITTLE_ENDIAN	0xfffe0000
#endif


/*
 * Following type macros are for possible error cases that can be defined for
 * mapping tables. Valid characters will have the byte length which will be
 * always a positive integer.
 */
#define	ICV_TYPE_NON_IDENTICAL_CHAR	(-1)
#define	ICV_TYPE_ILLEGAL_CHAR		(-2)

/* Following are replacement characters for non-identical character cases. */
#define	ICV_CHAR_ASCII_REPLACEMENT	('?')
#define	ICV_CHAR_UTF8_REPLACEMENT	(0x00efbfbd)
#define	ICV_CHAR_UCS2_REPLACEMENT	(0xfffd)


/*
 * Local boolean type.
 *
 * NOTE: the enumerators are named "false" and "true", which collide with
 * the macros defined by <stdbool.h>; do not include <stdbool.h> before
 * this header.
 */
typedef enum { false = 0, true = 1 } boolean;


/* We only support characters in range of UTF-16. */
typedef struct {
	unsigned int	u8;	/* UTF-8 byte sequence packed in an integer */
	signed char	size;	/* byte length, or a negative ICV_TYPE_* code */
} to_utf8_table_component_t;

typedef struct {
	unsigned int	u8;	/* UTF-8 byte sequence packed in an integer */
	unsigned char	sb;	/* corresponding single byte value */
} to_sb_table_component_t;


/* UCS-2/UCS-4/UTF-16/UTF-32 requires state management. */
typedef struct {
	boolean		bom_written;
	boolean		little_endian;
} ucs_state_t;

/* State for conversions with a UCS/UTF encoding on both sides. */
typedef struct {
	ucs_state_t	input;
	ucs_state_t	output;
} ucs_ucs_state_t;


/* UTF-7 requires additional state data fields. */
typedef struct {
	boolean		bom_written;
	boolean		little_endian;
	boolean		in_the_middle_of_utf7_sequence;
	unsigned int	remnant;
	signed char	remnant_count;	/* in bits */
	unsigned char	prevch;
} utf7_state_t;


/*
 * Following vector shows the number of bytes in a UTF-8 character.
 * Index will be the first byte of the character.
 *
 * Bytes that cannot start a character (0x80-0xBF, 0xC0, 0xC1 and
 * 0xF5-0xFF) are marked with ICV_TYPE_ILLEGAL_CHAR. Because that marker is
 * negative, the element type must be explicitly "signed char": the
 * signedness of plain "char" is implementation-defined and, where it is
 * unsigned, the marker would read back as 254 and never compare equal to
 * ICV_TYPE_ILLEGAL_CHAR.
 */

#define	IL_				ICV_TYPE_ILLEGAL_CHAR

static const signed char number_of_bytes_in_utf8_char[0x100] = {
	/* 00 - 7F: single byte (ASCII) characters */
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,

	/* 80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F */
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,

	/* 90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F */
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,

	/* A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF */
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,

	/* B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF */
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,

	/* C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF */
	IL_, IL_, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,

	/* D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF */
	2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,

	/* E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF */
	3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,

	/* F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF */
	4,  4,  4,  4,  4,  IL_, IL_, IL_,
	IL_, IL_, IL_, IL_, IL_, IL_, IL_, IL_,
};

#undef IL_

/*
 * Following is a vector of bit-masks to get used bits in the first byte of
 * a UTF-8 character.  Index is the number of bytes in the UTF-8 character
 * and the index value comes from above table.
 *
 * Callers must reject negative (illegal) lengths before indexing.
 */
static const char masks_tbl[7] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/*
 * The following two vectors are to provide valid minimum and
 * maximum values for the 2nd byte of a multibyte UTF-8 character for
 * better illegal sequence checking. The index value must be the value of
 * the first byte of the UTF-8 character.
 *
 * Entries are zero for first bytes that do not start a multibyte
 * character. The narrowed ranges are E0 (min 0xa0), ED (max 0x9f),
 * F0 (min 0x90) and F4 (max 0x8f); every other lead byte accepts the full
 * 0x80-0xbf continuation range.
 */
static const unsigned char valid_min_2nd_byte[0x100] = {
	/* 00 - BF: not the first byte of a multibyte character */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	/* C0    C1    C2    C3    C4    C5    C6    C7 */
	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* C8    C9    CA    CB    CC    CD    CE    CF */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* D0    D1    D2    D3    D4    D5    D6    D7 */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* D8    D9    DA    DB    DC    DD    DE    DF */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* E0    E1    E2    E3    E4    E5    E6    E7 */
	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* E8    E9    EA    EB    EC    ED    EE    EF */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
	/* F0    F1    F2    F3    F4    F5    F6    F7 */
	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
	/* F8    F9    FA    FB    FC    FD    FE    FF */
	0,    0,    0,    0,    0,    0,    0,    0,
};

static const unsigned char valid_max_2nd_byte[0x100] = {
	/* 00 - BF: not the first byte of a multibyte character */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	/* C0    C1    C2    C3    C4    C5    C6    C7 */
	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
	/* C8    C9    CA    CB    CC    CD    CE    CF */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
	/* D0    D1    D2    D3    D4    D5    D6    D7 */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
	/* D8    D9    DA    DB    DC    DD    DE    DF */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
	/* E0    E1    E2    E3    E4    E5    E6    E7 */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
	/* E8    E9    EA    EB    EC    ED    EE    EF */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
	/* F0    F1    F2    F3    F4    F5    F6    F7 */
	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
	/* F8    F9    FA    FB    FC    FD    FE    FF */
	0,    0,    0,    0,    0,    0,    0,    0,
};


/*
 * Following "6" and "0x3f" came from 10xx xxxx bit representation of UTF-8
 * characters' second to sixth bytes.
 */
#define	ICV_UTF8_BIT_SHIFT		6
#define	ICV_UTF8_BIT_MASK		0x3f
#define	ICV_FETCH_UTF8_BOM_SIZE		6

/*
 * Sizes in bytes of one and of two UCS code units. The ICV_FETCH_UCS_SIZE*
 * macros are defined only when one of the UCS-2/UTF-16 or UCS-4/UTF-32
 * selector macros is defined.
 */
#define	ICV_FETCH_UCS4_SIZE		4
#if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) || \
	defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
#define	ICV_FETCH_UCS_SIZE		2
#define	ICV_FETCH_UCS_SIZE_TWO		4
#elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
	defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
#define	ICV_FETCH_UCS_SIZE		4
#define	ICV_FETCH_UCS_SIZE_TWO		8
#endif

/*
 * UTF-8 representations of some useful Unicode values.
 *
 * The U+FFFE in UTF-8 is 0x00efbfbe and the U+FFFF is 0x00efbfbf but
 * we use masked values below:
 */
#define	ICV_UTF8_REPRESENTATION_d800		(0x00eda080UL)
#define	ICV_UTF8_REPRESENTATION_dfff		(0x00edbfbfUL)
#define	ICV_UTF8_REPRESENTATION_fdd0		(0x00efb790UL)
#define	ICV_UTF8_REPRESENTATION_fdef		(0x00efb7afUL)

#define	ICV_UTF8_REPRESENTATION_fffe		(0x000fbfbeUL)
#define	ICV_UTF8_REPRESENTATION_ffff		(0x000fbfbfUL)
#define	ICV_UTF8_REPRESENTATION_ffff_mask	(0x000fffffUL)

#define	ICV_UTF8_REPRESENTATION_10fffd		(0xf48fbfbdUL)

/*
 * UTF-32 and UCS-4 representations of some useful Unicode values for
 * non-character and out of bound invalid character detection.
 */
#define	ICV_UTF32_NONCHAR_fffe			(0xfffeU)
#define	ICV_UTF32_NONCHAR_ffff			(0xffffU)
#define	ICV_UTF32_NONCHAR_mask			(0xffffU)

#define	ICV_UTF32_SURROGATE_START_d800		(0xd800U)
#define	ICV_UTF32_SURROGATE_END_dfff		(0xdfffU)

#define	ICV_UTF32_ARABIC_NONCHAR_START_fdd0	(0xfdd0U)
#define	ICV_UTF32_ARABIC_NONCHAR_END_fdef	(0xfdefU)

#define	ICV_UTF32_LAST_VALID_CHAR		(0x10fffdU)

#define	ICV_UCS4_LAST_VALID_CHAR		(0x7fffffff)


#endif	/* COMMON_DEFS_H */