/*
 * xref: /illumos-gate/usr/src/lib/iconv_modules/utf-8/common/common_defs.h
 * (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	COMMON_DEFS_H
#define	COMMON_DEFS_H

/*
 * Magic constant.  Nothing in this header uses it.
 * NOTE(review): presumably it tags or validates conversion state objects in
 * the modules that include this header -- confirm against the callers.
 */
#define	MAGIC_NUMBER			201513

/*
 * ISO/IEC 10646-1/Unicode Byte Order Mark.
 *
 * ICV_BOM_IN_BIG_ENDIAN is U+FEFF itself.  The "little endian" values are
 * the same code with its bytes reversed: 0xfffe when taken as a 16-bit unit
 * and 0xfffe0000 when taken as a 32-bit unit.  ICV_BOM_IN_LITTLE_ENDIAN
 * selects the 16-bit form when the including module is built for one of the
 * UCS-2/UTF-16 variants and the 32-bit form otherwise;
 * ICV_BOM_IN_LITTLE_ENDIAN_UCS4 is always the 32-bit form.
 */
#define	ICV_BOM_IN_BIG_ENDIAN		0x00feff
#define	ICV_BOM_IN_LITTLE_ENDIAN_UCS4	0xfffe0000
#if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) || \
	defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
#define	ICV_BOM_IN_LITTLE_ENDIAN	0x00fffe
#else
#define	ICV_BOM_IN_LITTLE_ENDIAN	0xfffe0000
#endif

/*
 * The following type macros are the error cases that can be recorded in the
 * mapping tables.  A valid character is recorded with its byte length,
 * which is always a positive integer, so these negative values cannot be
 * mistaken for a length.
 */
#define	ICV_TYPE_NON_IDENTICAL_CHAR	(-1)
#define	ICV_TYPE_ILLEGAL_CHAR		(-2)

/*
 * Replacement characters for non-identical character cases:
 * '?' for ASCII, U+FFFD for UCS-2, and the UTF-8 encoding of U+FFFD
 * (bytes EF BF BD packed into one integer) for UTF-8.
 */
#define	ICV_CHAR_ASCII_REPLACEMENT	('?')
#define	ICV_CHAR_UTF8_REPLACEMENT	(0x00efbfbd)
#define	ICV_CHAR_UCS2_REPLACEMENT	(0xfffd)

/*
 * Local boolean type.
 * NOTE(review): the enumerators are named "false" and "true", so this header
 * cannot be combined with <stdbool.h> (where those names are macros) or be
 * compiled as C23 (where they are keywords).
 */
typedef enum { false = 0, true = 1 } boolean;

/*
 * Entry of a mapping table that converts to UTF-8.
 * We only support characters in range of UTF-16.
 *
 * NOTE(review): the tables built from this type are outside this header.
 * Presumably u8 holds the UTF-8 bytes packed into one integer and size holds
 * either the byte length or one of the negative ICV_TYPE_* markers (hence
 * the explicitly signed type) -- confirm against the table definitions.
 */
typedef struct {
	unsigned int	u8;
	signed char	size;
} to_utf8_table_component_t;

/*
 * Entry of a mapping table that pairs a UTF-8 value with a single byte.
 *
 * NOTE(review): the tables built from this type are outside this header.
 * Presumably u8 is the UTF-8 character packed into one integer and sb is the
 * corresponding single-byte code -- confirm against the table definitions.
 */
typedef struct {
	unsigned int	u8;
	unsigned char	sb;
} to_sb_table_component_t;

74*16d86563SAlexander Pyhalov /* UCS-2/UCS-4/UTF-16/UTF-32 requires state management. */
75*16d86563SAlexander Pyhalov typedef struct {
76*16d86563SAlexander Pyhalov 	boolean		bom_written;
77*16d86563SAlexander Pyhalov 	boolean		little_endian;
78*16d86563SAlexander Pyhalov } ucs_state_t;
79*16d86563SAlexander Pyhalov 
80*16d86563SAlexander Pyhalov typedef struct {
81*16d86563SAlexander Pyhalov 	ucs_state_t	input;
82*16d86563SAlexander Pyhalov 	ucs_state_t	output;
83*16d86563SAlexander Pyhalov } ucs_ucs_state_t;
84*16d86563SAlexander Pyhalov 
85*16d86563SAlexander Pyhalov 
86*16d86563SAlexander Pyhalov /* UTF-7 requires additional state data fields. */
87*16d86563SAlexander Pyhalov typedef struct {
88*16d86563SAlexander Pyhalov 	boolean		bom_written;
89*16d86563SAlexander Pyhalov 	boolean		little_endian;
90*16d86563SAlexander Pyhalov 	boolean		in_the_middle_of_utf7_sequence;
91*16d86563SAlexander Pyhalov 	unsigned int	remnant;
92*16d86563SAlexander Pyhalov 	signed char	remnant_count;		/* in bits */
93*16d86563SAlexander Pyhalov 	unsigned char	prevch;
94*16d86563SAlexander Pyhalov } utf7_state_t;
95*16d86563SAlexander Pyhalov 
96*16d86563SAlexander Pyhalov 
97*16d86563SAlexander Pyhalov /*
98*16d86563SAlexander Pyhalov  * Following vector shows the number of bytes in a UTF-8 character.
99*16d86563SAlexander Pyhalov  * Index will be the first byte of the character.
100*16d86563SAlexander Pyhalov  */
101*16d86563SAlexander Pyhalov 
102*16d86563SAlexander Pyhalov #define	IL_				ICV_TYPE_ILLEGAL_CHAR
103*16d86563SAlexander Pyhalov 
104*16d86563SAlexander Pyhalov static const char number_of_bytes_in_utf8_char[0x100] = {
105*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
106*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
107*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
108*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
109*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
110*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
111*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
112*16d86563SAlexander Pyhalov 	 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
113*16d86563SAlexander Pyhalov 
114*16d86563SAlexander Pyhalov     /*  80  81  82  83  84  85  86  87  88  89  8A  8B  8C  8D  8E  8F  */
115*16d86563SAlexander Pyhalov 	IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,
116*16d86563SAlexander Pyhalov 
117*16d86563SAlexander Pyhalov     /*  90  91  92  93  94  95  96  97  98  99  9A  9B  9C  9D  9E  9F  */
118*16d86563SAlexander Pyhalov 	IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,
119*16d86563SAlexander Pyhalov 
120*16d86563SAlexander Pyhalov     /*  A0  A1  A2  A3  A4  A5  A6  A7  A8  A9  AA  AB  AC  AD  AE  AF  */
121*16d86563SAlexander Pyhalov 	IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,
122*16d86563SAlexander Pyhalov 
123*16d86563SAlexander Pyhalov     /*  B0  B1  B2  B3  B4  B5  B6  B7  B8  B9  BA  BB  BC  BD  BE  BF  */
124*16d86563SAlexander Pyhalov 	IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,
125*16d86563SAlexander Pyhalov 
126*16d86563SAlexander Pyhalov     /*  C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF  */
127*16d86563SAlexander Pyhalov 	IL_,IL_, 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
128*16d86563SAlexander Pyhalov 
129*16d86563SAlexander Pyhalov     /*  D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF  */
130*16d86563SAlexander Pyhalov 	 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
131*16d86563SAlexander Pyhalov 
132*16d86563SAlexander Pyhalov     /*  E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF  */
133*16d86563SAlexander Pyhalov 	 3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
134*16d86563SAlexander Pyhalov 
135*16d86563SAlexander Pyhalov     /*  F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF  */
136*16d86563SAlexander Pyhalov 	 4,  4,  4,  4,  4, IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,IL_,
137*16d86563SAlexander Pyhalov };
138*16d86563SAlexander Pyhalov 
139*16d86563SAlexander Pyhalov #undef IL_
140*16d86563SAlexander Pyhalov 
/*
 * The following is a vector of bit-masks that select the payload bits in
 * the first byte of a UTF-8 character.  The index is the number of bytes in
 * the UTF-8 character, as obtained from number_of_bytes_in_utf8_char[].
 * Only indices 0 through 6 exist, so a negative (illegal) byte count from
 * that table must be rejected before it is used as an index here.
 */
static const char masks_tbl[7] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

/*
 * This vector and valid_max_2nd_byte[] provide the valid minimum and
 * maximum values for the 2nd byte of a multibyte UTF-8 character, for
 * better illegal sequence checking.  The index value must be the value of
 * the first byte of the UTF-8 character.
 *
 * Entries are 0 for every first byte that does not start a multibyte
 * character (00-C1 and F5-FF).  The minimum is 0x80 except after E0 (0xa0)
 * and F0 (0x90), where a smaller 2nd byte would be an overlong encoding.
 */
static const unsigned char valid_min_2nd_byte[0x100] = {
     /*  00 - BF  */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
     /*  C0    C1    C2    C3    C4    C5    C6    C7  */
	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  C8    C9    CA    CB    CC    CD    CE    CF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  D0    D1    D2    D3    D4    D5    D6    D7  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  D8    D9    DA    DB    DC    DD    DE    DF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  E0    E1    E2    E3    E4    E5    E6    E7  */
	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  E8    E9    EA    EB    EC    ED    EE    EF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  F0    F1    F2    F3    F4    F5    F6    F7  */
	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
     /*  F8    F9    FA    FB    FC    FD    FE    FF  */
	0,    0,    0,    0,    0,    0,    0,    0,
};

/*
 * Valid maximum value for the 2nd byte of a multibyte UTF-8 character,
 * indexed by the value of the first byte; the counterpart of
 * valid_min_2nd_byte[].
 *
 * Entries are 0 for every first byte that does not start a multibyte
 * character (00-C1 and F5-FF).  The maximum is 0xbf except after ED (0x9f,
 * which keeps the result below the surrogate range U+D800-U+DFFF) and
 * F4 (0x8f, which keeps the result at or below U+10FFFF).
 */
static const unsigned char valid_max_2nd_byte[0x100] = {
     /*  00 - BF  */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
     /*  C0    C1    C2    C3    C4    C5    C6    C7  */
	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  C8    C9    CA    CB    CC    CD    CE    CF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  D0    D1    D2    D3    D4    D5    D6    D7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  D8    D9    DA    DB    DC    DD    DE    DF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  E0    E1    E2    E3    E4    E5    E6    E7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  E8    E9    EA    EB    EC    ED    EE    EF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
     /*  F0    F1    F2    F3    F4    F5    F6    F7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
     /*  F8    F9    FA    FB    FC    FD    FE    FF  */
	0,    0,    0,    0,    0,    0,    0,    0,
};

/*
 * The "6" and "0x3f" below come from the 10xx xxxx bit representation of
 * the second through sixth bytes of a UTF-8 character: each such byte
 * carries six payload bits, selected by the mask 0x3f.
 *
 * NOTE(review): the users of ICV_FETCH_UTF8_BOM_SIZE are outside this
 * header; confirm there what the 6 counts.
 */
#define	ICV_UTF8_BIT_SHIFT		6
#define	ICV_UTF8_BIT_MASK		0x3f
#define	ICV_FETCH_UTF8_BOM_SIZE		6

/*
 * Fetch sizes.  ICV_FETCH_UCS4_SIZE is always 4.  ICV_FETCH_UCS_SIZE is 2
 * when the including module is built for a UCS-2/UTF-16 variant and 4 when
 * it is built for a UCS-4/UTF-32 variant; ICV_FETCH_UCS_SIZE_TWO is twice
 * that.  If none of those build macros is defined, neither
 * ICV_FETCH_UCS_SIZE nor ICV_FETCH_UCS_SIZE_TWO is defined.
 * NOTE(review): presumably these are byte counts -- confirm in the callers.
 */
#define	ICV_FETCH_UCS4_SIZE		4
#if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) || \
	defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
#define	ICV_FETCH_UCS_SIZE		2
#define	ICV_FETCH_UCS_SIZE_TWO		4
#elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
	defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
#define	ICV_FETCH_UCS_SIZE		4
#define	ICV_FETCH_UCS_SIZE_TWO		8
#endif

/*
 * UTF-8 representations of some useful Unicode values, with the UTF-8 bytes
 * packed into one integer (first byte most significant).
 *
 * U+FFFE in UTF-8 is 0x00efbfbe and U+FFFF is 0x00efbfbf, but the _fffe and
 * _ffff values below are those values ANDed with
 * ICV_UTF8_REPRESENTATION_ffff_mask (the low 20 bits).
 * NOTE(review): presumably a value is masked the same way before being
 * compared with them -- confirm in the callers.
 */
#define	ICV_UTF8_REPRESENTATION_d800		(0x00eda080UL)
#define	ICV_UTF8_REPRESENTATION_dfff		(0x00edbfbfUL)
#define	ICV_UTF8_REPRESENTATION_fdd0		(0x00efb790UL)
#define	ICV_UTF8_REPRESENTATION_fdef		(0x00efb7afUL)

#define	ICV_UTF8_REPRESENTATION_fffe		(0x000fbfbeUL)
#define	ICV_UTF8_REPRESENTATION_ffff		(0x000fbfbfUL)
#define	ICV_UTF8_REPRESENTATION_ffff_mask	(0x000fffffUL)

#define	ICV_UTF8_REPRESENTATION_10fffd		(0xf48fbfbdUL)

/*
 * UTF-32 and UCS-4 representations of some useful Unicode values for
 * non-character and out of bound invalid character detection:
 * the xxFFFE/xxFFFF non-characters (with the mask for the low 16 bits),
 * the surrogate range U+D800-U+DFFF, the non-character range
 * U+FDD0-U+FDEF, and the last valid character of each encoding.
 */
#define	ICV_UTF32_NONCHAR_fffe			(0xfffeU)
#define	ICV_UTF32_NONCHAR_ffff			(0xffffU)
#define	ICV_UTF32_NONCHAR_mask			(0xffffU)

#define	ICV_UTF32_SURROGATE_START_d800		(0xd800U)
#define	ICV_UTF32_SURROGATE_END_dfff		(0xdfffU)

#define	ICV_UTF32_ARABIC_NONCHAR_START_fdd0	(0xfdd0U)
#define	ICV_UTF32_ARABIC_NONCHAR_END_fdef	(0xfdefU)

#define	ICV_UTF32_LAST_VALID_CHAR		(0x10fffdU)

#define	ICV_UCS4_LAST_VALID_CHAR		(0x7fffffff)

#endif	/* COMMON_DEFS_H */