1*6008Syy154373 /*
2*6008Syy154373 * CDDL HEADER START
3*6008Syy154373 *
4*6008Syy154373 * The contents of this file are subject to the terms of the
5*6008Syy154373 * Common Development and Distribution License (the "License").
6*6008Syy154373 * You may not use this file except in compliance with the License.
7*6008Syy154373 *
8*6008Syy154373 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*6008Syy154373 * or http://www.opensolaris.org/os/licensing.
10*6008Syy154373 * See the License for the specific language governing permissions
11*6008Syy154373 * and limitations under the License.
12*6008Syy154373 *
13*6008Syy154373 * When distributing Covered Code, include this CDDL HEADER in each
14*6008Syy154373 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*6008Syy154373 * If applicable, add the following below this CDDL HEADER, with the
16*6008Syy154373 * fields enclosed by brackets "[]" replaced with your own identifying
17*6008Syy154373 * information: Portions Copyright [yyyy] [name of copyright owner]
18*6008Syy154373 *
19*6008Syy154373 * CDDL HEADER END
20*6008Syy154373 */
21*6008Syy154373
22*6008Syy154373 /*
23*6008Syy154373 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24*6008Syy154373 * Use is subject to license terms.
25*6008Syy154373 */
26*6008Syy154373
27*6008Syy154373 #pragma ident "%Z%%M% %I% %E% SMI"
28*6008Syy154373
29*6008Syy154373 #include <sys/types.h>
30*6008Syy154373 #include <sys/param.h>
31*6008Syy154373 #include <sys/sysmacros.h>
32*6008Syy154373 #include <sys/systm.h>
33*6008Syy154373 #include <sys/debug.h>
34*6008Syy154373 #include <sys/kmem.h>
35*6008Syy154373 #include <sys/sunddi.h>
36*6008Syy154373 #include <sys/byteorder.h>
37*6008Syy154373 #include <sys/errno.h>
38*6008Syy154373 #include <sys/euc.h>
39*6008Syy154373 #include <sys/modctl.h>
40*6008Syy154373 #include <sys/kiconv.h>
41*6008Syy154373
42*6008Syy154373 #include <sys/kiconv_ja.h>
43*6008Syy154373 #include <sys/kiconv_ja_jis_to_unicode.h>
44*6008Syy154373 #include <sys/kiconv_ja_unicode_to_jis.h>
45*6008Syy154373
/*
 * The following vector gives the total number of bytes in a UTF-8
 * character (or U8_ILLEGAL_CHAR / U8_OUT_OF_RANGE_CHAR for a byte that
 * cannot start one). The index is the first byte of the character.
 * This is defined in u8_textprep.c.
 */
51*6008Syy154373 extern const int8_t u8_number_of_bytes[];
52*6008Syy154373
/*
 * The following is a vector of bit-masks used to extract the payload bits
 * from the first byte of a UTF-8 character. The index is the number of
 * bytes that follow the first byte. This is defined in uconv.c.
 */
58*6008Syy154373 extern const uchar_t u8_masks_tbl[];
59*6008Syy154373
/*
 * The following two vectors provide the valid minimum and maximum
 * values for the 2nd byte of a multibyte UTF-8 character, for better
 * illegal sequence checking. The index must be the value of the first
 * byte of the UTF-8 character. These are defined in u8_textprep.c.
 */
66*6008Syy154373 extern const uint8_t u8_valid_min_2nd_byte[];
67*6008Syy154373 extern const uint8_t u8_valid_max_2nd_byte[];
68*6008Syy154373
69*6008Syy154373 static kiconv_ja_euc16_t
kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)70*6008Syy154373 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
71*6008Syy154373 {
72*6008Syy154373 const kiconv_ja_euc16_t *p;
73*6008Syy154373
74*6008Syy154373 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
75*6008Syy154373 return (p[ucs2 & 0xff]);
76*6008Syy154373
77*6008Syy154373 return (KICONV_JA_NODEST);
78*6008Syy154373 }
79*6008Syy154373
80*6008Syy154373 static size_t
utf8_ucs(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno)81*6008Syy154373 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
82*6008Syy154373 {
83*6008Syy154373 uint_t l; /* to be copied to *p on successful return */
84*6008Syy154373 uchar_t ic; /* current byte */
85*6008Syy154373 uchar_t ic1; /* 1st byte */
86*6008Syy154373 uchar_t *ip = *pip; /* next byte to read */
87*6008Syy154373 size_t ileft = *pileft; /* number of bytes available */
88*6008Syy154373 size_t rv = 0; /* return value of this function */
89*6008Syy154373 int remaining_bytes;
90*6008Syy154373 int u8_size;
91*6008Syy154373
92*6008Syy154373 KICONV_JA_NGET(ic1); /* read 1st byte */
93*6008Syy154373
94*6008Syy154373 if (ic1 < 0x80) {
95*6008Syy154373 /* successfully converted */
96*6008Syy154373 *p = (uint_t)ic1;
97*6008Syy154373 goto ret;
98*6008Syy154373 }
99*6008Syy154373
100*6008Syy154373 u8_size = u8_number_of_bytes[ic1];
101*6008Syy154373 if (u8_size == U8_ILLEGAL_CHAR) {
102*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
103*6008Syy154373 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
104*6008Syy154373 KICONV_JA_RETERROR(ERANGE)
105*6008Syy154373 }
106*6008Syy154373
107*6008Syy154373 remaining_bytes = u8_size - 1;
108*6008Syy154373 if (remaining_bytes != 0) {
109*6008Syy154373 l = ic1 & u8_masks_tbl[remaining_bytes];
110*6008Syy154373
111*6008Syy154373 for (; remaining_bytes > 0; remaining_bytes--) {
112*6008Syy154373 KICONV_JA_NGET(ic);
113*6008Syy154373 if (ic1 != 0U) {
114*6008Syy154373 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
115*6008Syy154373 (ic > u8_valid_max_2nd_byte[ic1])) {
116*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
117*6008Syy154373 }
118*6008Syy154373 ic1 = 0U; /* 2nd byte check done */
119*6008Syy154373 } else {
120*6008Syy154373 if ((ic < 0x80) || (ic > 0xbf)) {
121*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
122*6008Syy154373 }
123*6008Syy154373 }
124*6008Syy154373 l = (l << 6) | (ic & 0x3f);
125*6008Syy154373 }
126*6008Syy154373
127*6008Syy154373 /* successfully converted */
128*6008Syy154373 *p = l;
129*6008Syy154373 } else {
130*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
131*6008Syy154373 }
132*6008Syy154373
133*6008Syy154373 ret:
134*6008Syy154373 if (rv == 0) {
135*6008Syy154373 /*
136*6008Syy154373 * Update rv, *pip, and *pileft on successfule return.
137*6008Syy154373 */
138*6008Syy154373 rv = *pileft - ileft;
139*6008Syy154373 *pip = ip;
140*6008Syy154373 *pileft = ileft;
141*6008Syy154373 }
142*6008Syy154373
143*6008Syy154373 return (rv);
144*6008Syy154373 }
145*6008Syy154373
146*6008Syy154373 static size_t
utf8_ucs_replace(uint_t * p,uchar_t ** pip,size_t * pileft,size_t * repnum)147*6008Syy154373 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
148*6008Syy154373 {
149*6008Syy154373 uint_t l; /* to be copied to *p on successful return */
150*6008Syy154373 uchar_t ic; /* current byte */
151*6008Syy154373 uchar_t ic1; /* 1st byte */
152*6008Syy154373 uchar_t *ip = *pip; /* next byte to read */
153*6008Syy154373 size_t ileft = *pileft; /* number of bytes available */
154*6008Syy154373 size_t rv = 0; /* return value of this function */
155*6008Syy154373 int remaining_bytes;
156*6008Syy154373 int u8_size;
157*6008Syy154373
158*6008Syy154373 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */
159*6008Syy154373
160*6008Syy154373 if (ic1 < 0x80) {
161*6008Syy154373 /* successfully converted */
162*6008Syy154373 l = (uint_t)ic1;
163*6008Syy154373 goto ret;
164*6008Syy154373 }
165*6008Syy154373
166*6008Syy154373 u8_size = u8_number_of_bytes[ic1];
167*6008Syy154373 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
168*6008Syy154373 l = KICONV_JA_DEF_SINGLE;
169*6008Syy154373 (*repnum)++;
170*6008Syy154373 goto ret;
171*6008Syy154373 }
172*6008Syy154373
173*6008Syy154373 remaining_bytes = u8_size - 1;
174*6008Syy154373
175*6008Syy154373 if (remaining_bytes != 0) {
176*6008Syy154373 l = ic1 & u8_masks_tbl[remaining_bytes];
177*6008Syy154373
178*6008Syy154373 for (; remaining_bytes > 0; remaining_bytes--) {
179*6008Syy154373 KICONV_JA_NGET_REP_TO_MB(ic);
180*6008Syy154373 if (ic1 != 0U) {
181*6008Syy154373 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
182*6008Syy154373 (ic > u8_valid_max_2nd_byte[ic1])) {
183*6008Syy154373 l = KICONV_JA_DEF_SINGLE;
184*6008Syy154373 (*repnum)++;
185*6008Syy154373 ileft -= (remaining_bytes - 1);
186*6008Syy154373 ip += (remaining_bytes - 1);
187*6008Syy154373 break;
188*6008Syy154373 }
189*6008Syy154373 ic1 = 0U; /* 2nd byte check done */
190*6008Syy154373 } else {
191*6008Syy154373 if ((ic < 0x80) || (ic > 0xbf)) {
192*6008Syy154373 l = KICONV_JA_DEF_SINGLE;
193*6008Syy154373 (*repnum)++;
194*6008Syy154373 ileft -= (remaining_bytes - 1);
195*6008Syy154373 ip += (remaining_bytes - 1);
196*6008Syy154373 break;
197*6008Syy154373 }
198*6008Syy154373 }
199*6008Syy154373 l = (l << 6) | (ic & 0x3f);
200*6008Syy154373 }
201*6008Syy154373 } else {
202*6008Syy154373 l = KICONV_JA_DEF_SINGLE;
203*6008Syy154373 (*repnum)++;
204*6008Syy154373 }
205*6008Syy154373
206*6008Syy154373 ret:
207*6008Syy154373 /* successfully converted */
208*6008Syy154373 *p = l;
209*6008Syy154373 rv = *pileft - ileft;
210*6008Syy154373
211*6008Syy154373 *pip = ip;
212*6008Syy154373 *pileft = ileft;
213*6008Syy154373
214*6008Syy154373 return (rv);
215*6008Syy154373 }
216*6008Syy154373
217*6008Syy154373 static size_t /* return #bytes read, or -1 */
read_unicode(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno,int flag,size_t * rv)218*6008Syy154373 read_unicode(
219*6008Syy154373 uint_t *p, /* point variable to store UTF-32 */
220*6008Syy154373 uchar_t **pip, /* point pointer to input buf */
221*6008Syy154373 size_t *pileft, /* point #bytes left in input buf */
222*6008Syy154373 int *errno, /* point variable to errno */
223*6008Syy154373 int flag, /* kiconvstr flag */
224*6008Syy154373 size_t *rv) /* point return valuse */
225*6008Syy154373 {
226*6008Syy154373 if (flag & KICONV_REPLACE_INVALID)
227*6008Syy154373 return (utf8_ucs_replace(p, pip, pileft, rv));
228*6008Syy154373 else
229*6008Syy154373 return (utf8_ucs(p, pip, pileft, errno));
230*6008Syy154373 }
231*6008Syy154373
232*6008Syy154373 static size_t
write_unicode(uint_t u32,char ** pop,size_t * poleft,int * errno)233*6008Syy154373 write_unicode(
234*6008Syy154373 uint_t u32, /* UTF-32 to write */
235*6008Syy154373 char **pop, /* point pointer to output buf */
236*6008Syy154373 size_t *poleft, /* point #bytes left in output buf */
237*6008Syy154373 int *errno) /* point variable to errno */
238*6008Syy154373 {
239*6008Syy154373 char *op = *pop;
240*6008Syy154373 size_t oleft = *poleft;
241*6008Syy154373 size_t rv = 0; /* return value */
242*6008Syy154373
243*6008Syy154373 if (u32 <= 0x7f) {
244*6008Syy154373 KICONV_JA_NPUT((uchar_t)(u32));
245*6008Syy154373 rv = 1;
246*6008Syy154373 } else if (u32 <= 0x7ff) {
247*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
248*6008Syy154373 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
249*6008Syy154373 rv = 2;
250*6008Syy154373 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
251*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
252*6008Syy154373 } else if (u32 <= 0xffff) {
253*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
254*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
255*6008Syy154373 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
256*6008Syy154373 rv = 3;
257*6008Syy154373 } else if (u32 <= 0x10ffff) {
258*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
259*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
260*6008Syy154373 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
261*6008Syy154373 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
262*6008Syy154373 rv = 4;
263*6008Syy154373 } else {
264*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
265*6008Syy154373 }
266*6008Syy154373
267*6008Syy154373 ret:
268*6008Syy154373 if (rv != (size_t)-1) {
269*6008Syy154373 /* update *pop and *poleft only on successful return */
270*6008Syy154373 *pop = op;
271*6008Syy154373 *poleft = oleft;
272*6008Syy154373 }
273*6008Syy154373
274*6008Syy154373 return (rv);
275*6008Syy154373 }
276*6008Syy154373
277*6008Syy154373 static void *
_kiconv_ja_open_unicode(uint8_t id)278*6008Syy154373 _kiconv_ja_open_unicode(uint8_t id)
279*6008Syy154373 {
280*6008Syy154373 kiconv_state_t kcd;
281*6008Syy154373
282*6008Syy154373 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
283*6008Syy154373 KM_SLEEP);
284*6008Syy154373 kcd->id = id;
285*6008Syy154373 kcd->bom_processed = 0;
286*6008Syy154373 return ((void *)kcd);
287*6008Syy154373 }
288*6008Syy154373
289*6008Syy154373 static void *
open_eucjp(void)290*6008Syy154373 open_eucjp(void)
291*6008Syy154373 {
292*6008Syy154373 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
293*6008Syy154373 }
294*6008Syy154373
295*6008Syy154373 static void *
open_eucjpms(void)296*6008Syy154373 open_eucjpms(void)
297*6008Syy154373 {
298*6008Syy154373 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
299*6008Syy154373 }
300*6008Syy154373
301*6008Syy154373 static void *
open_sjis(void)302*6008Syy154373 open_sjis(void)
303*6008Syy154373 {
304*6008Syy154373 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
305*6008Syy154373 }
306*6008Syy154373
307*6008Syy154373 static void *
open_cp932(void)308*6008Syy154373 open_cp932(void)
309*6008Syy154373 {
310*6008Syy154373 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
311*6008Syy154373 }
312*6008Syy154373
313*6008Syy154373 int
close_ja(void * kcd)314*6008Syy154373 close_ja(void *kcd)
315*6008Syy154373 {
316*6008Syy154373 if (! kcd || kcd == (void *)-1)
317*6008Syy154373 return (EBADF);
318*6008Syy154373
319*6008Syy154373 kmem_free(kcd, sizeof (kiconv_state_data_t));
320*6008Syy154373
321*6008Syy154373 return (0);
322*6008Syy154373 }
323*6008Syy154373
324*6008Syy154373 static size_t
_do_kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)325*6008Syy154373 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
326*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
327*6008Syy154373 {
328*6008Syy154373 uint_t u32; /* UTF-32 */
329*6008Syy154373 uint_t index; /* index for table lookup */
330*6008Syy154373 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
331*6008Syy154373 size_t rv = 0; /* return value of this function */
332*6008Syy154373
333*6008Syy154373 uchar_t *ip;
334*6008Syy154373 size_t ileft;
335*6008Syy154373 char *op;
336*6008Syy154373 size_t oleft;
337*6008Syy154373 size_t id = ((kiconv_state_t)kcd)->id;
338*6008Syy154373
339*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
340*6008Syy154373 return (0);
341*6008Syy154373 }
342*6008Syy154373
343*6008Syy154373 ip = (uchar_t *)*inbuf;
344*6008Syy154373 ileft = *inbytesleft;
345*6008Syy154373 op = *outbuf;
346*6008Syy154373 oleft = *outbytesleft;
347*6008Syy154373
348*6008Syy154373 while (ileft != 0) {
349*6008Syy154373 KICONV_JA_NGET(ic1); /* get 1st byte */
350*6008Syy154373
351*6008Syy154373 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
352*6008Syy154373 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
353*6008Syy154373 KICONV_JA_PUTU(u32);
354*6008Syy154373 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
355*6008Syy154373 KICONV_JA_NGET(ic2);
356*6008Syy154373 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
357*6008Syy154373 ic1 &= KICONV_JA_CMASK;
358*6008Syy154373 ic2 &= KICONV_JA_CMASK;
359*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
360*6008Syy154373 if (u32 == KICONV_JA_NODEST) {
361*6008Syy154373 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
362*6008Syy154373 u32 = kiconv_ja_jisx0208_to_ucs2[index];
363*6008Syy154373 }
364*6008Syy154373 if (u32 == KICONV_JA_REPLACE)
365*6008Syy154373 rv++;
366*6008Syy154373 KICONV_JA_PUTU(u32);
367*6008Syy154373 } else { /* 2nd byte check failed */
368*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
369*6008Syy154373 }
370*6008Syy154373 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
371*6008Syy154373 KICONV_JA_NGET(ic2);
372*6008Syy154373 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
373*6008Syy154373 index = (ic2 - 0xa1);
374*6008Syy154373 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
375*6008Syy154373 KICONV_JA_PUTU(u32);
376*6008Syy154373 } else { /* 2nd byte check failed */
377*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
378*6008Syy154373 }
379*6008Syy154373 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
380*6008Syy154373 KICONV_JA_NGET(ic2);
381*6008Syy154373 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
382*6008Syy154373 KICONV_JA_NGET(ic3);
383*6008Syy154373 if (KICONV_JA_ISCS3(ic3)) {
384*6008Syy154373 /* 3rd byte check passed */
385*6008Syy154373 ic2 &= KICONV_JA_CMASK;
386*6008Syy154373 ic3 &= KICONV_JA_CMASK;
387*6008Syy154373 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
388*6008Syy154373 ic2, ic3);
389*6008Syy154373 if (u32 == KICONV_JA_NODEST) {
390*6008Syy154373 index = ((ic2 - 0x21) * 94 +
391*6008Syy154373 (ic3 - 0x21));
392*6008Syy154373 u32 = kiconv_ja_jisx0212_to_ucs2
393*6008Syy154373 [index];
394*6008Syy154373 }
395*6008Syy154373 if (u32 == KICONV_JA_REPLACE)
396*6008Syy154373 rv++;
397*6008Syy154373 KICONV_JA_PUTU(u32);
398*6008Syy154373 } else { /* 3rd byte check failed */
399*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
400*6008Syy154373 }
401*6008Syy154373 } else { /* 2nd byte check failed */
402*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
403*6008Syy154373 }
404*6008Syy154373 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
405*6008Syy154373 /* C1 control; 1 byte */
406*6008Syy154373 u32 = ic1;
407*6008Syy154373 KICONV_JA_PUTU(u32);
408*6008Syy154373 } else { /* 1st byte check failed */
409*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
410*6008Syy154373 }
411*6008Syy154373
412*6008Syy154373 /*
413*6008Syy154373 * One character successfully converted so update
414*6008Syy154373 * values outside of this function's stack.
415*6008Syy154373 */
416*6008Syy154373 *inbuf = (char *)ip;
417*6008Syy154373 *inbytesleft = ileft;
418*6008Syy154373 *outbuf = op;
419*6008Syy154373 *outbytesleft = oleft;
420*6008Syy154373 }
421*6008Syy154373
422*6008Syy154373 ret:
423*6008Syy154373 return (rv);
424*6008Syy154373 }
425*6008Syy154373
426*6008Syy154373 static size_t
_do_kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)427*6008Syy154373 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
428*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
429*6008Syy154373 {
430*6008Syy154373 uchar_t ic;
431*6008Syy154373 size_t rv = 0;
432*6008Syy154373 uint_t ucs4;
433*6008Syy154373 ushort_t euc16;
434*6008Syy154373
435*6008Syy154373 uchar_t *ip;
436*6008Syy154373 size_t ileft;
437*6008Syy154373 char *op;
438*6008Syy154373 size_t oleft;
439*6008Syy154373 size_t read_len;
440*6008Syy154373
441*6008Syy154373 size_t id = ((kiconv_state_t)kcd)->id;
442*6008Syy154373
443*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
444*6008Syy154373 return (0);
445*6008Syy154373 }
446*6008Syy154373
447*6008Syy154373 ip = (uchar_t *)*inbuf;
448*6008Syy154373 ileft = *inbytesleft;
449*6008Syy154373 op = *outbuf;
450*6008Syy154373 oleft = *outbytesleft;
451*6008Syy154373
452*6008Syy154373 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
453*6008Syy154373
454*6008Syy154373 while (ileft != 0) {
455*6008Syy154373 KICONV_JA_GETU(&ucs4, 0);
456*6008Syy154373
457*6008Syy154373 if (ucs4 > 0xffff) {
458*6008Syy154373 /* non-BMP */
459*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
460*6008Syy154373 rv++;
461*6008Syy154373 goto next;
462*6008Syy154373 }
463*6008Syy154373
464*6008Syy154373 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
465*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
466*6008Syy154373 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
467*6008Syy154373 }
468*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
469*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
470*6008Syy154373 rv++;
471*6008Syy154373 goto next;
472*6008Syy154373 }
473*6008Syy154373
474*6008Syy154373 switch (euc16 & 0x8080) {
475*6008Syy154373 case 0x0000: /* CS0 */
476*6008Syy154373 ic = (uchar_t)euc16;
477*6008Syy154373 KICONV_JA_NPUT(ic);
478*6008Syy154373 break;
479*6008Syy154373 case 0x8080: /* CS1 */
480*6008Syy154373 ic = (uchar_t)((euc16 >> 8) & 0xff);
481*6008Syy154373 KICONV_JA_NPUT(ic);
482*6008Syy154373 ic = (uchar_t)(euc16 & 0xff);
483*6008Syy154373 KICONV_JA_NPUT(ic);
484*6008Syy154373 break;
485*6008Syy154373 case 0x0080: /* CS2 */
486*6008Syy154373 KICONV_JA_NPUT(SS2);
487*6008Syy154373 ic = (uchar_t)euc16;
488*6008Syy154373 KICONV_JA_NPUT(ic);
489*6008Syy154373 break;
490*6008Syy154373 case 0x8000: /* CS3 */
491*6008Syy154373 KICONV_JA_NPUT(SS3);
492*6008Syy154373 ic = (uchar_t)((euc16 >> 8) & 0xff);
493*6008Syy154373 KICONV_JA_NPUT(ic);
494*6008Syy154373 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
495*6008Syy154373 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
496*6008Syy154373 break;
497*6008Syy154373 }
498*6008Syy154373 next:
499*6008Syy154373 /*
500*6008Syy154373 * One character successfully converted so update
501*6008Syy154373 * values outside of this function's stack.
502*6008Syy154373 */
503*6008Syy154373 *inbuf = (char *)ip;
504*6008Syy154373 *inbytesleft = ileft;
505*6008Syy154373 *outbuf = op;
506*6008Syy154373 *outbytesleft = oleft;
507*6008Syy154373 }
508*6008Syy154373
509*6008Syy154373 ret:
510*6008Syy154373 return (rv);
511*6008Syy154373 }
512*6008Syy154373
513*6008Syy154373 static size_t
_do_kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)514*6008Syy154373 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
515*6008Syy154373 size_t *outbytesleft, int flag, int *errno, uint8_t id)
516*6008Syy154373 {
517*6008Syy154373 uint_t u32; /* UTF-32 */
518*6008Syy154373 uint_t index; /* index for table lookup */
519*6008Syy154373 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
520*6008Syy154373 size_t rv = 0; /* return value of this function */
521*6008Syy154373
522*6008Syy154373 uchar_t *ip;
523*6008Syy154373 size_t ileft;
524*6008Syy154373 char *op;
525*6008Syy154373 size_t oleft;
526*6008Syy154373
527*6008Syy154373 boolean_t do_not_ignore_null;
528*6008Syy154373
529*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
530*6008Syy154373 return (0);
531*6008Syy154373 }
532*6008Syy154373
533*6008Syy154373 ip = (uchar_t *)inbuf;
534*6008Syy154373 ileft = *inbytesleft;
535*6008Syy154373 op = outbuf;
536*6008Syy154373 oleft = *outbytesleft;
537*6008Syy154373
538*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
539*6008Syy154373
540*6008Syy154373 while (ileft != 0) {
541*6008Syy154373 KICONV_JA_NGET(ic1); /* get 1st byte */
542*6008Syy154373
543*6008Syy154373 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
544*6008Syy154373 if (ic1 == '\0' && do_not_ignore_null) {
545*6008Syy154373 return (0);
546*6008Syy154373 }
547*6008Syy154373 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
548*6008Syy154373 KICONV_JA_PUTU(u32);
549*6008Syy154373 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
550*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
551*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
552*6008Syy154373 } else {
553*6008Syy154373 KICONV_JA_NGET(ic2);
554*6008Syy154373 }
555*6008Syy154373 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
556*6008Syy154373 ic1 &= KICONV_JA_CMASK;
557*6008Syy154373 ic2 &= KICONV_JA_CMASK;
558*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
559*6008Syy154373 if (u32 == KICONV_JA_NODEST) {
560*6008Syy154373 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
561*6008Syy154373 u32 = kiconv_ja_jisx0208_to_ucs2[index];
562*6008Syy154373 }
563*6008Syy154373 if (u32 == KICONV_JA_REPLACE)
564*6008Syy154373 rv++;
565*6008Syy154373 KICONV_JA_PUTU(u32);
566*6008Syy154373 } else { /* 2nd byte check failed */
567*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
568*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
569*6008Syy154373 rv++;
570*6008Syy154373 } else {
571*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
572*6008Syy154373 }
573*6008Syy154373 }
574*6008Syy154373 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
575*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
576*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
577*6008Syy154373 } else {
578*6008Syy154373 KICONV_JA_NGET(ic2);
579*6008Syy154373 }
580*6008Syy154373 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
581*6008Syy154373 index = (ic2 - 0xa1);
582*6008Syy154373 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
583*6008Syy154373 KICONV_JA_PUTU(u32);
584*6008Syy154373 } else { /* 2nd byte check failed */
585*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
586*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
587*6008Syy154373 rv++;
588*6008Syy154373 } else {
589*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
590*6008Syy154373 }
591*6008Syy154373 }
592*6008Syy154373 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
593*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
594*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
595*6008Syy154373 } else {
596*6008Syy154373 KICONV_JA_NGET(ic2);
597*6008Syy154373 }
598*6008Syy154373 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
599*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
600*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic3);
601*6008Syy154373 } else {
602*6008Syy154373 KICONV_JA_NGET(ic3);
603*6008Syy154373 }
604*6008Syy154373 if (KICONV_JA_ISCS3(ic3)) {
605*6008Syy154373 /* 3rd byte check passed */
606*6008Syy154373 ic2 &= KICONV_JA_CMASK;
607*6008Syy154373 ic3 &= KICONV_JA_CMASK;
608*6008Syy154373 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
609*6008Syy154373 ic2, ic3);
610*6008Syy154373 if (u32 == KICONV_JA_NODEST) {
611*6008Syy154373 index = ((ic2 - 0x21) * 94 +
612*6008Syy154373 (ic3 - 0x21));
613*6008Syy154373 u32 = kiconv_ja_jisx0212_to_ucs2
614*6008Syy154373 [index];
615*6008Syy154373 }
616*6008Syy154373 if (u32 == KICONV_JA_REPLACE)
617*6008Syy154373 rv++;
618*6008Syy154373 KICONV_JA_PUTU(u32);
619*6008Syy154373 } else { /* 3rd byte check failed */
620*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
621*6008Syy154373 KICONV_JA_PUTU(
622*6008Syy154373 KICONV_JA_REPLACE);
623*6008Syy154373 rv++;
624*6008Syy154373 } else {
625*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
626*6008Syy154373 }
627*6008Syy154373 }
628*6008Syy154373 } else { /* 2nd byte check failed */
629*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
630*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
631*6008Syy154373 rv++;
632*6008Syy154373 } else {
633*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
634*6008Syy154373 }
635*6008Syy154373 }
636*6008Syy154373 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
637*6008Syy154373 /* C1 control; 1 byte */
638*6008Syy154373 u32 = ic1;
639*6008Syy154373 KICONV_JA_PUTU(u32);
640*6008Syy154373 } else { /* 1st byte check failed */
641*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
642*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
643*6008Syy154373 rv++;
644*6008Syy154373 } else {
645*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
646*6008Syy154373 }
647*6008Syy154373 }
648*6008Syy154373
649*6008Syy154373 next:
650*6008Syy154373 /*
651*6008Syy154373 * One character successfully converted so update
652*6008Syy154373 * values outside of this function's stack.
653*6008Syy154373 */
654*6008Syy154373 *inbytesleft = ileft;
655*6008Syy154373 *outbytesleft = oleft;
656*6008Syy154373 }
657*6008Syy154373
658*6008Syy154373 ret:
659*6008Syy154373 return (rv);
660*6008Syy154373 }
661*6008Syy154373
662*6008Syy154373 static size_t
_do_kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)663*6008Syy154373 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
664*6008Syy154373 size_t *outbytesleft, int flag, int *errno, uint8_t id)
665*6008Syy154373 {
666*6008Syy154373 uchar_t ic;
667*6008Syy154373 size_t rv = 0;
668*6008Syy154373 uint_t ucs4;
669*6008Syy154373 ushort_t euc16;
670*6008Syy154373
671*6008Syy154373 uchar_t *ip;
672*6008Syy154373 size_t ileft;
673*6008Syy154373 char *op;
674*6008Syy154373 size_t oleft;
675*6008Syy154373 size_t read_len;
676*6008Syy154373
677*6008Syy154373 boolean_t do_not_ignore_null;
678*6008Syy154373
679*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
680*6008Syy154373 return (0);
681*6008Syy154373 }
682*6008Syy154373
683*6008Syy154373 ip = (uchar_t *)inbuf;
684*6008Syy154373 ileft = *inbytesleft;
685*6008Syy154373 op = outbuf;
686*6008Syy154373 oleft = *outbytesleft;
687*6008Syy154373
688*6008Syy154373 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
689*6008Syy154373
690*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
691*6008Syy154373
692*6008Syy154373 while (ileft != 0) {
693*6008Syy154373 KICONV_JA_GETU(&ucs4, flag);
694*6008Syy154373
695*6008Syy154373 if (ucs4 == 0x0 && do_not_ignore_null) {
696*6008Syy154373 return (0);
697*6008Syy154373 }
698*6008Syy154373
699*6008Syy154373 if (ucs4 > 0xffff) {
700*6008Syy154373 /* non-BMP */
701*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
702*6008Syy154373 rv++;
703*6008Syy154373 goto next;
704*6008Syy154373 }
705*6008Syy154373
706*6008Syy154373 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
707*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
708*6008Syy154373 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
709*6008Syy154373 }
710*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
711*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
712*6008Syy154373 rv++;
713*6008Syy154373 goto next;
714*6008Syy154373 }
715*6008Syy154373
716*6008Syy154373 switch (euc16 & 0x8080) {
717*6008Syy154373 case 0x0000: /* CS0 */
718*6008Syy154373 ic = (uchar_t)euc16;
719*6008Syy154373 KICONV_JA_NPUT(ic);
720*6008Syy154373 break;
721*6008Syy154373 case 0x8080: /* CS1 */
722*6008Syy154373 ic = (uchar_t)((euc16 >> 8) & 0xff);
723*6008Syy154373 KICONV_JA_NPUT(ic);
724*6008Syy154373 ic = (uchar_t)(euc16 & 0xff);
725*6008Syy154373 KICONV_JA_NPUT(ic);
726*6008Syy154373 break;
727*6008Syy154373 case 0x0080: /* CS2 */
728*6008Syy154373 KICONV_JA_NPUT(SS2);
729*6008Syy154373 ic = (uchar_t)euc16;
730*6008Syy154373 KICONV_JA_NPUT(ic);
731*6008Syy154373 break;
732*6008Syy154373 case 0x8000: /* CS3 */
733*6008Syy154373 KICONV_JA_NPUT(SS3);
734*6008Syy154373 ic = (uchar_t)((euc16 >> 8) & 0xff);
735*6008Syy154373 KICONV_JA_NPUT(ic);
736*6008Syy154373 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
737*6008Syy154373 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
738*6008Syy154373 break;
739*6008Syy154373 }
740*6008Syy154373 next:
741*6008Syy154373 /*
742*6008Syy154373 * One character successfully converted so update
743*6008Syy154373 * values outside of this function's stack.
744*6008Syy154373 */
745*6008Syy154373 *inbytesleft = ileft;
746*6008Syy154373 *outbytesleft = oleft;
747*6008Syy154373 }
748*6008Syy154373
749*6008Syy154373 ret:
750*6008Syy154373 return (rv);
751*6008Syy154373 }
752*6008Syy154373
753*6008Syy154373 static size_t
kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)754*6008Syy154373 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
755*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
756*6008Syy154373 {
757*6008Syy154373 if (! kcd || kcd == (void *)-1) {
758*6008Syy154373 *errno = EBADF;
759*6008Syy154373 return ((size_t)-1);
760*6008Syy154373 }
761*6008Syy154373
762*6008Syy154373 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
763*6008Syy154373 outbuf, outbytesleft, errno));
764*6008Syy154373 }
765*6008Syy154373
766*6008Syy154373 static size_t
kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)767*6008Syy154373 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
768*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
769*6008Syy154373 {
770*6008Syy154373 if (! kcd || kcd == (void *)-1) {
771*6008Syy154373 *errno = EBADF;
772*6008Syy154373 return ((size_t)-1);
773*6008Syy154373 }
774*6008Syy154373
775*6008Syy154373 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
776*6008Syy154373 outbuf, outbytesleft, errno));
777*6008Syy154373 }
778*6008Syy154373
779*6008Syy154373 static size_t
kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)780*6008Syy154373 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
781*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
782*6008Syy154373 {
783*6008Syy154373 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
784*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
785*6008Syy154373 }
786*6008Syy154373
787*6008Syy154373 static size_t
kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)788*6008Syy154373 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
789*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
790*6008Syy154373 {
791*6008Syy154373 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
792*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
793*6008Syy154373 }
794*6008Syy154373
795*6008Syy154373 static size_t
kiconvstr_fr_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)796*6008Syy154373 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
797*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
798*6008Syy154373 {
799*6008Syy154373 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
800*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
801*6008Syy154373 }
802*6008Syy154373
803*6008Syy154373 static size_t
kiconvstr_to_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)804*6008Syy154373 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
805*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
806*6008Syy154373 {
807*6008Syy154373 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
808*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
809*6008Syy154373 }
810*6008Syy154373
811*6008Syy154373 static size_t
_do_kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)812*6008Syy154373 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
813*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
814*6008Syy154373 {
815*6008Syy154373 uint_t uni; /* UTF-32 */
816*6008Syy154373 uint_t index; /* index for table lookup */
817*6008Syy154373 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
818*6008Syy154373 size_t rv = 0; /* return value of this function */
819*6008Syy154373
820*6008Syy154373 uchar_t *ip;
821*6008Syy154373 size_t ileft;
822*6008Syy154373 char *op;
823*6008Syy154373 size_t oleft;
824*6008Syy154373 size_t id = ((kiconv_state_t)kcd)->id;
825*6008Syy154373
826*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
827*6008Syy154373 return (0);
828*6008Syy154373 }
829*6008Syy154373
830*6008Syy154373 ip = (uchar_t *)*inbuf;
831*6008Syy154373 ileft = *inbytesleft;
832*6008Syy154373 op = *outbuf;
833*6008Syy154373 oleft = *outbytesleft;
834*6008Syy154373
835*6008Syy154373 while (ileft != 0) {
836*6008Syy154373 KICONV_JA_NGET(ic1); /* get 1st byte */
837*6008Syy154373
838*6008Syy154373 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
839*6008Syy154373 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
840*6008Syy154373 KICONV_JA_PUTU(uni);
841*6008Syy154373 } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
842*6008Syy154373 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
843*6008Syy154373 KICONV_JA_PUTU(uni);
844*6008Syy154373 } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
845*6008Syy154373 KICONV_JA_NGET(ic2);
846*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
847*6008Syy154373 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
848*6008Syy154373 if (ic2 >= 0x9f) {
849*6008Syy154373 ic1++;
850*6008Syy154373 }
851*6008Syy154373 ic2 = kiconv_ja_sjtojis2[ic2];
852*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
853*6008Syy154373 if (uni == KICONV_JA_NODEST) {
854*6008Syy154373 index = ((ic1 - 0x21) * 94)
855*6008Syy154373 + (ic2 - 0x21);
856*6008Syy154373 uni = kiconv_ja_jisx0208_to_ucs2[index];
857*6008Syy154373 }
858*6008Syy154373 if (uni == KICONV_JA_REPLACE)
859*6008Syy154373 rv++;
860*6008Syy154373 KICONV_JA_PUTU(uni);
861*6008Syy154373 } else { /* 2nd byte check failed */
862*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
863*6008Syy154373 /* NOTREACHED */
864*6008Syy154373 }
865*6008Syy154373 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
866*6008Syy154373 KICONV_JA_NGET(ic2);
867*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
868*6008Syy154373 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
869*6008Syy154373 if (ic2 >= 0x9f) {
870*6008Syy154373 ic1++;
871*6008Syy154373 }
872*6008Syy154373 index = ((ic1 - 0x21) * 94)
873*6008Syy154373 + (kiconv_ja_sjtojis2[ic2] - 0x21);
874*6008Syy154373 uni = kiconv_ja_jisx0212_to_ucs2[index];
875*6008Syy154373 if (uni == KICONV_JA_REPLACE)
876*6008Syy154373 rv++;
877*6008Syy154373 KICONV_JA_PUTU(uni);
878*6008Syy154373 } else { /* 2nd byte check failed */
879*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
880*6008Syy154373 }
881*6008Syy154373 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
882*6008Syy154373 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
883*6008Syy154373 /*
884*6008Syy154373 * We need a special treatment for each codes.
885*6008Syy154373 * By adding some offset number for them, we
886*6008Syy154373 * can process them as the same way of that of
887*6008Syy154373 * extended IBM chars.
888*6008Syy154373 */
889*6008Syy154373 KICONV_JA_NGET(ic2);
890*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
891*6008Syy154373 ushort_t dest, upper, lower;
892*6008Syy154373 dest = (ic1 << 8) + ic2;
893*6008Syy154373 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
894*6008Syy154373 KICONV_JA_REMAP_NEC(dest);
895*6008Syy154373 if (dest == 0xffff) {
896*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
897*6008Syy154373 }
898*6008Syy154373 }
899*6008Syy154373 /*
900*6008Syy154373 * XXX: 0xfa54 and 0xfa5b must be mapped
901*6008Syy154373 * to JIS0208 area. Therefore we
902*6008Syy154373 * have to do special treatment.
903*6008Syy154373 */
904*6008Syy154373 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
905*6008Syy154373 if (dest == 0xfa54) {
906*6008Syy154373 upper = 0x22;
907*6008Syy154373 lower = 0x4c;
908*6008Syy154373 } else {
909*6008Syy154373 upper = 0x22;
910*6008Syy154373 lower = 0x68;
911*6008Syy154373 }
912*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
913*6008Syy154373 upper, lower);
914*6008Syy154373 if (uni == KICONV_JA_NODEST) {
915*6008Syy154373 index = (uint_t)((upper - 0x21)
916*6008Syy154373 * 94 + (lower - 0x21));
917*6008Syy154373 uni = kiconv_ja_jisx0208_to_ucs2
918*6008Syy154373 [index];
919*6008Syy154373 }
920*6008Syy154373 if (uni == KICONV_JA_REPLACE)
921*6008Syy154373 rv++;
922*6008Syy154373 KICONV_JA_PUTU(uni);
923*6008Syy154373 } else {
924*6008Syy154373 dest = dest - 0xfa40 -
925*6008Syy154373 (((dest>>8) - 0xfa) * 0x40);
926*6008Syy154373 dest = kiconv_ja_sjtoibmext[dest];
927*6008Syy154373 if (dest == 0xffff) {
928*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
929*6008Syy154373 }
930*6008Syy154373 upper = (dest >> 8) & KICONV_JA_CMASK;
931*6008Syy154373 lower = dest & KICONV_JA_CMASK;
932*6008Syy154373 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
933*6008Syy154373 upper, lower);
934*6008Syy154373 if (uni == KICONV_JA_NODEST) {
935*6008Syy154373 index = (uint_t)((upper - 0x21)
936*6008Syy154373 * 94 + (lower - 0x21));
937*6008Syy154373 uni = kiconv_ja_jisx0212_to_ucs2
938*6008Syy154373 [index];
939*6008Syy154373 }
940*6008Syy154373 if (uni == KICONV_JA_REPLACE)
941*6008Syy154373 rv++;
942*6008Syy154373 KICONV_JA_PUTU(uni);
943*6008Syy154373 }
944*6008Syy154373 } else { /* 2nd byte check failed */
945*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
946*6008Syy154373 }
947*6008Syy154373 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
948*6008Syy154373 /*
949*6008Syy154373 * Based on the draft convention of OSF-JVC CDEWG,
950*6008Syy154373 * characters in this area will be mapped to
951*6008Syy154373 * "CHIKAN-MOJI." (convertible character)
952*6008Syy154373 * We use U+FFFD in this case.
953*6008Syy154373 */
954*6008Syy154373 KICONV_JA_NGET(ic2);
955*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
956*6008Syy154373 uni = 0xfffd;
957*6008Syy154373 KICONV_JA_PUTU(uni);
958*6008Syy154373 } else { /* 2nd byte check failed */
959*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
960*6008Syy154373 }
961*6008Syy154373 } else { /* 1st byte check failed */
962*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
963*6008Syy154373 }
964*6008Syy154373
965*6008Syy154373 /*
966*6008Syy154373 * One character successfully converted so update
967*6008Syy154373 * values outside of this function's stack.
968*6008Syy154373 */
969*6008Syy154373 *inbuf = (char *)ip;
970*6008Syy154373 *inbytesleft = ileft;
971*6008Syy154373 *outbuf = op;
972*6008Syy154373 *outbytesleft = oleft;
973*6008Syy154373 }
974*6008Syy154373
975*6008Syy154373 ret:
976*6008Syy154373 return (rv);
977*6008Syy154373 }
978*6008Syy154373
979*6008Syy154373 /*
980*6008Syy154373 * _kiconv_ja_lookuptbl()
981*6008Syy154373 * Return the index number if its index-ed number
982*6008Syy154373 * is the same as dest value.
983*6008Syy154373 */
984*6008Syy154373 static ushort_t
_kiconv_ja_lookuptbl(ushort_t dest)985*6008Syy154373 _kiconv_ja_lookuptbl(ushort_t dest)
986*6008Syy154373 {
987*6008Syy154373 ushort_t tmp;
988*6008Syy154373 int i;
989*6008Syy154373 int sz = (sizeof (kiconv_ja_sjtoibmext) /
990*6008Syy154373 sizeof (kiconv_ja_sjtoibmext[0]));
991*6008Syy154373
992*6008Syy154373 for (i = 0; i < sz; i++) {
993*6008Syy154373 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
994*6008Syy154373 if (tmp == dest)
995*6008Syy154373 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
996*6008Syy154373 }
997*6008Syy154373 return (0x3f);
998*6008Syy154373 }
999*6008Syy154373
1000*6008Syy154373 static size_t
_do_kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1001*6008Syy154373 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1002*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
1003*6008Syy154373 {
1004*6008Syy154373 uchar_t ic;
1005*6008Syy154373 size_t rv = 0;
1006*6008Syy154373 uint_t ucs4;
1007*6008Syy154373 ushort_t euc16;
1008*6008Syy154373 ushort_t dest;
1009*6008Syy154373
1010*6008Syy154373 uchar_t *ip;
1011*6008Syy154373 size_t ileft;
1012*6008Syy154373 char *op;
1013*6008Syy154373 size_t oleft;
1014*6008Syy154373 size_t read_len;
1015*6008Syy154373
1016*6008Syy154373 size_t id = ((kiconv_state_t)kcd)->id;
1017*6008Syy154373
1018*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
1019*6008Syy154373 return (0);
1020*6008Syy154373 }
1021*6008Syy154373
1022*6008Syy154373 ip = (uchar_t *)*inbuf;
1023*6008Syy154373 ileft = *inbytesleft;
1024*6008Syy154373 op = *outbuf;
1025*6008Syy154373 oleft = *outbytesleft;
1026*6008Syy154373
1027*6008Syy154373 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
1028*6008Syy154373
1029*6008Syy154373 while (ileft != 0) {
1030*6008Syy154373 KICONV_JA_GETU(&ucs4, 0);
1031*6008Syy154373
1032*6008Syy154373 if (ucs4 > 0xffff) {
1033*6008Syy154373 /* non-BMP */
1034*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1035*6008Syy154373 rv++;
1036*6008Syy154373 goto next;
1037*6008Syy154373 }
1038*6008Syy154373
1039*6008Syy154373 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1040*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
1041*6008Syy154373 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1042*6008Syy154373 }
1043*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
1044*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1045*6008Syy154373 rv++;
1046*6008Syy154373 goto next;
1047*6008Syy154373 }
1048*6008Syy154373
1049*6008Syy154373 switch (euc16 & 0x8080) {
1050*6008Syy154373 case 0x0000: /* CS0 */
1051*6008Syy154373 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1052*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1053*6008Syy154373 rv++;
1054*6008Syy154373 } else {
1055*6008Syy154373 ic = (uchar_t)euc16;
1056*6008Syy154373 KICONV_JA_NPUT(ic);
1057*6008Syy154373 }
1058*6008Syy154373 break;
1059*6008Syy154373 case 0x8080: /* CS1 */
1060*6008Syy154373 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1061*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1062*6008Syy154373 /*
1063*6008Syy154373 * for even number row (Ku), add 0x80 to
1064*6008Syy154373 * look latter half of kiconv_ja_jistosj2[] array
1065*6008Syy154373 */
1066*6008Syy154373 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1067*6008Syy154373 + (((ic % 2) == 0) ? 0x80 : 0x00));
1068*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1069*6008Syy154373 break;
1070*6008Syy154373 case 0x0080: /* CS2 */
1071*6008Syy154373 ic = (uchar_t)euc16;
1072*6008Syy154373 KICONV_JA_NPUT(ic);
1073*6008Syy154373 break;
1074*6008Syy154373 case 0x8000: /* CS3 */
1075*6008Syy154373 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1076*6008Syy154373 if (euc16 == 0xa271) {
1077*6008Syy154373 /* NUMERO SIGN */
1078*6008Syy154373 KICONV_JA_NPUT(0x87);
1079*6008Syy154373 KICONV_JA_NPUT(0x82);
1080*6008Syy154373 } else if (ic < 0x75) { /* check if IBM VDC */
1081*6008Syy154373 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1082*6008Syy154373 if (dest == 0xffff) {
1083*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1084*6008Syy154373 } else {
1085*6008Syy154373 /* avoid putting NUL ('\0') */
1086*6008Syy154373 if (dest > 0xff) {
1087*6008Syy154373 KICONV_JA_NPUT(
1088*6008Syy154373 (dest >> 8) & 0xff);
1089*6008Syy154373 KICONV_JA_NPUT(dest & 0xff);
1090*6008Syy154373 } else {
1091*6008Syy154373 KICONV_JA_NPUT(dest & 0xff);
1092*6008Syy154373 }
1093*6008Syy154373 }
1094*6008Syy154373 } else {
1095*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1096*6008Syy154373 /*
1097*6008Syy154373 * for even number row (Ku), add 0x80 to
1098*6008Syy154373 * look latter half of kiconv_ja_jistosj2[]
1099*6008Syy154373 */
1100*6008Syy154373 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1101*6008Syy154373 + (((ic % 2) == 0) ? 0x80 : 0x00));
1102*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1103*6008Syy154373 }
1104*6008Syy154373 break;
1105*6008Syy154373 }
1106*6008Syy154373
1107*6008Syy154373 next:
1108*6008Syy154373 /*
1109*6008Syy154373 * One character successfully converted so update
1110*6008Syy154373 * values outside of this function's stack.
1111*6008Syy154373 */
1112*6008Syy154373 *inbuf = (char *)ip;
1113*6008Syy154373 *inbytesleft = ileft;
1114*6008Syy154373 *outbuf = op;
1115*6008Syy154373 *outbytesleft = oleft;
1116*6008Syy154373 }
1117*6008Syy154373
1118*6008Syy154373 ret:
1119*6008Syy154373 return (rv);
1120*6008Syy154373 }
1121*6008Syy154373
1122*6008Syy154373 static size_t
_do_kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1123*6008Syy154373 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1124*6008Syy154373 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1125*6008Syy154373 {
1126*6008Syy154373 uint_t uni; /* UTF-32 */
1127*6008Syy154373 uint_t index; /* index for table lookup */
1128*6008Syy154373 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
1129*6008Syy154373 size_t rv = 0; /* return value of this function */
1130*6008Syy154373
1131*6008Syy154373 uchar_t *ip;
1132*6008Syy154373 size_t ileft;
1133*6008Syy154373 char *op;
1134*6008Syy154373 size_t oleft;
1135*6008Syy154373
1136*6008Syy154373 boolean_t do_not_ignore_null;
1137*6008Syy154373
1138*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
1139*6008Syy154373 return (0);
1140*6008Syy154373 }
1141*6008Syy154373
1142*6008Syy154373 ip = (uchar_t *)inbuf;
1143*6008Syy154373 ileft = *inbytesleft;
1144*6008Syy154373 op = outbuf;
1145*6008Syy154373 oleft = *outbytesleft;
1146*6008Syy154373
1147*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1148*6008Syy154373
1149*6008Syy154373 while (ileft != 0) {
1150*6008Syy154373 KICONV_JA_NGET(ic1); /* get 1st byte */
1151*6008Syy154373
1152*6008Syy154373 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
1153*6008Syy154373 if (ic1 == '\0' && do_not_ignore_null) {
1154*6008Syy154373 return (0);
1155*6008Syy154373 }
1156*6008Syy154373 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1157*6008Syy154373 KICONV_JA_PUTU(uni);
1158*6008Syy154373 } else if (KICONV_JA_ISSJKANA(ic1)) {
1159*6008Syy154373 /* JIS X 0201 Kana; 1 byte */
1160*6008Syy154373 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1161*6008Syy154373 KICONV_JA_PUTU(uni);
1162*6008Syy154373 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1163*6008Syy154373 /* JIS X 0208 or UDC; 2 bytes */
1164*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1165*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
1166*6008Syy154373 } else {
1167*6008Syy154373 KICONV_JA_NGET(ic2);
1168*6008Syy154373 }
1169*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
1170*6008Syy154373 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1171*6008Syy154373 if (ic2 >= 0x9f) {
1172*6008Syy154373 ic1++;
1173*6008Syy154373 }
1174*6008Syy154373 ic2 = kiconv_ja_sjtojis2[ic2];
1175*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1176*6008Syy154373 if (uni == KICONV_JA_NODEST) {
1177*6008Syy154373 index = ((ic1 - 0x21) * 94)
1178*6008Syy154373 + (ic2 - 0x21);
1179*6008Syy154373 uni = kiconv_ja_jisx0208_to_ucs2[index];
1180*6008Syy154373 }
1181*6008Syy154373 if (uni == KICONV_JA_REPLACE)
1182*6008Syy154373 rv++;
1183*6008Syy154373 KICONV_JA_PUTU(uni);
1184*6008Syy154373 } else { /* 2nd byte check failed */
1185*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1186*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1187*6008Syy154373 rv++;
1188*6008Syy154373 } else {
1189*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
1190*6008Syy154373 }
1191*6008Syy154373 /* NOTREACHED */
1192*6008Syy154373 }
1193*6008Syy154373 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1194*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1195*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
1196*6008Syy154373 } else {
1197*6008Syy154373 KICONV_JA_NGET(ic2);
1198*6008Syy154373 }
1199*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
1200*6008Syy154373 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1201*6008Syy154373 if (ic2 >= 0x9f) {
1202*6008Syy154373 ic1++;
1203*6008Syy154373 }
1204*6008Syy154373 index = ((ic1 - 0x21) * 94)
1205*6008Syy154373 + (kiconv_ja_sjtojis2[ic2] - 0x21);
1206*6008Syy154373 uni = kiconv_ja_jisx0212_to_ucs2[index];
1207*6008Syy154373 if (uni == KICONV_JA_REPLACE)
1208*6008Syy154373 rv++;
1209*6008Syy154373 KICONV_JA_PUTU(uni);
1210*6008Syy154373 } else { /* 2nd byte check failed */
1211*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1212*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1213*6008Syy154373 rv++;
1214*6008Syy154373 } else {
1215*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
1216*6008Syy154373 }
1217*6008Syy154373 }
1218*6008Syy154373 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1219*6008Syy154373 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1220*6008Syy154373 /*
1221*6008Syy154373 * We need a special treatment for each codes.
1222*6008Syy154373 * By adding some offset number for them, we
1223*6008Syy154373 * can process them as the same way of that of
1224*6008Syy154373 * extended IBM chars.
1225*6008Syy154373 */
1226*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1227*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
1228*6008Syy154373 } else {
1229*6008Syy154373 KICONV_JA_NGET(ic2);
1230*6008Syy154373 }
1231*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
1232*6008Syy154373 ushort_t dest, upper, lower;
1233*6008Syy154373 dest = (ic1 << 8) + ic2;
1234*6008Syy154373 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1235*6008Syy154373 KICONV_JA_REMAP_NEC(dest);
1236*6008Syy154373 if (dest == 0xffff) {
1237*6008Syy154373 if (flag &
1238*6008Syy154373 KICONV_REPLACE_INVALID) {
1239*6008Syy154373 KICONV_JA_PUTU(
1240*6008Syy154373 KICONV_JA_REPLACE);
1241*6008Syy154373 rv++;
1242*6008Syy154373 } else {
1243*6008Syy154373 KICONV_JA_RETERROR(
1244*6008Syy154373 EILSEQ)
1245*6008Syy154373 }
1246*6008Syy154373 }
1247*6008Syy154373 }
1248*6008Syy154373 /*
1249*6008Syy154373 * XXX: 0xfa54 and 0xfa5b must be mapped
1250*6008Syy154373 * to JIS0208 area. Therefore we
1251*6008Syy154373 * have to do special treatment.
1252*6008Syy154373 */
1253*6008Syy154373 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1254*6008Syy154373 if (dest == 0xfa54) {
1255*6008Syy154373 upper = 0x22;
1256*6008Syy154373 lower = 0x4c;
1257*6008Syy154373 } else {
1258*6008Syy154373 upper = 0x22;
1259*6008Syy154373 lower = 0x68;
1260*6008Syy154373 }
1261*6008Syy154373 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1262*6008Syy154373 upper, lower);
1263*6008Syy154373 if (uni == KICONV_JA_NODEST) {
1264*6008Syy154373 index = (uint_t)((upper - 0x21)
1265*6008Syy154373 * 94 + (lower - 0x21));
1266*6008Syy154373 uni = kiconv_ja_jisx0208_to_ucs2
1267*6008Syy154373 [index];
1268*6008Syy154373 }
1269*6008Syy154373 if (uni == KICONV_JA_REPLACE)
1270*6008Syy154373 rv++;
1271*6008Syy154373 KICONV_JA_PUTU(uni);
1272*6008Syy154373 } else {
1273*6008Syy154373 dest = dest - 0xfa40 -
1274*6008Syy154373 (((dest>>8) - 0xfa) * 0x40);
1275*6008Syy154373 dest = kiconv_ja_sjtoibmext[dest];
1276*6008Syy154373 if (dest == 0xffff) {
1277*6008Syy154373 if (flag &
1278*6008Syy154373 KICONV_REPLACE_INVALID) {
1279*6008Syy154373 KICONV_JA_PUTU(
1280*6008Syy154373 KICONV_JA_REPLACE);
1281*6008Syy154373 rv++;
1282*6008Syy154373 } else {
1283*6008Syy154373 KICONV_JA_RETERROR(
1284*6008Syy154373 EILSEQ)
1285*6008Syy154373 }
1286*6008Syy154373 }
1287*6008Syy154373 upper = (dest >> 8) & KICONV_JA_CMASK;
1288*6008Syy154373 lower = dest & KICONV_JA_CMASK;
1289*6008Syy154373 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1290*6008Syy154373 upper, lower);
1291*6008Syy154373 if (uni == KICONV_JA_NODEST) {
1292*6008Syy154373 index = (uint_t)((upper - 0x21)
1293*6008Syy154373 * 94 + (lower - 0x21));
1294*6008Syy154373 uni = kiconv_ja_jisx0212_to_ucs2
1295*6008Syy154373 [index];
1296*6008Syy154373 }
1297*6008Syy154373 if (uni == KICONV_JA_REPLACE)
1298*6008Syy154373 rv++;
1299*6008Syy154373 KICONV_JA_PUTU(uni);
1300*6008Syy154373 }
1301*6008Syy154373 } else { /* 2nd byte check failed */
1302*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1303*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1304*6008Syy154373 rv++;
1305*6008Syy154373 } else {
1306*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
1307*6008Syy154373 }
1308*6008Syy154373 }
1309*6008Syy154373 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1310*6008Syy154373 /*
1311*6008Syy154373 * Based on the draft convention of OSF-JVC CDEWG,
1312*6008Syy154373 * characters in this area will be mapped to
1313*6008Syy154373 * "CHIKAN-MOJI." (convertible character)
1314*6008Syy154373 * We use U+FFFD in this case.
1315*6008Syy154373 */
1316*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1317*6008Syy154373 KICONV_JA_NGET_REP_FR_MB(ic2);
1318*6008Syy154373 } else {
1319*6008Syy154373 KICONV_JA_NGET(ic2);
1320*6008Syy154373 }
1321*6008Syy154373 if (KICONV_JA_ISSJKANJI2(ic2)) {
1322*6008Syy154373 uni = 0xfffd;
1323*6008Syy154373 KICONV_JA_PUTU(uni);
1324*6008Syy154373 } else { /* 2nd byte check failed */
1325*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1326*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1327*6008Syy154373 rv++;
1328*6008Syy154373 } else {
1329*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
1330*6008Syy154373 }
1331*6008Syy154373 }
1332*6008Syy154373 } else { /* 1st byte check failed */
1333*6008Syy154373 if (flag & KICONV_REPLACE_INVALID) {
1334*6008Syy154373 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1335*6008Syy154373 rv++;
1336*6008Syy154373 } else {
1337*6008Syy154373 KICONV_JA_RETERROR(EILSEQ)
1338*6008Syy154373 }
1339*6008Syy154373 }
1340*6008Syy154373
1341*6008Syy154373 next:
1342*6008Syy154373 /*
1343*6008Syy154373 * One character successfully converted so update
1344*6008Syy154373 * values outside of this function's stack.
1345*6008Syy154373 */
1346*6008Syy154373 *inbytesleft = ileft;
1347*6008Syy154373 *outbytesleft = oleft;
1348*6008Syy154373 }
1349*6008Syy154373
1350*6008Syy154373 ret:
1351*6008Syy154373 return (rv);
1352*6008Syy154373 }
1353*6008Syy154373
1354*6008Syy154373 static size_t
_do_kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1355*6008Syy154373 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1356*6008Syy154373 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1357*6008Syy154373 {
1358*6008Syy154373 uchar_t ic;
1359*6008Syy154373 size_t rv = 0;
1360*6008Syy154373 uint_t ucs4;
1361*6008Syy154373 ushort_t euc16;
1362*6008Syy154373 ushort_t dest;
1363*6008Syy154373
1364*6008Syy154373 uchar_t *ip;
1365*6008Syy154373 size_t ileft;
1366*6008Syy154373 char *op;
1367*6008Syy154373 size_t oleft;
1368*6008Syy154373 size_t read_len;
1369*6008Syy154373
1370*6008Syy154373 boolean_t do_not_ignore_null;
1371*6008Syy154373
1372*6008Syy154373 if ((inbuf == NULL) || (*inbuf == NULL)) {
1373*6008Syy154373 return (0);
1374*6008Syy154373 }
1375*6008Syy154373
1376*6008Syy154373 ip = (uchar_t *)inbuf;
1377*6008Syy154373 ileft = *inbytesleft;
1378*6008Syy154373 op = outbuf;
1379*6008Syy154373 oleft = *outbytesleft;
1380*6008Syy154373
1381*6008Syy154373 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1382*6008Syy154373
1383*6008Syy154373 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1384*6008Syy154373
1385*6008Syy154373 while (ileft != 0) {
1386*6008Syy154373 KICONV_JA_GETU(&ucs4, flag);
1387*6008Syy154373
1388*6008Syy154373 if (ucs4 == 0x0 && do_not_ignore_null) {
1389*6008Syy154373 return (0);
1390*6008Syy154373 }
1391*6008Syy154373
1392*6008Syy154373 if (ucs4 > 0xffff) {
1393*6008Syy154373 /* non-BMP */
1394*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1395*6008Syy154373 rv++;
1396*6008Syy154373 goto next;
1397*6008Syy154373 }
1398*6008Syy154373
1399*6008Syy154373 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1400*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
1401*6008Syy154373 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1402*6008Syy154373 }
1403*6008Syy154373 if (euc16 == KICONV_JA_NODEST) {
1404*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1405*6008Syy154373 rv++;
1406*6008Syy154373 goto next;
1407*6008Syy154373 }
1408*6008Syy154373
1409*6008Syy154373 switch (euc16 & 0x8080) {
1410*6008Syy154373 case 0x0000: /* CS0 */
1411*6008Syy154373 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1412*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1413*6008Syy154373 rv++;
1414*6008Syy154373 } else {
1415*6008Syy154373 ic = (uchar_t)euc16;
1416*6008Syy154373 KICONV_JA_NPUT(ic);
1417*6008Syy154373 }
1418*6008Syy154373 break;
1419*6008Syy154373 case 0x8080: /* CS1 */
1420*6008Syy154373 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1421*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1422*6008Syy154373 /*
1423*6008Syy154373 * for even number row (Ku), add 0x80 to
1424*6008Syy154373 * look latter half of kiconv_ja_jistosj2[] array
1425*6008Syy154373 */
1426*6008Syy154373 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1427*6008Syy154373 + (((ic % 2) == 0) ? 0x80 : 0x00));
1428*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1429*6008Syy154373 break;
1430*6008Syy154373 case 0x0080: /* CS2 */
1431*6008Syy154373 ic = (uchar_t)euc16;
1432*6008Syy154373 KICONV_JA_NPUT(ic);
1433*6008Syy154373 break;
1434*6008Syy154373 case 0x8000: /* CS3 */
1435*6008Syy154373 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1436*6008Syy154373 if (euc16 == 0xa271) {
1437*6008Syy154373 /* NUMERO SIGN */
1438*6008Syy154373 KICONV_JA_NPUT(0x87);
1439*6008Syy154373 KICONV_JA_NPUT(0x82);
1440*6008Syy154373 } else if (ic < 0x75) { /* check if IBM VDC */
1441*6008Syy154373 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1442*6008Syy154373 if (dest == 0xffff) {
1443*6008Syy154373 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1444*6008Syy154373 } else {
1445*6008Syy154373 /* avoid putting NUL ('\0') */
1446*6008Syy154373 if (dest > 0xff) {
1447*6008Syy154373 KICONV_JA_NPUT(
1448*6008Syy154373 (dest >> 8) & 0xff);
1449*6008Syy154373 KICONV_JA_NPUT(dest & 0xff);
1450*6008Syy154373 } else {
1451*6008Syy154373 KICONV_JA_NPUT(dest & 0xff);
1452*6008Syy154373 }
1453*6008Syy154373 }
1454*6008Syy154373 } else {
1455*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1456*6008Syy154373 /*
1457*6008Syy154373 * for even number row (Ku), add 0x80 to
1458*6008Syy154373 * look latter half of kiconv_ja_jistosj2[]
1459*6008Syy154373 */
1460*6008Syy154373 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1461*6008Syy154373 + (((ic % 2) == 0) ? 0x80 : 0x00));
1462*6008Syy154373 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1463*6008Syy154373 }
1464*6008Syy154373 break;
1465*6008Syy154373 }
1466*6008Syy154373
1467*6008Syy154373 next:
1468*6008Syy154373 /*
1469*6008Syy154373 * One character successfully converted so update
1470*6008Syy154373 * values outside of this function's stack.
1471*6008Syy154373 */
1472*6008Syy154373 *inbytesleft = ileft;
1473*6008Syy154373 *outbytesleft = oleft;
1474*6008Syy154373 }
1475*6008Syy154373
1476*6008Syy154373 ret:
1477*6008Syy154373 return (rv);
1478*6008Syy154373 }
1479*6008Syy154373
1480*6008Syy154373 static size_t
kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1481*6008Syy154373 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1482*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
1483*6008Syy154373 {
1484*6008Syy154373 if (! kcd || kcd == (void *)-1) {
1485*6008Syy154373 *errno = EBADF;
1486*6008Syy154373 return ((size_t)-1);
1487*6008Syy154373 }
1488*6008Syy154373
1489*6008Syy154373 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1490*6008Syy154373 outbuf, outbytesleft, errno));
1491*6008Syy154373 }
1492*6008Syy154373
1493*6008Syy154373 static size_t
kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1494*6008Syy154373 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1495*6008Syy154373 char **outbuf, size_t *outbytesleft, int *errno)
1496*6008Syy154373 {
1497*6008Syy154373 if (! kcd || kcd == (void *)-1) {
1498*6008Syy154373 *errno = EBADF;
1499*6008Syy154373 return ((size_t)-1);
1500*6008Syy154373 }
1501*6008Syy154373
1502*6008Syy154373 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1503*6008Syy154373 outbuf, outbytesleft, errno));
1504*6008Syy154373 }
1505*6008Syy154373
1506*6008Syy154373 static size_t
kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1507*6008Syy154373 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1508*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
1509*6008Syy154373 {
1510*6008Syy154373 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1511*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512*6008Syy154373 }
1513*6008Syy154373
1514*6008Syy154373 static size_t
kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1515*6008Syy154373 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1516*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
1517*6008Syy154373 {
1518*6008Syy154373 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1519*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520*6008Syy154373 }
1521*6008Syy154373
1522*6008Syy154373 static size_t
kiconvstr_fr_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1523*6008Syy154373 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1524*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
1525*6008Syy154373 {
1526*6008Syy154373 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1527*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528*6008Syy154373 }
1529*6008Syy154373
1530*6008Syy154373 static size_t
kiconvstr_to_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1531*6008Syy154373 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1532*6008Syy154373 size_t *outbytesleft, int flag, int *errno)
1533*6008Syy154373 {
1534*6008Syy154373 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1535*6008Syy154373 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536*6008Syy154373 }
1537*6008Syy154373
1538*6008Syy154373 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1539*6008Syy154373 {
1540*6008Syy154373 "eucjp", "utf-8", open_eucjp,
1541*6008Syy154373 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1542*6008Syy154373 },
1543*6008Syy154373 {
1544*6008Syy154373 "utf-8", "eucjp", open_eucjp,
1545*6008Syy154373 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1546*6008Syy154373 },
1547*6008Syy154373 {
1548*6008Syy154373 "eucjpms", "utf-8", open_eucjpms,
1549*6008Syy154373 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1550*6008Syy154373 },
1551*6008Syy154373 {
1552*6008Syy154373 "utf-8", "eucjpms", open_eucjpms,
1553*6008Syy154373 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1554*6008Syy154373 },
1555*6008Syy154373 {
1556*6008Syy154373 "sjis", "utf-8", open_sjis,
1557*6008Syy154373 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1558*6008Syy154373 },
1559*6008Syy154373 {
1560*6008Syy154373 "utf-8", "sjis", open_sjis,
1561*6008Syy154373 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1562*6008Syy154373 },
1563*6008Syy154373 {
1564*6008Syy154373 "cp932", "utf-8", open_cp932,
1565*6008Syy154373 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1566*6008Syy154373 },
1567*6008Syy154373 {
1568*6008Syy154373 "utf-8", "cp932", open_cp932,
1569*6008Syy154373 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1570*6008Syy154373 }
1571*6008Syy154373 };
1572*6008Syy154373
/*
 * Alias names and the canonical names they stand for.  The two arrays
 * have the same length and are presumably paired by index
 * ("932" -> "cp932", "shiftjis" -> "sjis", "pck" -> "sjis"); confirm
 * against the consumer of kiconv_module_info_t.
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};
1575*6008Syy154373
/*
 * Element counts of the conversion table and of the alias list, computed
 * at compile time from the array definitions.
 */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1580*6008Syy154373
1581*6008Syy154373 static kiconv_module_info_t kiconv_ja_info = {
1582*6008Syy154373 "kiconv_ja", /* module name */
1583*6008Syy154373 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */
1584*6008Syy154373 kiconv_ja_ops_tbl, /* kiconv_ja ops table */
1585*6008Syy154373 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1586*6008Syy154373 kiconv_ja_aliases, /* kiconv_ja aliases */
1587*6008Syy154373 kiconv_ja_canonicals, /* kiconv_ja canonicals */
1588*6008Syy154373 0
1589*6008Syy154373 };
1590*6008Syy154373
1591*6008Syy154373 static struct modlkiconv modlkiconv_ja = {
1592*6008Syy154373 &mod_kiconvops,
1593*6008Syy154373 "kiconv module for Japanese",
1594*6008Syy154373 &kiconv_ja_info
1595*6008Syy154373 };
1596*6008Syy154373
1597*6008Syy154373 static struct modlinkage modlinkage = {
1598*6008Syy154373 MODREV_1,
1599*6008Syy154373 (void *)&modlkiconv_ja,
1600*6008Syy154373 NULL
1601*6008Syy154373 };
1602*6008Syy154373
1603*6008Syy154373 int
_init(void)1604*6008Syy154373 _init(void)
1605*6008Syy154373 {
1606*6008Syy154373 int err;
1607*6008Syy154373
1608*6008Syy154373 err = mod_install(&modlinkage);
1609*6008Syy154373 if (err)
1610*6008Syy154373 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1611*6008Syy154373
1612*6008Syy154373 return (err);
1613*6008Syy154373 }
1614*6008Syy154373
1615*6008Syy154373 int
_info(struct modinfo * modinfop)1616*6008Syy154373 _info(struct modinfo *modinfop)
1617*6008Syy154373 {
1618*6008Syy154373 return (mod_info(&modlinkage, modinfop));
1619*6008Syy154373 }
1620*6008Syy154373
1621*6008Syy154373 int
_fini(void)1622*6008Syy154373 _fini(void)
1623*6008Syy154373 {
1624*6008Syy154373 int err;
1625*6008Syy154373
1626*6008Syy154373 /*
1627*6008Syy154373 * If this module is being used, then, we cannot remove the module.
1628*6008Syy154373 * The following checking will catch pretty much all usual cases.
1629*6008Syy154373 *
1630*6008Syy154373 * Any remaining will be catached by the kiconv_unregister_module()
1631*6008Syy154373 * during mod_remove() at below.
1632*6008Syy154373 */
1633*6008Syy154373 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1634*6008Syy154373 return (EBUSY);
1635*6008Syy154373
1636*6008Syy154373 err = mod_remove(&modlinkage);
1637*6008Syy154373 if (err)
1638*6008Syy154373 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1639*6008Syy154373
1640*6008Syy154373 return (err);
1641*6008Syy154373 }
1642