1 /* $NetBSD: citrus_euc.c,v 1.6 2003/06/25 09:51:42 tshiozak Exp $ */ 2 3 /*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 #include <sys/cdefs.h> 66 #if defined(LIBC_SCCS) && !defined(lint) 67 __RCSID("$NetBSD: citrus_euc.c,v 1.6 2003/06/25 09:51:42 tshiozak Exp $"); 68 #endif /* LIBC_SCCS and not lint */ 69 70 #include <assert.h> 71 #include <errno.h> 72 #include <string.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <stddef.h> 76 #include <locale.h> 77 #include <wchar.h> 78 #include <sys/types.h> 79 #include <limits.h> 80 81 #include "citrus_namespace.h" 82 #include "citrus_types.h" 83 #include "citrus_module.h" 84 #include "citrus_ctype.h" 85 #include "citrus_stdenc.h" 86 #include "citrus_euc.h" 87 88 89 /* ---------------------------------------------------------------------- 90 * private stuffs used by templates 91 */ 92 93 typedef struct { 94 char ch[3]; 95 int chlen; 96 } _EUCState; 97 98 typedef struct { 99 unsigned count[4]; 100 wchar_t bits[4]; 101 wchar_t mask; 102 unsigned mb_cur_max; 103 } _EUCEncodingInfo; 104 105 typedef struct { 106 _EUCEncodingInfo ei; 107 struct { 108 /* for future multi-locale facility */ 109 _EUCState s_mblen; 110 _EUCState s_mbrlen; 111 _EUCState s_mbrtowc; 112 _EUCState s_mbtowc; 113 _EUCState s_mbsrtowcs; 114 _EUCState s_wcrtomb; 115 _EUCState s_wcsrtombs; 116 _EUCState s_wctomb; 117 } states; 118 } _EUCCTypeInfo; 119 120 #define _SS2 0x008e 121 #define _SS3 0x008f 122 123 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 124 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 125 126 #define _FUNCNAME(m) _citrus_EUC_##m 127 #define _ENCODING_INFO _EUCEncodingInfo 128 #define _CTYPE_INFO _EUCCTypeInfo 129 #define _ENCODING_STATE _EUCState 130 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 131 #define _ENCODING_IS_STATE_DEPENDENT 0 132 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 133 134 135 static __inline int 136 _citrus_EUC_cs(unsigned int c) 137 { 138 c &= 0xff; 139 140 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 141 } 142 143 static __inline int 144 _citrus_EUC_parse_variable(_EUCEncodingInfo *ei, 145 const void *var, size_t lenvar) 146 { 147 const char *v, *e; 148 int x; 149 150 /* parse variable string */ 151 if (!var) 152 return (EFTYPE); 153 154 v = (const char *) var; 155 156 while (*v == ' ' || *v == '\t') 157 ++v; 158 159 ei->mb_cur_max = 1; 160 for (x = 0; x < 4; ++x) { 161 ei->count[x] = (int) strtol(v, (char **)&e, 0); 162 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) { 163 return (EFTYPE); 164 } 165 if (ei->mb_cur_max < ei->count[x]) 166 ei->mb_cur_max = ei->count[x]; 167 while (*v == ' ' || *v == '\t') 168 ++v; 169 ei->bits[x] = (int) strtol(v, (char **)&e, 0); 170 if (v == e || !(v = e)) { 171 return (EFTYPE); 172 } 173 while (*v == ' ' || *v == '\t') 174 ++v; 175 } 176 ei->mask = (int)strtol(v, (char **)&e, 0); 177 if (v == e || !(v = e)) { 178 return (EFTYPE); 179 } 180 181 return 0; 182 } 183 184 185 static __inline void 186 /*ARGSUSED*/ 187 _citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s) 188 { 189 memset(s, 0, sizeof(*s)); 190 } 191 192 static __inline void 193 /*ARGSUSED*/ 194 _citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s) 195 { 196 memcpy(pspriv, (const void *)s, sizeof(*s)); 197 } 198 199 static __inline void 200 /*ARGSUSED*/ 201 _citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s, 202 const void *pspriv) 203 { 204 memcpy((void *)s, pspriv, sizeof(*s)); 205 } 206 207 static int 208 _citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s, 209 size_t n, _EUCState *psenc, size_t *nresult) 210 { 211 wchar_t wchar; 212 int c, cs, len; 213 int chlenbak; 214 const char *s0, *s1 = NULL; 215 216 _DIAGASSERT(nresult != 0); 217 _DIAGASSERT(ei != NULL); 218 _DIAGASSERT(psenc != NULL); 219 _DIAGASSERT(s != NULL); 220 221 s0 = *s; 222 223 if (s0 == NULL) { 224 _citrus_EUC_init_state(ei, psenc); 225 *nresult = 0; /* state independent */ 226 return (0); 227 } 228 229 chlenbak = psenc->chlen; 230 231 /* make sure we have the first byte in the buffer */ 232 switch (psenc->chlen) { 233 case 0: 234 if (n < 1) 235 goto restart; 236 psenc->ch[0] = *s0++; 237 psenc->chlen = 1; 238 n--; 239 break; 240 case 1: 241 case 2: 242 break; 243 default: 244 /* illgeal state */ 245 goto encoding_error; 246 } 247 248 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 249 if (c == 0) 250 goto encoding_error; 251 while (psenc->chlen < c) { 252 if (n < 1) 253 goto restart; 254 psenc->ch[psenc->chlen] = *s0++; 255 psenc->chlen++; 256 n--; 257 } 258 *s = s0; 259 260 switch (cs) { 261 case 3: 262 case 2: 263 /* skip SS2/SS3 */ 264 len = c - 1; 265 s1 = &psenc->ch[1]; 266 break; 267 case 1: 268 case 0: 269 len = c; 270 s1 = &psenc->ch[0]; 271 break; 272 } 273 wchar = 0; 274 while (len-- > 0) 275 wchar = (wchar << 8) | (*s1++ & 0xff); 276 wchar = (wchar & ~ei->mask) | ei->bits[cs]; 277 278 psenc->chlen = 0; 279 if (pwc) 280 *pwc = wchar; 281 282 if (!wchar) { 283 *nresult = 0; 284 } else { 285 *nresult = (size_t)(c - chlenbak); 286 } 287 288 return 0; 289 290 encoding_error: 291 psenc->chlen = 0; 292 *nresult = (size_t)-1; 293 return (EILSEQ); 294 295 restart: 296 *nresult = (size_t)-2; 297 *s = s0; 298 return (0); 299 } 300 301 static int 302 _citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 303 _EUCState *psenc, size_t *nresult) 304 { 305 wchar_t m, nm; 306 int cs, i, ret; 307 308 _DIAGASSERT(ei != NULL); 309 _DIAGASSERT(nresult != 0); 310 _DIAGASSERT(s != NULL); 311 312 /* reset state */ 313 if (wc == 0) { 314 *nresult = 0; /* stateless */ 315 return 0; 316 } 317 318 m = wc & ei->mask; 319 nm = wc & ~m; 320 321 for (cs = 0; 322 cs < sizeof(ei->count)/sizeof(ei->count[0]); 323 cs++) { 324 if (m == ei->bits[cs]) 325 break; 326 } 327 /* fallback case - not sure if it is necessary */ 328 if (cs == sizeof(ei->count)/sizeof(ei->count[0])) 329 cs = 1; 330 331 i = ei->count[cs]; 332 if (n < i) { 333 ret = E2BIG; 334 goto err; 335 } 336 m = (cs % 2) ? 0x80 : 0x00; 337 switch (cs) { 338 case 2: 339 *s++ = _SS2; 340 i--; 341 break; 342 case 3: 343 *s++ = _SS3; 344 i--; 345 break; 346 } 347 348 while (i-- > 0) 349 *s++ = ((nm >> (i << 3)) & 0xff) | m; 350 351 *nresult = (size_t)ei->count[cs]; 352 return 0; 353 354 err: 355 *nresult = (size_t)-1; 356 return ret; 357 } 358 359 static __inline int 360 /*ARGSUSED*/ 361 _citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei, 362 _csid_t * __restrict csid, 363 _index_t * __restrict idx, wchar_t wc) 364 { 365 wchar_t m, nm; 366 367 _DIAGASSERT(ei != NULL && csid != NULL && idx != NULL); 368 369 m = wc & ei->mask; 370 nm = wc & ~m; 371 372 *csid = (_citrus_csid_t)m; 373 *idx = (_citrus_index_t)nm; 374 375 return (0); 376 } 377 378 static __inline int 379 /*ARGSUSED*/ 380 _citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei, 381 wchar_t * __restrict wc, 382 _csid_t csid, _index_t idx) 383 { 384 385 _DIAGASSERT(ei != NULL && wc != NULL); 386 387 if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0) 388 return (EINVAL); 389 390 *wc = (wchar_t)csid | (wchar_t)idx; 391 392 return (0); 393 } 394 395 static int 396 /*ARGSUSED*/ 397 _citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei, 398 const void * __restrict var, size_t lenvar) 399 { 400 401 _DIAGASSERT(ei != NULL); 402 403 return (_citrus_EUC_parse_variable(ei, var, lenvar)); 404 } 405 406 static void 407 /*ARGSUSED*/ 408 _citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei) 409 { 410 } 411 412 /* ---------------------------------------------------------------------- 413 * public interface for ctype 414 */ 415 416 _CITRUS_CTYPE_DECLS(EUC); 417 _CITRUS_CTYPE_DEF_OPS(EUC); 418 419 #include "citrus_ctype_template.h" 420 421 /* ---------------------------------------------------------------------- 422 * public interface for stdenc 423 */ 424 425 _CITRUS_STDENC_DECLS(EUC); 426 _CITRUS_STDENC_DEF_OPS(EUC); 427 428 #include "citrus_stdenc_template.h" 429