1 /* $NetBSD: citrus_euc.c,v 1.11 2006/03/19 01:25:44 christos Exp $ */ 2 3 /*- 4 * Copyright (c)2002 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 __RCSID("$NetBSD: citrus_euc.c,v 1.11 2006/03/19 01:25:44 christos Exp $"); 64 #endif /* LIBC_SCCS and not lint */ 65 66 #include <assert.h> 67 #include <errno.h> 68 #include <string.h> 69 #include <stdio.h> 70 #include <stdlib.h> 71 #include <stddef.h> 72 #include <locale.h> 73 #include <wchar.h> 74 #include <sys/types.h> 75 #include <limits.h> 76 77 #include "citrus_namespace.h" 78 #include "citrus_types.h" 79 #include "citrus_module.h" 80 #include "citrus_ctype.h" 81 #include "citrus_stdenc.h" 82 #include "citrus_euc.h" 83 84 85 /* ---------------------------------------------------------------------- 86 * private stuffs used by templates 87 */ 88 89 typedef struct { 90 char ch[3]; 91 int chlen; 92 } _EUCState; 93 94 typedef struct { 95 unsigned count[4]; 96 wchar_t bits[4]; 97 wchar_t mask; 98 unsigned mb_cur_max; 99 } _EUCEncodingInfo; 100 101 typedef struct { 102 _EUCEncodingInfo ei; 103 struct { 104 /* for future multi-locale facility */ 105 _EUCState s_mblen; 106 _EUCState s_mbrlen; 107 _EUCState s_mbrtowc; 108 _EUCState s_mbtowc; 109 _EUCState s_mbsrtowcs; 110 _EUCState s_wcrtomb; 111 _EUCState s_wcsrtombs; 112 _EUCState s_wctomb; 113 } states; 114 } _EUCCTypeInfo; 115 116 #define _SS2 0x008e 117 #define _SS3 0x008f 118 119 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 120 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 121 122 #define _FUNCNAME(m) _citrus_EUC_##m 123 #define _ENCODING_INFO _EUCEncodingInfo 124 #define _CTYPE_INFO _EUCCTypeInfo 125 #define _ENCODING_STATE _EUCState 126 #define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max 127 #define _ENCODING_IS_STATE_DEPENDENT 0 128 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 129 130 131 static __inline int 132 _citrus_EUC_cs(unsigned int c) 133 { 134 c &= 0xff; 135 136 return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0); 137 } 138 139 static __inline int 140 _citrus_EUC_parse_variable(_EUCEncodingInfo *ei, 141 const void *var, size_t lenvar) 142 { 143 const char *v, *e; 144 int x; 145 146 /* parse variable string */ 147 if (!var) 148 return (EFTYPE); 149 150 v = (const char *) var; 151 152 while (*v == ' ' || *v == '\t') 153 ++v; 154 155 ei->mb_cur_max = 1; 156 for (x = 0; x < 4; ++x) { 157 ei->count[x] = (int) strtol(v, (char **)&e, 0); 158 if (v == e || !(v = e) || ei->count[x]<1 || ei->count[x]>4) { 159 return (EFTYPE); 160 } 161 if (ei->mb_cur_max < ei->count[x]) 162 ei->mb_cur_max = ei->count[x]; 163 while (*v == ' ' || *v == '\t') 164 ++v; 165 ei->bits[x] = (int) strtol(v, (char **)&e, 0); 166 if (v == e || !(v = e)) { 167 return (EFTYPE); 168 } 169 while (*v == ' ' || *v == '\t') 170 ++v; 171 } 172 ei->mask = (int)strtol(v, (char **)&e, 0); 173 if (v == e || !(v = e)) { 174 return (EFTYPE); 175 } 176 177 return 0; 178 } 179 180 181 static __inline void 182 /*ARGSUSED*/ 183 _citrus_EUC_init_state(_EUCEncodingInfo *ei, _EUCState *s) 184 { 185 memset(s, 0, sizeof(*s)); 186 } 187 188 static __inline void 189 /*ARGSUSED*/ 190 _citrus_EUC_pack_state(_EUCEncodingInfo *ei, void *pspriv, const _EUCState *s) 191 { 192 memcpy(pspriv, (const void *)s, sizeof(*s)); 193 } 194 195 static __inline void 196 /*ARGSUSED*/ 197 _citrus_EUC_unpack_state(_EUCEncodingInfo *ei, _EUCState *s, 198 const void *pspriv) 199 { 200 memcpy((void *)s, pspriv, sizeof(*s)); 201 } 202 203 static int 204 _citrus_EUC_mbrtowc_priv(_EUCEncodingInfo *ei, wchar_t *pwc, const char **s, 205 size_t n, _EUCState *psenc, size_t *nresult) 206 { 207 wchar_t wchar; 208 int c, cs, len; 209 int chlenbak; 210 const char *s0, *s1 = NULL; 211 212 _DIAGASSERT(nresult != 0); 213 _DIAGASSERT(ei != NULL); 214 _DIAGASSERT(psenc != NULL); 215 _DIAGASSERT(s != NULL); 216 217 s0 = *s; 218 219 if (s0 == NULL) { 220 _citrus_EUC_init_state(ei, psenc); 221 *nresult = 0; /* state independent */ 222 return (0); 223 } 224 225 chlenbak = psenc->chlen; 226 227 /* make sure we have the first byte in the buffer */ 228 switch (psenc->chlen) { 229 case 0: 230 if (n < 1) 231 goto restart; 232 psenc->ch[0] = *s0++; 233 psenc->chlen = 1; 234 n--; 235 break; 236 case 1: 237 case 2: 238 break; 239 default: 240 /* illgeal state */ 241 goto encoding_error; 242 } 243 244 c = ei->count[cs = _citrus_EUC_cs(psenc->ch[0] & 0xff)]; 245 if (c == 0) 246 goto encoding_error; 247 while (psenc->chlen < c) { 248 if (n < 1) 249 goto restart; 250 psenc->ch[psenc->chlen] = *s0++; 251 psenc->chlen++; 252 n--; 253 } 254 *s = s0; 255 256 switch (cs) { 257 case 3: 258 case 2: 259 /* skip SS2/SS3 */ 260 len = c - 1; 261 s1 = &psenc->ch[1]; 262 break; 263 case 1: 264 case 0: 265 len = c; 266 s1 = &psenc->ch[0]; 267 break; 268 default: 269 goto encoding_error; 270 } 271 wchar = 0; 272 while (len-- > 0) 273 wchar = (wchar << 8) | (*s1++ & 0xff); 274 wchar = (wchar & ~ei->mask) | ei->bits[cs]; 275 276 psenc->chlen = 0; 277 if (pwc) 278 *pwc = wchar; 279 280 if (!wchar) { 281 *nresult = 0; 282 } else { 283 *nresult = (size_t)(c - chlenbak); 284 } 285 286 return 0; 287 288 encoding_error: 289 psenc->chlen = 0; 290 *nresult = (size_t)-1; 291 return (EILSEQ); 292 293 restart: 294 *nresult = (size_t)-2; 295 *s = s0; 296 return (0); 297 } 298 299 static int 300 _citrus_EUC_wcrtomb_priv(_EUCEncodingInfo *ei, char *s, size_t n, wchar_t wc, 301 _EUCState *psenc, size_t *nresult) 302 { 303 wchar_t m, nm; 304 int cs, i, ret; 305 306 _DIAGASSERT(ei != NULL); 307 _DIAGASSERT(nresult != 0); 308 _DIAGASSERT(s != NULL); 309 310 m = wc & ei->mask; 311 nm = wc & ~m; 312 313 for (cs = 0; 314 cs < sizeof(ei->count)/sizeof(ei->count[0]); 315 cs++) { 316 if (m == ei->bits[cs]) 317 break; 318 } 319 /* fallback case - not sure if it is necessary */ 320 if (cs == sizeof(ei->count)/sizeof(ei->count[0])) 321 cs = 1; 322 323 i = ei->count[cs]; 324 if (n < i) { 325 ret = E2BIG; 326 goto err; 327 } 328 m = (cs) ? 0x80 : 0x00; 329 switch (cs) { 330 case 2: 331 *s++ = _SS2; 332 i--; 333 break; 334 case 3: 335 *s++ = _SS3; 336 i--; 337 break; 338 } 339 340 while (i-- > 0) 341 *s++ = ((nm >> (i << 3)) & 0xff) | m; 342 343 *nresult = (size_t)ei->count[cs]; 344 return 0; 345 346 err: 347 *nresult = (size_t)-1; 348 return ret; 349 } 350 351 static __inline int 352 /*ARGSUSED*/ 353 _citrus_EUC_stdenc_wctocs(_EUCEncodingInfo * __restrict ei, 354 _csid_t * __restrict csid, 355 _index_t * __restrict idx, wchar_t wc) 356 { 357 wchar_t m, nm; 358 359 _DIAGASSERT(ei != NULL && csid != NULL && idx != NULL); 360 361 m = wc & ei->mask; 362 nm = wc & ~m; 363 364 *csid = (_citrus_csid_t)m; 365 *idx = (_citrus_index_t)nm; 366 367 return (0); 368 } 369 370 static __inline int 371 /*ARGSUSED*/ 372 _citrus_EUC_stdenc_cstowc(_EUCEncodingInfo * __restrict ei, 373 wchar_t * __restrict wc, 374 _csid_t csid, _index_t idx) 375 { 376 377 _DIAGASSERT(ei != NULL && wc != NULL); 378 379 if ((csid & ~ei->mask) != 0 || (idx & ei->mask) != 0) 380 return (EINVAL); 381 382 *wc = (wchar_t)csid | (wchar_t)idx; 383 384 return (0); 385 } 386 387 static __inline int 388 /*ARGSUSED*/ 389 _citrus_EUC_stdenc_get_state_desc_generic(_EUCEncodingInfo * __restrict ei, 390 _EUCState * __restrict psenc, 391 int * __restrict rstate) 392 { 393 394 if (psenc->chlen == 0) 395 *rstate = _STDENC_SDGEN_INITIAL; 396 else 397 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR; 398 399 return 0; 400 } 401 402 static int 403 /*ARGSUSED*/ 404 _citrus_EUC_encoding_module_init(_EUCEncodingInfo * __restrict ei, 405 const void * __restrict var, size_t lenvar) 406 { 407 408 _DIAGASSERT(ei != NULL); 409 410 return (_citrus_EUC_parse_variable(ei, var, lenvar)); 411 } 412 413 static void 414 /*ARGSUSED*/ 415 _citrus_EUC_encoding_module_uninit(_EUCEncodingInfo * __restrict ei) 416 { 417 } 418 419 /* ---------------------------------------------------------------------- 420 * public interface for ctype 421 */ 422 423 _CITRUS_CTYPE_DECLS(EUC); 424 _CITRUS_CTYPE_DEF_OPS(EUC); 425 426 #include "citrus_ctype_template.h" 427 428 /* ---------------------------------------------------------------------- 429 * public interface for stdenc 430 */ 431 432 _CITRUS_STDENC_DECLS(EUC); 433 _CITRUS_STDENC_DEF_OPS(EUC); 434 435 #include "citrus_stdenc_template.h" 436