1 /* $NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $ */ 2 3 /*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.16 2012/02/12 13:51:29 wiz Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <machine/endian.h> 41 #include <sys/queue.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_module.h" 46 #include "citrus_region.h" 47 #include "citrus_mmap.h" 48 #include "citrus_hash.h" 49 #include "citrus_iconv.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_mapper.h" 52 #include "citrus_csmapper.h" 53 #include "citrus_memstream.h" 54 #include "citrus_iconv_std.h" 55 #include "citrus_esdb.h" 56 57 /* ---------------------------------------------------------------------- */ 58 59 _CITRUS_ICONV_DECLS(iconv_std); 60 _CITRUS_ICONV_DEF_OPS(iconv_std); 61 62 63 /* ---------------------------------------------------------------------- */ 64 65 int 66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops, 67 u_int32_t expected_version) 68 { 69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops)) 70 return (EINVAL); 71 72 memcpy(ops, &_citrus_iconv_std_iconv_ops, 73 sizeof(_citrus_iconv_std_iconv_ops)); 74 75 return (0); 76 } 77 78 /* ---------------------------------------------------------------------- */ 79 80 /* 81 * convenience routines for stdenc. 82 */ 83 static __inline void 84 save_encoding_state(struct _citrus_iconv_std_encoding *se) 85 { 86 if (se->se_ps) 87 memcpy(se->se_pssaved, se->se_ps, 88 _stdenc_get_state_size(se->se_handle)); 89 } 90 91 static __inline void 92 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 93 { 94 if (se->se_ps) 95 memcpy(se->se_ps, se->se_pssaved, 96 _stdenc_get_state_size(se->se_handle)); 97 } 98 99 static __inline void 100 init_encoding_state(struct _citrus_iconv_std_encoding *se) 101 { 102 if (se->se_ps) 103 _stdenc_init_state(se->se_handle, se->se_ps); 104 } 105 106 static __inline int 107 mbtocsx(struct _citrus_iconv_std_encoding *se, 108 _csid_t *csid, _index_t *idx, const char **s, size_t n, 109 size_t *nresult) 110 { 111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 112 nresult); 113 } 114 115 static __inline int 116 cstombx(struct _citrus_iconv_std_encoding *se, 117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult) 118 { 119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 120 nresult); 121 } 122 123 static __inline int 124 wctombx(struct _citrus_iconv_std_encoding *se, 125 char *s, size_t n, _wc_t wc, size_t *nresult) 126 { 127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult); 128 } 129 130 static __inline int 131 put_state_resetx(struct _citrus_iconv_std_encoding *se, 132 char *s, size_t n, size_t *nresult) 133 { 134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult); 135 } 136 137 static __inline int 138 get_state_desc_gen(struct _citrus_iconv_std_encoding *se, int *rstate) 139 { 140 int ret; 141 struct _stdenc_state_desc ssd; 142 143 ret = _stdenc_get_state_desc(se->se_handle, se->se_ps, 144 _STDENC_SDID_GENERIC, &ssd); 145 if (!ret) 146 *rstate = ssd.u.generic.state; 147 148 return ret; 149 } 150 151 /* 152 * init encoding context 153 */ 154 static int 155 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 156 void *ps1, void *ps2) 157 { 158 int ret = -1; 159 160 se->se_handle = cs; 161 se->se_ps = ps1; 162 se->se_pssaved = ps2; 163 164 if (se->se_ps) 165 ret = _stdenc_init_state(cs, se->se_ps); 166 if (!ret && se->se_pssaved) 167 ret = _stdenc_init_state(cs, se->se_pssaved); 168 169 return ret; 170 } 171 172 static int 173 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 174 unsigned long *rnorm) 175 { 176 int ret; 177 struct _csmapper *cm; 178 179 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 180 if (ret) 181 return ret; 182 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 183 _csmapper_get_state_size(cm) != 0) { 184 _csmapper_close(cm); 185 return EINVAL; 186 } 187 188 *rcm = cm; 189 190 return 0; 191 } 192 193 static void 194 close_dsts(struct _citrus_iconv_std_dst_list *dl) 195 { 196 struct _citrus_iconv_std_dst *sd; 197 198 while ((sd=TAILQ_FIRST(dl)) != NULL) { 199 TAILQ_REMOVE(dl, sd, sd_entry); 200 _csmapper_close(sd->sd_mapper); 201 free(sd); 202 } 203 } 204 205 static int 206 open_dsts(struct _citrus_iconv_std_dst_list *dl, 207 const struct _esdb_charset *ec, const struct _esdb *dbdst) 208 { 209 int i, ret; 210 struct _citrus_iconv_std_dst *sd, *sdtmp; 211 unsigned long norm; 212 213 sd = malloc(sizeof(*sd)); 214 if (sd == NULL) 215 return errno; 216 217 for (i=0; i<dbdst->db_num_charsets; i++) { 218 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 219 dbdst->db_charsets[i].ec_csname, &norm); 220 if (ret == 0) { 221 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 222 sd->sd_norm = norm; 223 /* insert this mapper by sorted order. */ 224 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 225 if (sdtmp->sd_norm > norm) { 226 TAILQ_INSERT_BEFORE(sdtmp, sd, 227 sd_entry); 228 sd = NULL; 229 break; 230 } 231 } 232 if (sd) 233 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 234 sd = malloc(sizeof(*sd)); 235 if (sd == NULL) { 236 ret = errno; 237 close_dsts(dl); 238 return ret; 239 } 240 } else if (ret != ENOENT) { 241 close_dsts(dl); 242 free(sd); 243 return ret; 244 } 245 } 246 free(sd); 247 return 0; 248 } 249 250 static void 251 close_srcs(struct _citrus_iconv_std_src_list *sl) 252 { 253 struct _citrus_iconv_std_src *ss; 254 255 while ((ss=TAILQ_FIRST(sl)) != NULL) { 256 TAILQ_REMOVE(sl, ss, ss_entry); 257 close_dsts(&ss->ss_dsts); 258 free(ss); 259 } 260 } 261 262 static int 263 open_srcs(struct _citrus_iconv_std_src_list *sl, 264 const struct _esdb *dbsrc, const struct _esdb *dbdst) 265 { 266 int i, ret, count = 0; 267 struct _citrus_iconv_std_src *ss; 268 269 ss = malloc(sizeof(*ss)); 270 if (ss == NULL) 271 return errno; 272 273 TAILQ_INIT(&ss->ss_dsts); 274 275 for (i=0; i<dbsrc->db_num_charsets; i++) { 276 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 277 if (ret) 278 goto err; 279 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 280 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 281 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 282 ss = malloc(sizeof(*ss)); 283 if (ss == NULL) { 284 ret = errno; 285 goto err; 286 } 287 count++; 288 TAILQ_INIT(&ss->ss_dsts); 289 } 290 } 291 free(ss); 292 293 return count ? 0 : ENOENT; 294 295 err: 296 free(ss); 297 close_srcs(sl); 298 return ret; 299 } 300 301 /* do convert a character */ 302 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 303 static int 304 /*ARGSUSED*/ 305 do_conv(const struct _citrus_iconv_std_shared *is, 306 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx) 307 { 308 _index_t tmpidx; 309 int ret; 310 struct _citrus_iconv_std_src *ss; 311 struct _citrus_iconv_std_dst *sd; 312 313 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 314 if (ss->ss_csid == *csid) { 315 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 316 ret = _csmapper_convert(sd->sd_mapper, 317 &tmpidx, *idx, NULL); 318 switch (ret) { 319 case _MAPPER_CONVERT_SUCCESS: 320 *csid = sd->sd_csid; 321 *idx = tmpidx; 322 return 0; 323 case _MAPPER_CONVERT_NONIDENTICAL: 324 break; 325 case _MAPPER_CONVERT_SRC_MORE: 326 /*FALLTHROUGH*/ 327 case _MAPPER_CONVERT_DST_MORE: 328 /*FALLTHROUGH*/ 329 case _MAPPER_CONVERT_FATAL: 330 return EINVAL; 331 case _MAPPER_CONVERT_ILSEQ: 332 return EILSEQ; 333 } 334 } 335 break; 336 } 337 } 338 339 return E_NO_CORRESPONDING_CHAR; 340 } 341 /* ---------------------------------------------------------------------- */ 342 343 static int 344 /*ARGSUSED*/ 345 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 346 const char * __restrict curdir, 347 const char * __restrict src, 348 const char * __restrict dst, 349 const void * __restrict var, size_t lenvar) 350 { 351 int ret; 352 struct _citrus_iconv_std_shared *is; 353 struct _citrus_esdb esdbsrc, esdbdst; 354 355 is = malloc(sizeof(*is)); 356 if (is==NULL) { 357 ret = errno; 358 goto err0; 359 } 360 ret = _citrus_esdb_open(&esdbsrc, src); 361 if (ret) 362 goto err1; 363 ret = _citrus_esdb_open(&esdbdst, dst); 364 if (ret) 365 goto err2; 366 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 367 esdbsrc.db_variable, esdbsrc.db_len_variable); 368 if (ret) 369 goto err3; 370 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 371 esdbdst.db_variable, esdbdst.db_len_variable); 372 if (ret) 373 goto err4; 374 is->is_use_invalid = esdbdst.db_use_invalid; 375 is->is_invalid = esdbdst.db_invalid; 376 377 TAILQ_INIT(&is->is_srcs); 378 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 379 if (ret) 380 goto err5; 381 382 _esdb_close(&esdbsrc); 383 _esdb_close(&esdbdst); 384 ci->ci_closure = is; 385 386 return 0; 387 388 err5: 389 _stdenc_close(is->is_dst_encoding); 390 err4: 391 _stdenc_close(is->is_src_encoding); 392 err3: 393 _esdb_close(&esdbdst); 394 err2: 395 _esdb_close(&esdbsrc); 396 err1: 397 free(is); 398 err0: 399 return ret; 400 } 401 402 static void 403 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 404 { 405 struct _citrus_iconv_std_shared *is = ci->ci_closure; 406 407 if (is == NULL) 408 return; 409 410 _stdenc_close(is->is_src_encoding); 411 _stdenc_close(is->is_dst_encoding); 412 close_srcs(&is->is_srcs); 413 free(is); 414 } 415 416 static int 417 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 418 { 419 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 420 struct _citrus_iconv_std_context *sc; 421 size_t szpssrc, szpsdst, sz; 422 char *ptr; 423 424 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 425 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 426 427 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 428 sc = malloc(sz); 429 if (sc == NULL) 430 return errno; 431 432 ptr = (char *)&sc[1]; 433 if (szpssrc) 434 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 435 ptr, ptr+szpssrc); 436 else 437 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 438 NULL, NULL); 439 ptr += szpssrc*2; 440 if (szpsdst) 441 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 442 ptr, ptr+szpsdst); 443 else 444 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 445 NULL, NULL); 446 447 cv->cv_closure = (void *)sc; 448 449 return 0; 450 } 451 452 static void 453 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 454 { 455 free(cv->cv_closure); 456 } 457 458 static int 459 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 460 const char * __restrict * __restrict in, 461 size_t * __restrict inbytes, 462 char * __restrict * __restrict out, 463 size_t * __restrict outbytes, u_int32_t flags, 464 size_t * __restrict invalids) 465 { 466 const struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 467 struct _citrus_iconv_std_context *sc = cv->cv_closure; 468 _index_t idx; 469 _csid_t csid; 470 int ret, state; 471 size_t szrin, szrout; 472 size_t inval; 473 const char *tmpin; 474 475 inval = 0; 476 if (in==NULL || *in==NULL) { 477 /* special cases */ 478 if (out!=NULL && *out!=NULL) { 479 /* init output state and store the shift sequence */ 480 save_encoding_state(&sc->sc_src_encoding); 481 save_encoding_state(&sc->sc_dst_encoding); 482 szrout = 0; 483 484 ret = put_state_resetx(&sc->sc_dst_encoding, 485 *out, *outbytes, 486 &szrout); 487 if (ret) 488 goto err; 489 490 if (szrout == (size_t)-2) { 491 /* too small to store the character */ 492 ret = EINVAL; 493 goto err; 494 } 495 *out += szrout; 496 *outbytes -= szrout; 497 } else 498 /* otherwise, discard the shift sequence */ 499 init_encoding_state(&sc->sc_dst_encoding); 500 init_encoding_state(&sc->sc_src_encoding); 501 *invalids = 0; 502 return 0; 503 } 504 505 /* normal case */ 506 for (;;) { 507 if (*inbytes==0) { 508 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 509 if (state == _STDENC_SDGEN_INITIAL || 510 state == _STDENC_SDGEN_STABLE) 511 break; 512 } 513 514 /* save the encoding states for the error recovery */ 515 save_encoding_state(&sc->sc_src_encoding); 516 save_encoding_state(&sc->sc_dst_encoding); 517 518 /* mb -> csid/index */ 519 tmpin = *in; 520 szrin = szrout = 0; 521 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, 522 &tmpin, *inbytes, &szrin); 523 if (ret) 524 goto err; 525 526 if (szrin == (size_t)-2) { 527 /* incompleted character */ 528 ret = get_state_desc_gen(&sc->sc_src_encoding, &state); 529 if (ret) { 530 ret = EINVAL; 531 goto err; 532 } 533 switch (state) { 534 case _STDENC_SDGEN_INITIAL: 535 case _STDENC_SDGEN_STABLE: 536 /* fetch shift sequences only. */ 537 goto next; 538 } 539 ret = EINVAL; 540 goto err; 541 } 542 /* convert the character */ 543 ret = do_conv(is, sc, &csid, &idx); 544 if (ret) { 545 if (ret == E_NO_CORRESPONDING_CHAR) { 546 inval++; 547 szrout = 0; 548 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 && 549 is->is_use_invalid) { 550 ret = wctombx(&sc->sc_dst_encoding, 551 *out, *outbytes, 552 is->is_invalid, 553 &szrout); 554 if (ret) 555 goto err; 556 } 557 goto next; 558 } else { 559 goto err; 560 } 561 } 562 /* csid/index -> mb */ 563 ret = cstombx(&sc->sc_dst_encoding, 564 *out, *outbytes, csid, idx, &szrout); 565 if (ret) 566 goto err; 567 next: 568 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout); 569 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 570 *in = tmpin; 571 *outbytes -= szrout; 572 *out += szrout; 573 } 574 *invalids = inval; 575 576 return 0; 577 578 err: 579 restore_encoding_state(&sc->sc_src_encoding); 580 restore_encoding_state(&sc->sc_dst_encoding); 581 err_norestore: 582 *invalids = inval; 583 584 return ret; 585 } 586