1 /* $NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $ */ 2 3 /*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <sys/endian.h> 41 #include <sys/queue.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_module.h" 46 #include "citrus_region.h" 47 #include "citrus_mmap.h" 48 #include "citrus_hash.h" 49 #include "citrus_iconv.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_mapper.h" 52 #include "citrus_csmapper.h" 53 #include "citrus_memstream.h" 54 #include "citrus_iconv_std.h" 55 #include "citrus_esdb.h" 56 57 /* ---------------------------------------------------------------------- */ 58 59 _CITRUS_ICONV_DECLS(iconv_std); 60 _CITRUS_ICONV_DEF_OPS(iconv_std); 61 62 63 /* ---------------------------------------------------------------------- */ 64 65 int 66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops, 67 u_int32_t expected_version) 68 { 69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops)) 70 return (EINVAL); 71 72 memcpy(ops, &_citrus_iconv_std_iconv_ops, 73 sizeof(_citrus_iconv_std_iconv_ops)); 74 75 return (0); 76 } 77 78 /* ---------------------------------------------------------------------- */ 79 80 /* 81 * convenience routines for stdenc. 82 */ 83 static __inline void 84 save_encoding_state(struct _citrus_iconv_std_encoding *se) 85 { 86 if (se->se_ps) 87 memcpy(se->se_pssaved, se->se_ps, 88 _stdenc_get_state_size(se->se_handle)); 89 } 90 91 static __inline void 92 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 93 { 94 if (se->se_ps) 95 memcpy(se->se_ps, se->se_pssaved, 96 _stdenc_get_state_size(se->se_handle)); 97 } 98 99 static __inline void 100 init_encoding_state(struct _citrus_iconv_std_encoding *se) 101 { 102 if (se->se_ps) 103 _stdenc_init_state(se->se_handle, se->se_ps); 104 } 105 106 static __inline int 107 mbtocsx(struct _citrus_iconv_std_encoding *se, 108 _csid_t *csid, _index_t *idx, const char **s, size_t n, 109 size_t *nresult) 110 { 111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 112 nresult); 113 } 114 115 static __inline int 116 cstombx(struct _citrus_iconv_std_encoding *se, 117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult) 118 { 119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 120 nresult); 121 } 122 123 static __inline int 124 wctombx(struct _citrus_iconv_std_encoding *se, 125 char *s, size_t n, _wc_t wc, size_t *nresult) 126 { 127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult); 128 } 129 130 static __inline int 131 put_state_resetx(struct _citrus_iconv_std_encoding *se, 132 char *s, size_t n, size_t *nresult) 133 { 134 return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult); 135 } 136 137 /* 138 * init encoding context 139 */ 140 static int 141 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs, 142 void *ps1, void *ps2) 143 { 144 int ret; 145 146 se->se_handle = cs; 147 se->se_ps = ps1; 148 se->se_pssaved = ps2; 149 150 if (se->se_ps) 151 ret = _stdenc_init_state(cs, se->se_ps); 152 if (!ret && se->se_pssaved) 153 ret = _stdenc_init_state(cs, se->se_pssaved); 154 155 return ret; 156 } 157 158 static int 159 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 160 unsigned long *rnorm) 161 { 162 int ret; 163 struct _csmapper *cm; 164 165 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 166 if (ret) 167 return ret; 168 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 169 _csmapper_get_state_size(cm) != 0) { 170 _csmapper_close(cm); 171 return EINVAL; 172 } 173 174 *rcm = cm; 175 176 return 0; 177 } 178 179 static void 180 close_dsts(struct _citrus_iconv_std_dst_list *dl) 181 { 182 struct _citrus_iconv_std_dst *sd; 183 184 while ((sd=TAILQ_FIRST(dl)) != NULL) { 185 TAILQ_REMOVE(dl, sd, sd_entry); 186 _csmapper_close(sd->sd_mapper); 187 free(sd); 188 } 189 } 190 191 static int 192 open_dsts(struct _citrus_iconv_std_dst_list *dl, 193 struct _esdb_charset *ec, struct _esdb *dbdst) 194 { 195 int i, ret; 196 struct _citrus_iconv_std_dst *sd, *sdtmp; 197 unsigned long norm; 198 199 sd = malloc(sizeof(*sd)); 200 if (sd == NULL) 201 return errno; 202 203 for (i=0; i<dbdst->db_num_charsets; i++) { 204 ret = open_csmapper(&sd->sd_mapper, ec->ec_csname, 205 dbdst->db_charsets[i].ec_csname, &norm); 206 if (ret == 0) { 207 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 208 sd->sd_norm = norm; 209 /* insert this mapper by sorted order. */ 210 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 211 if (sdtmp->sd_norm > norm) { 212 TAILQ_INSERT_BEFORE(sdtmp, sd, 213 sd_entry); 214 sd = NULL; 215 break; 216 } 217 } 218 if (sd) 219 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 220 sd = malloc(sizeof(*sd)); 221 if (sd == NULL) { 222 ret = errno; 223 close_dsts(dl); 224 return ret; 225 } 226 } else if (ret != ENOENT) { 227 close_dsts(dl); 228 free(sd); 229 return ret; 230 } 231 } 232 free(sd); 233 return 0; 234 } 235 236 static void 237 close_srcs(struct _citrus_iconv_std_src_list *sl) 238 { 239 struct _citrus_iconv_std_src *ss; 240 241 while ((ss=TAILQ_FIRST(sl)) != NULL) { 242 TAILQ_REMOVE(sl, ss, ss_entry); 243 close_dsts(&ss->ss_dsts); 244 free(ss); 245 } 246 } 247 248 static int 249 open_srcs(struct _citrus_iconv_std_src_list *sl, 250 struct _esdb *dbsrc, struct _esdb *dbdst) 251 { 252 int i, ret, count = 0; 253 struct _citrus_iconv_std_src *ss; 254 255 ss = malloc(sizeof(*ss)); 256 if (ss == NULL) 257 return errno; 258 259 TAILQ_INIT(&ss->ss_dsts); 260 261 for (i=0; i<dbsrc->db_num_charsets; i++) { 262 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 263 if (ret) 264 goto err; 265 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 266 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 267 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 268 ss = malloc(sizeof(*ss)); 269 if (ss == NULL) { 270 ret = errno; 271 goto err; 272 } 273 count++; 274 TAILQ_INIT(&ss->ss_dsts); 275 } 276 } 277 free(ss); 278 279 return count ? 0 : ENOENT; 280 281 err: 282 free(ss); 283 close_srcs(sl); 284 return ret; 285 } 286 287 /* do convert a character */ 288 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 289 static int 290 /*ARGSUSED*/ 291 do_conv(struct _citrus_iconv_std_shared *is, 292 struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx) 293 { 294 _index_t tmpidx; 295 int ret; 296 struct _citrus_iconv_std_src *ss; 297 struct _citrus_iconv_std_dst *sd; 298 299 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 300 if (ss->ss_csid == *csid) { 301 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 302 ret = _csmapper_convert(sd->sd_mapper, 303 &tmpidx, *idx, NULL); 304 switch (ret) { 305 case _MAPPER_CONVERT_SUCCESS: 306 *csid = sd->sd_csid; 307 *idx = tmpidx; 308 return 0; 309 case _MAPPER_CONVERT_NONIDENTICAL: 310 break; 311 case _MAPPER_CONVERT_SRC_MORE: 312 /*FALLTHROUGH*/ 313 case _MAPPER_CONVERT_DST_MORE: 314 /*FALLTHROUGH*/ 315 case _MAPPER_CONVERT_FATAL: 316 return EINVAL; 317 case _MAPPER_CONVERT_ILSEQ: 318 return EILSEQ; 319 } 320 } 321 break; 322 } 323 } 324 325 return E_NO_CORRESPONDING_CHAR; 326 } 327 /* ---------------------------------------------------------------------- */ 328 329 static int 330 /*ARGSUSED*/ 331 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci, 332 const char * __restrict curdir, 333 const char * __restrict src, 334 const char * __restrict dst, 335 const void * __restrict var, size_t lenvar) 336 { 337 int ret; 338 struct _citrus_iconv_std_shared *is; 339 struct _citrus_esdb esdbsrc, esdbdst; 340 341 is = malloc(sizeof(*is)); 342 if (is==NULL) { 343 ret = errno; 344 goto err0; 345 } 346 ret = _citrus_esdb_open(&esdbsrc, src); 347 if (ret) 348 goto err1; 349 ret = _citrus_esdb_open(&esdbdst, dst); 350 if (ret) 351 goto err2; 352 ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname, 353 esdbsrc.db_variable, esdbsrc.db_len_variable); 354 if (ret) 355 goto err3; 356 ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname, 357 esdbdst.db_variable, esdbdst.db_len_variable); 358 if (ret) 359 goto err4; 360 is->is_use_invalid = esdbdst.db_use_invalid; 361 is->is_invalid = esdbdst.db_invalid; 362 363 TAILQ_INIT(&is->is_srcs); 364 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 365 if (ret) 366 goto err5; 367 368 _esdb_close(&esdbsrc); 369 _esdb_close(&esdbdst); 370 ci->ci_closure = is; 371 372 return 0; 373 374 err5: 375 _stdenc_close(is->is_dst_encoding); 376 err4: 377 _stdenc_close(is->is_src_encoding); 378 err3: 379 _esdb_close(&esdbdst); 380 err2: 381 _esdb_close(&esdbsrc); 382 err1: 383 free(is); 384 err0: 385 return ret; 386 } 387 388 static void 389 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci) 390 { 391 struct _citrus_iconv_std_shared *is = ci->ci_closure; 392 393 if (is == NULL) 394 return; 395 396 _stdenc_close(is->is_src_encoding); 397 _stdenc_close(is->is_dst_encoding); 398 close_srcs(&is->is_srcs); 399 free(is); 400 } 401 402 static int 403 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv) 404 { 405 struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 406 struct _citrus_iconv_std_context *sc; 407 int ret; 408 size_t szpssrc, szpsdst, sz; 409 char *ptr; 410 411 szpssrc = _stdenc_get_state_size(is->is_src_encoding); 412 szpsdst = _stdenc_get_state_size(is->is_dst_encoding); 413 414 sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context); 415 sc = malloc(sz); 416 if (sc == NULL) 417 return errno; 418 419 ptr = (char *)&sc[1]; 420 if (szpssrc) 421 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 422 ptr, ptr+szpssrc); 423 else 424 init_encoding(&sc->sc_src_encoding, is->is_src_encoding, 425 NULL, NULL); 426 ptr += szpssrc*2; 427 if (szpsdst) 428 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 429 ptr, ptr+szpsdst); 430 else 431 init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding, 432 NULL, NULL); 433 434 cv->cv_closure = (void *)sc; 435 436 return 0; 437 } 438 439 static void 440 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv) 441 { 442 free(cv->cv_closure); 443 } 444 445 static int 446 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv, 447 const char * __restrict * __restrict in, 448 size_t * __restrict inbytes, 449 char * __restrict * __restrict out, 450 size_t * __restrict outbytes, u_int32_t flags, 451 size_t * __restrict invalids) 452 { 453 struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure; 454 struct _citrus_iconv_std_context *sc = cv->cv_closure; 455 _index_t idx; 456 _csid_t csid; 457 int ret; 458 size_t szrin, szrout; 459 size_t inval; 460 const char *tmpin; 461 462 inval = 0; 463 if (in==NULL || *in==NULL) { 464 /* special cases */ 465 if (out!=NULL && *out!=NULL) { 466 /* init output state */ 467 save_encoding_state(&sc->sc_src_encoding); 468 save_encoding_state(&sc->sc_dst_encoding); 469 szrout = 0; 470 471 ret = put_state_resetx(&sc->sc_dst_encoding, 472 *out, *outbytes, 473 &szrout); 474 if (ret) 475 goto err; 476 477 if (szrout == (size_t)-2) { 478 /* too small to store the character */ 479 ret = EINVAL; 480 goto err; 481 } 482 *out += szrout; 483 *outbytes -= szrout; 484 } 485 *invalids = 0; 486 init_encoding_state(&sc->sc_src_encoding); 487 return 0; 488 } 489 490 /* normal case */ 491 for (;;) { 492 /* save the encoding states for the error recovery */ 493 save_encoding_state(&sc->sc_src_encoding); 494 save_encoding_state(&sc->sc_dst_encoding); 495 496 /* mb -> csid/index */ 497 tmpin = *in; 498 szrin = szrout = 0; 499 ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx, 500 &tmpin, *inbytes, &szrin); 501 if (ret) 502 goto err; 503 504 if (szrin == (size_t)-2) { 505 /* incompleted character */ 506 ret = EINVAL; 507 goto err; 508 } 509 /* convert the character */ 510 ret = do_conv(is, sc, &csid, &idx); 511 if (ret) { 512 if (ret == E_NO_CORRESPONDING_CHAR) { 513 inval ++; 514 szrout = 0; 515 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 && 516 is->is_use_invalid) { 517 ret = wctombx(&sc->sc_dst_encoding, 518 *out, *outbytes, 519 is->is_invalid, 520 &szrout); 521 if (ret) 522 goto err; 523 } 524 goto next; 525 } else { 526 goto err; 527 } 528 } 529 /* csid/index -> mb */ 530 ret = cstombx(&sc->sc_dst_encoding, 531 *out, *outbytes, csid, idx, &szrout); 532 if (ret) 533 goto err; 534 next: 535 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout); 536 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 537 *in = tmpin; 538 *outbytes -= szrout; 539 *out += szrout; 540 if (*inbytes==0) 541 break; 542 if (*outbytes == 0) { 543 ret = E2BIG; 544 goto err_norestore; 545 } 546 } 547 *invalids = inval; 548 549 return 0; 550 551 err: 552 restore_encoding_state(&sc->sc_src_encoding); 553 restore_encoding_state(&sc->sc_dst_encoding); 554 err_norestore: 555 *invalids = inval; 556 557 return ret; 558 } 559