1 /* $NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $ */ 2 3 /*- 4 * Copyright (c)2003 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #if defined(LIBC_SCCS) && !defined(lint) 31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $"); 32 #endif /* LIBC_SCCS and not lint */ 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <sys/endian.h> 41 #include <sys/queue.h> 42 43 #include "citrus_namespace.h" 44 #include "citrus_types.h" 45 #include "citrus_module.h" 46 #include "citrus_region.h" 47 #include "citrus_mmap.h" 48 #include "citrus_iconv.h" 49 #include "citrus_stdenc.h" 50 #include "citrus_hash.h" 51 #include "citrus_mapper.h" 52 #include "citrus_csmapper.h" 53 #include "citrus_memstream.h" 54 #include "citrus_iconv_std.h" 55 #include "citrus_esdb.h" 56 57 /* ---------------------------------------------------------------------- */ 58 59 _CITRUS_ICONV_DECLS(iconv_std); 60 _CITRUS_ICONV_DEF_OPS(iconv_std); 61 62 63 /* ---------------------------------------------------------------------- */ 64 65 int 66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops, 67 u_int32_t expected_version) 68 { 69 if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops)) 70 return (EINVAL); 71 72 memcpy(ops, &_citrus_iconv_std_iconv_ops, 73 sizeof(_citrus_iconv_std_iconv_ops)); 74 75 return (0); 76 } 77 78 /* ---------------------------------------------------------------------- */ 79 80 /* 81 * convenience routines for stdenc. 82 */ 83 static __inline void 84 save_encoding_state(struct _citrus_iconv_std_encoding *se) 85 { 86 if (se->se_ps) 87 memcpy(se->se_pssaved, se->se_ps, 88 _stdenc_get_state_size(se->se_handle)); 89 } 90 91 static __inline void 92 restore_encoding_state(struct _citrus_iconv_std_encoding *se) 93 { 94 if (se->se_ps) 95 memcpy(se->se_ps, se->se_pssaved, 96 _stdenc_get_state_size(se->se_handle)); 97 } 98 99 static __inline void 100 init_encoding_state(struct _citrus_iconv_std_encoding *se) 101 { 102 if (se->se_ps) 103 _stdenc_init_state(se->se_handle, se->se_ps); 104 } 105 106 static __inline int 107 mbtocsx(struct _citrus_iconv_std_encoding *se, 108 _csid_t *csid, _index_t *idx, const char **s, size_t n, 109 size_t *nresult) 110 { 111 return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps, 112 nresult); 113 } 114 115 static __inline int 116 cstombx(struct _citrus_iconv_std_encoding *se, 117 char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult) 118 { 119 return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps, 120 nresult); 121 } 122 123 static __inline int 124 wctombx(struct _citrus_iconv_std_encoding *se, 125 char *s, size_t n, _wc_t wc, size_t *nresult) 126 { 127 return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult); 128 } 129 130 131 /* 132 * open/close an encoding. 133 */ 134 static __inline void 135 close_encoding(struct _citrus_iconv_std_encoding *se) 136 { 137 free(se->se_ps); se->se_ps = NULL; 138 free(se->se_pssaved); se->se_pssaved = NULL; 139 } 140 141 static __inline int 142 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db) 143 { 144 int ret; 145 146 se->se_ps = se->se_pssaved = NULL; 147 ret = _stdenc_open(&se->se_handle, db->db_encname, 148 db->db_variable, db->db_len_variable); 149 if (ret) 150 return ret; 151 152 if (_stdenc_get_state_size(se->se_handle) == 0) 153 return 0; 154 155 se->se_ps = malloc(_stdenc_get_state_size(se->se_handle)); 156 if (se->se_ps == NULL) { 157 ret = errno; 158 goto err; 159 } 160 ret = _stdenc_init_state(se->se_handle, se->se_ps); 161 if (ret) 162 goto err; 163 se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle)); 164 if (se->se_pssaved == NULL) { 165 ret = errno; 166 goto err; 167 } 168 ret = _stdenc_init_state(se->se_handle, se->se_pssaved); 169 if (ret) 170 goto err; 171 return 0; 172 173 err: 174 close_encoding(se); 175 return ret; 176 } 177 178 static int 179 open_csmapper(struct _csmapper **rcm, const char *src, const char *dst, 180 unsigned long *rnorm) 181 { 182 int ret; 183 struct _csmapper *cm; 184 185 ret = _csmapper_open(&cm, src, dst, 0, rnorm); 186 if (ret) 187 return ret; 188 if (_csmapper_get_src_max(cm) != 1 || _csmapper_get_dst_max(cm) != 1 || 189 _csmapper_get_state_size(cm) != 0) { 190 _csmapper_close(cm); 191 return EINVAL; 192 } 193 194 *rcm = cm; 195 196 return 0; 197 } 198 199 static void 200 close_dsts(struct _citrus_iconv_std_dst_list *dl) 201 { 202 struct _citrus_iconv_std_dst *sd; 203 204 while ((sd=TAILQ_FIRST(dl)) != NULL) { 205 TAILQ_REMOVE(dl, sd, sd_entry); 206 _csmapper_close(sd->sd_mapper); 207 free(sd); 208 } 209 } 210 211 static int 212 open_dsts(struct _citrus_iconv_std_dst_list *dl, 213 struct _esdb_charset *ec, struct _esdb *dbdst) 214 { 215 int i, ret; 216 struct _citrus_iconv_std_dst *sd, *sdtmp; 217 unsigned long norm; 218 219 sd = malloc(sizeof(*sd)); 220 if (sd == NULL) 221 return errno; 222 223 for (i=0; i<dbdst->db_num_charsets; i++) { 224 ret = open_csmapper(&sd->sd_mapper,ec->ec_csname, 225 dbdst->db_charsets[i].ec_csname, &norm); 226 if (ret == 0) { 227 sd->sd_csid = dbdst->db_charsets[i].ec_csid; 228 sd->sd_norm = norm; 229 /* insert this mapper by sorted order. */ 230 TAILQ_FOREACH(sdtmp, dl, sd_entry) { 231 if (sdtmp->sd_norm > norm) { 232 TAILQ_INSERT_BEFORE(sdtmp, sd, 233 sd_entry); 234 sd = NULL; 235 break; 236 } 237 } 238 if (sd) 239 TAILQ_INSERT_TAIL(dl, sd, sd_entry); 240 sd = malloc(sizeof(*sd)); 241 if (sd == NULL) { 242 ret = errno; 243 close_dsts(dl); 244 return ret; 245 } 246 } else if (ret != ENOENT) { 247 close_dsts(dl); 248 free(sd); 249 return ret; 250 } 251 } 252 free(sd); 253 return 0; 254 } 255 256 static void 257 close_srcs(struct _citrus_iconv_std_src_list *sl) 258 { 259 struct _citrus_iconv_std_src *ss; 260 261 while ((ss=TAILQ_FIRST(sl)) != NULL) { 262 TAILQ_REMOVE(sl, ss, ss_entry); 263 close_dsts(&ss->ss_dsts); 264 free(ss); 265 } 266 } 267 268 static int 269 open_srcs(struct _citrus_iconv_std_src_list *sl, 270 struct _esdb *dbsrc, struct _esdb *dbdst) 271 { 272 int i, ret, count = 0; 273 struct _citrus_iconv_std_src *ss; 274 275 ss = malloc(sizeof(*ss)); 276 if (ss == NULL) 277 return errno; 278 279 TAILQ_INIT(&ss->ss_dsts); 280 281 for (i=0; i<dbsrc->db_num_charsets; i++) { 282 ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst); 283 if (ret) 284 goto err; 285 if (!TAILQ_EMPTY(&ss->ss_dsts)) { 286 ss->ss_csid = dbsrc->db_charsets[i].ec_csid; 287 TAILQ_INSERT_TAIL(sl, ss, ss_entry); 288 ss = malloc(sizeof(*ss)); 289 if (ss == NULL) { 290 ret = errno; 291 goto err; 292 } 293 count++; 294 TAILQ_INIT(&ss->ss_dsts); 295 } 296 } 297 free(ss); 298 299 return count ? 0 : ENOENT; 300 301 err: 302 free(ss); 303 close_srcs(sl); 304 return ret; 305 } 306 307 /* do convert a character */ 308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */ 309 static int 310 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx) 311 { 312 _index_t tmpidx; 313 int ret; 314 struct _citrus_iconv_std_src *ss; 315 struct _citrus_iconv_std_dst *sd; 316 317 TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) { 318 if (ss->ss_csid == *csid) { 319 TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) { 320 ret = _csmapper_convert(sd->sd_mapper, 321 &tmpidx, *idx, NULL); 322 switch (ret) { 323 case _CITRUS_MAPPER_CONVERT_SUCCESS: 324 *csid = sd->sd_csid; 325 *idx = tmpidx; 326 return 0; 327 case _CITRUS_MAPPER_CONVERT_INVAL: 328 break; 329 case _CITRUS_MAPPER_CONVERT_SRC_MORE: 330 /*FALLTHROUGH*/ 331 case _CITRUS_MAPPER_CONVERT_DST_MORE: 332 /*FALLTHROUGH*/ 333 case _CITRUS_MAPPER_CONVERT_FATAL: 334 return EINVAL; 335 case _CITRUS_MAPPER_CONVERT_ILSEQ: 336 return EILSEQ; 337 } 338 } 339 break; 340 } 341 } 342 343 return E_NO_CORRESPONDING_CHAR; 344 } 345 /* ---------------------------------------------------------------------- */ 346 347 static int 348 /*ARGSUSED*/ 349 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci, 350 const char * __restrict curdir, 351 const char * __restrict src, 352 const char * __restrict dst, 353 const void * __restrict var, size_t lenvar) 354 { 355 int ret; 356 struct _citrus_iconv_std *is; 357 struct _citrus_esdb esdbsrc, esdbdst; 358 359 is = malloc(sizeof(*is)); 360 if (is==NULL) { 361 ret = errno; 362 goto err0; 363 } 364 ret = _citrus_esdb_open(&esdbsrc, src); 365 if (ret) 366 goto err1; 367 ret = _citrus_esdb_open(&esdbdst, dst); 368 if (ret) 369 goto err2; 370 ret = open_encoding(&is->is_src_encoding, &esdbsrc); 371 if (ret) 372 goto err3; 373 ret = open_encoding(&is->is_dst_encoding, &esdbdst); 374 if (ret) 375 goto err4; 376 is->is_use_invalid = esdbdst.db_use_invalid; 377 is->is_invalid = esdbdst.db_invalid; 378 379 TAILQ_INIT(&is->is_srcs); 380 ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst); 381 if (ret) 382 goto err5; 383 384 _esdb_close(&esdbsrc); 385 _esdb_close(&esdbdst); 386 ci->ci_closure = is; 387 388 return 0; 389 390 err5: 391 close_encoding(&is->is_dst_encoding); 392 err4: 393 close_encoding(&is->is_src_encoding); 394 err3: 395 _esdb_close(&esdbdst); 396 err2: 397 _esdb_close(&esdbsrc); 398 err1: 399 free(is); 400 err0: 401 return ret; 402 } 403 404 static void 405 /*ARGSUSED*/ 406 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci) 407 { 408 struct _citrus_iconv_std *is; 409 410 if (ci->ci_closure == NULL) 411 return; 412 413 is = ci->ci_closure; 414 close_encoding(&is->is_src_encoding); 415 close_encoding(&is->is_dst_encoding); 416 close_srcs(&is->is_srcs); 417 free(is); 418 } 419 420 static int 421 /*ARGSUSED*/ 422 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci, 423 const char * __restrict * __restrict in, 424 size_t * __restrict inbytes, 425 char * __restrict * __restrict out, 426 size_t * __restrict outbytes, u_int32_t flags, 427 size_t * __restrict invalids) 428 { 429 struct _citrus_iconv_std *is = ci->ci_closure; 430 _index_t idx; 431 _csid_t csid; 432 int ret; 433 size_t szrin, szrout; 434 size_t inval; 435 const char *tmpin; 436 437 inval = 0; 438 if (in==NULL || *in==NULL) { 439 /* special cases */ 440 if (out!=NULL && *out!=NULL) { 441 /* init output state */ 442 save_encoding_state(&is->is_src_encoding); 443 save_encoding_state(&is->is_dst_encoding); 444 szrout = 0; 445 446 ret = cstombx(&is->is_dst_encoding, 447 *out, *outbytes, 448 _CITRUS_CSID_INVALID, 449 0, &szrout); 450 if (ret) 451 goto err; 452 453 if (szrout == (size_t)-2) { 454 /* too small to store the character */ 455 ret = EINVAL; 456 goto err; 457 } 458 *out += szrout; 459 *outbytes -= szrout; 460 } 461 *invalids = 0; 462 init_encoding_state(&is->is_src_encoding); 463 return 0; 464 } 465 466 /* normal case */ 467 for (;;) { 468 /* save the encoding states for the error recovery */ 469 save_encoding_state(&is->is_src_encoding); 470 save_encoding_state(&is->is_dst_encoding); 471 472 /* mb -> csid/index */ 473 tmpin = *in; 474 szrin = szrout = 0; 475 ret = mbtocsx(&is->is_src_encoding, &csid, &idx, 476 &tmpin, *inbytes, &szrin); 477 if (ret) 478 goto err; 479 480 if (szrin == (size_t)-2) { 481 /* incompleted character */ 482 ret = EINVAL; 483 goto err; 484 } 485 /* convert the character */ 486 ret = do_conv(is, &csid, &idx); 487 if (ret) { 488 if (ret == E_NO_CORRESPONDING_CHAR) { 489 inval ++; 490 if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 && 491 is->is_use_invalid) { 492 ret = wctombx(&is->is_dst_encoding, 493 *out, *outbytes, 494 is->is_invalid, 495 &szrout); 496 if (ret) 497 goto err; 498 } 499 goto next; 500 } else { 501 goto err; 502 } 503 } 504 /* csid/index -> mb */ 505 ret = cstombx(&is->is_dst_encoding, 506 *out, *outbytes, csid, idx, &szrout); 507 if (ret) 508 goto err; 509 next: 510 _DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout); 511 *inbytes -= tmpin-*in; /* szrin is insufficient on \0. */ 512 *in = tmpin; 513 *outbytes -= szrout; 514 *out += szrout; 515 if (*inbytes==0) 516 break; 517 if (*outbytes == 0) { 518 ret = E2BIG; 519 goto err_norestore; 520 } 521 } 522 *invalids = inval; 523 524 return 0; 525 526 err: 527 restore_encoding_state(&is->is_src_encoding); 528 restore_encoding_state(&is->is_dst_encoding); 529 err_norestore: 530 *invalids = inval; 531 532 return ret; 533 } 534