xref: /netbsd-src/lib/libc/citrus/modules/citrus_iconv_std.c (revision da5f4674a3fc214be3572d358b66af40ab9401e7)
1 /*	$NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $	*/
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.5 2003/07/12 15:39:20 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_hash.h"
49 #include "citrus_iconv.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56 
57 /* ---------------------------------------------------------------------- */
58 
/*
 * Per-module boilerplate for the "iconv_std" converter.
 * NOTE(review): presumably _CITRUS_ICONV_DECLS forward-declares the static
 * _citrus_iconv_std_iconv_* handlers defined later in this file, and
 * _CITRUS_ICONV_DEF_OPS defines the _citrus_iconv_std_iconv_ops table that
 * _citrus_iconv_std_iconv_getops() copies out -- confirm against the macro
 * definitions.
 */
59 _CITRUS_ICONV_DECLS(iconv_std);
60 _CITRUS_ICONV_DEF_OPS(iconv_std);
61 
62 
63 /* ---------------------------------------------------------------------- */
64 
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 			       u_int32_t expected_version)
68 {
69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 		return (EINVAL);
71 
72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 	       sizeof(_citrus_iconv_std_iconv_ops));
74 
75 	return (0);
76 }
77 
78 /* ---------------------------------------------------------------------- */
79 
80 /*
81  * convenience routines for stdenc.
82  */
83 static __inline void
84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 	if (se->se_ps)
87 		memcpy(se->se_pssaved, se->se_ps,
88 		       _stdenc_get_state_size(se->se_handle));
89 }
90 
91 static __inline void
92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 	if (se->se_ps)
95 		memcpy(se->se_ps, se->se_pssaved,
96 		       _stdenc_get_state_size(se->se_handle));
97 }
98 
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 	if (se->se_ps)
103 		_stdenc_init_state(se->se_handle, se->se_ps);
104 }
105 
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
109 	size_t *nresult)
110 {
111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 			      nresult);
113 }
114 
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 			      nresult);
121 }
122 
123 static __inline int
124 wctombx(struct _citrus_iconv_std_encoding *se,
125 	char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129 
130 static __inline int
131 put_state_resetx(struct _citrus_iconv_std_encoding *se,
132 		 char *s, size_t n, size_t *nresult)
133 {
134 	return _stdenc_put_state_reset(se->se_handle, s, n, se->se_ps, nresult);
135 }
136 
137 /*
138  * init encoding context
139  */
140 static int
141 init_encoding(struct _citrus_iconv_std_encoding *se, struct _stdenc *cs,
142 	      void *ps1, void *ps2)
143 {
144 	int ret;
145 
146 	se->se_handle = cs;
147 	se->se_ps = ps1;
148 	se->se_pssaved = ps2;
149 
150 	if (se->se_ps)
151 		ret = _stdenc_init_state(cs, se->se_ps);
152 	if (!ret && se->se_pssaved)
153 		ret = _stdenc_init_state(cs, se->se_pssaved);
154 
155 	return ret;
156 }
157 
/*
 * open_csmapper:
 *	Open the charset mapper from `src' to `dst' and accept it only if
 *	it reports a source maximum of 1, a destination maximum of 1 and a
 *	state size of 0.
 *
 *	On success the mapper is stored in *rcm and 0 is returned (rnorm is
 *	handed straight to _csmapper_open()).  A failure of _csmapper_open()
 *	is returned as is; a mapper that does not satisfy the constraints
 *	above is closed again and EINVAL is returned.  *rcm is left
 *	untouched on failure.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
178 
179 static void
180 close_dsts(struct _citrus_iconv_std_dst_list *dl)
181 {
182 	struct _citrus_iconv_std_dst *sd;
183 
184 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
185 		TAILQ_REMOVE(dl, sd, sd_entry);
186 		_csmapper_close(sd->sd_mapper);
187 		free(sd);
188 	}
189 }
190 
191 static int
192 open_dsts(struct _citrus_iconv_std_dst_list *dl,
193 	  struct _esdb_charset *ec, struct _esdb *dbdst)
194 {
195 	int i, ret;
196 	struct _citrus_iconv_std_dst *sd, *sdtmp;
197 	unsigned long norm;
198 
199 	sd = malloc(sizeof(*sd));
200 	if (sd == NULL)
201 		return errno;
202 
203 	for (i=0; i<dbdst->db_num_charsets; i++) {
204 		ret = open_csmapper(&sd->sd_mapper, ec->ec_csname,
205 				    dbdst->db_charsets[i].ec_csname, &norm);
206 		if (ret == 0) {
207 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
208 			sd->sd_norm = norm;
209 			/* insert this mapper by sorted order. */
210 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
211 				if (sdtmp->sd_norm > norm) {
212 					TAILQ_INSERT_BEFORE(sdtmp, sd,
213 							    sd_entry);
214 					sd = NULL;
215 					break;
216 				}
217 			}
218 			if (sd)
219 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
220 			sd = malloc(sizeof(*sd));
221 			if (sd == NULL) {
222 				ret = errno;
223 				close_dsts(dl);
224 				return ret;
225 			}
226 		} else if (ret != ENOENT) {
227 			close_dsts(dl);
228 			free(sd);
229 			return ret;
230 		}
231 	}
232 	free(sd);
233 	return 0;
234 }
235 
236 static void
237 close_srcs(struct _citrus_iconv_std_src_list *sl)
238 {
239 	struct _citrus_iconv_std_src *ss;
240 
241 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
242 		TAILQ_REMOVE(sl, ss, ss_entry);
243 		close_dsts(&ss->ss_dsts);
244 		free(ss);
245 	}
246 }
247 
248 static int
249 open_srcs(struct _citrus_iconv_std_src_list *sl,
250 	  struct _esdb *dbsrc, struct _esdb *dbdst)
251 {
252 	int i, ret, count = 0;
253 	struct _citrus_iconv_std_src *ss;
254 
255 	ss = malloc(sizeof(*ss));
256 	if (ss == NULL)
257 		return errno;
258 
259 	TAILQ_INIT(&ss->ss_dsts);
260 
261 	for (i=0; i<dbsrc->db_num_charsets; i++) {
262 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
263 		if (ret)
264 			goto err;
265 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
266 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
267 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
268 			ss = malloc(sizeof(*ss));
269 			if (ss == NULL) {
270 				ret = errno;
271 				goto err;
272 			}
273 			count++;
274 			TAILQ_INIT(&ss->ss_dsts);
275 		}
276 	}
277 	free(ss);
278 
279 	return count ? 0 : ENOENT;
280 
281 err:
282 	free(ss);
283 	close_srcs(sl);
284 	return ret;
285 }
286 
287 /* do convert a character */
288 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
289 static int
290 /*ARGSUSED*/
291 do_conv(struct _citrus_iconv_std_shared *is,
292 	struct _citrus_iconv_std_context *sc, _csid_t *csid, _index_t *idx)
293 {
294 	_index_t tmpidx;
295 	int ret;
296 	struct _citrus_iconv_std_src *ss;
297 	struct _citrus_iconv_std_dst *sd;
298 
299 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
300 		if (ss->ss_csid == *csid) {
301 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
302 				ret = _csmapper_convert(sd->sd_mapper,
303 							&tmpidx, *idx, NULL);
304 				switch (ret) {
305 				case _MAPPER_CONVERT_SUCCESS:
306 					*csid = sd->sd_csid;
307 					*idx = tmpidx;
308 					return 0;
309 				case _MAPPER_CONVERT_NONIDENTICAL:
310 					break;
311 				case _MAPPER_CONVERT_SRC_MORE:
312 					/*FALLTHROUGH*/
313 				case _MAPPER_CONVERT_DST_MORE:
314 					/*FALLTHROUGH*/
315 				case _MAPPER_CONVERT_FATAL:
316 					return EINVAL;
317 				case _MAPPER_CONVERT_ILSEQ:
318 					return EILSEQ;
319 				}
320 			}
321 			break;
322 		}
323 	}
324 
325 	return E_NO_CORRESPONDING_CHAR;
326 }
327 /* ---------------------------------------------------------------------- */
328 
329 static int
330 /*ARGSUSED*/
331 _citrus_iconv_std_iconv_init_shared(struct _citrus_iconv_shared *ci,
332 				    const char * __restrict curdir,
333 				    const char * __restrict src,
334 				    const char * __restrict dst,
335 				    const void * __restrict var, size_t lenvar)
336 {
337 	int ret;
338 	struct _citrus_iconv_std_shared *is;
339 	struct _citrus_esdb esdbsrc, esdbdst;
340 
341 	is = malloc(sizeof(*is));
342 	if (is==NULL) {
343 		ret = errno;
344 		goto err0;
345 	}
346 	ret = _citrus_esdb_open(&esdbsrc, src);
347 	if (ret)
348 		goto err1;
349 	ret = _citrus_esdb_open(&esdbdst, dst);
350 	if (ret)
351 		goto err2;
352 	ret = _stdenc_open(&is->is_src_encoding, esdbsrc.db_encname,
353 			   esdbsrc.db_variable, esdbsrc.db_len_variable);
354 	if (ret)
355 		goto err3;
356 	ret = _stdenc_open(&is->is_dst_encoding, esdbdst.db_encname,
357 			   esdbdst.db_variable, esdbdst.db_len_variable);
358 	if (ret)
359 		goto err4;
360 	is->is_use_invalid = esdbdst.db_use_invalid;
361 	is->is_invalid = esdbdst.db_invalid;
362 
363 	TAILQ_INIT(&is->is_srcs);
364 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
365 	if (ret)
366 		goto err5;
367 
368 	_esdb_close(&esdbsrc);
369 	_esdb_close(&esdbdst);
370 	ci->ci_closure = is;
371 
372 	return 0;
373 
374 err5:
375 	_stdenc_close(is->is_dst_encoding);
376 err4:
377 	_stdenc_close(is->is_src_encoding);
378 err3:
379 	_esdb_close(&esdbdst);
380 err2:
381 	_esdb_close(&esdbsrc);
382 err1:
383 	free(is);
384 err0:
385 	return ret;
386 }
387 
388 static void
389 _citrus_iconv_std_iconv_uninit_shared(struct _citrus_iconv_shared *ci)
390 {
391 	struct _citrus_iconv_std_shared *is = ci->ci_closure;
392 
393 	if (is == NULL)
394 		return;
395 
396 	_stdenc_close(is->is_src_encoding);
397 	_stdenc_close(is->is_dst_encoding);
398 	close_srcs(&is->is_srcs);
399 	free(is);
400 }
401 
402 static int
403 _citrus_iconv_std_iconv_init_context(struct _citrus_iconv *cv)
404 {
405 	struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
406 	struct _citrus_iconv_std_context *sc;
407 	int ret;
408 	size_t szpssrc, szpsdst, sz;
409 	char *ptr;
410 
411 	szpssrc = _stdenc_get_state_size(is->is_src_encoding);
412 	szpsdst = _stdenc_get_state_size(is->is_dst_encoding);
413 
414 	sz = (szpssrc + szpsdst)*2 + sizeof(struct _citrus_iconv_std_context);
415 	sc = malloc(sz);
416 	if (sc == NULL)
417 		return errno;
418 
419 	ptr = (char *)&sc[1];
420 	if (szpssrc)
421 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
422 			      ptr, ptr+szpssrc);
423 	else
424 		init_encoding(&sc->sc_src_encoding, is->is_src_encoding,
425 			      NULL, NULL);
426 	ptr += szpssrc*2;
427 	if (szpsdst)
428 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
429 			      ptr, ptr+szpsdst);
430 	else
431 		init_encoding(&sc->sc_dst_encoding, is->is_dst_encoding,
432 			      NULL, NULL);
433 
434 	cv->cv_closure = (void *)sc;
435 
436 	return 0;
437 }
438 
439 static void
440 _citrus_iconv_std_iconv_uninit_context(struct _citrus_iconv *cv)
441 {
442 	free(cv->cv_closure);
443 }
444 
445 static int
446 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
447 				const char * __restrict * __restrict in,
448 				size_t * __restrict inbytes,
449 				char * __restrict * __restrict out,
450 				size_t * __restrict outbytes, u_int32_t flags,
451 				size_t * __restrict invalids)
452 {
453 	struct _citrus_iconv_std_shared *is = cv->cv_shared->ci_closure;
454 	struct _citrus_iconv_std_context *sc = cv->cv_closure;
455 	_index_t idx;
456 	_csid_t csid;
457 	int ret;
458 	size_t szrin, szrout;
459 	size_t inval;
460 	const char *tmpin;
461 
462 	inval = 0;
463 	if (in==NULL || *in==NULL) {
464 		/* special cases */
465 		if (out!=NULL && *out!=NULL) {
466 			/* init output state */
467 			save_encoding_state(&sc->sc_src_encoding);
468 			save_encoding_state(&sc->sc_dst_encoding);
469 			szrout = 0;
470 
471 			ret = put_state_resetx(&sc->sc_dst_encoding,
472 					       *out, *outbytes,
473 					       &szrout);
474 			if (ret)
475 				goto err;
476 
477 			if (szrout == (size_t)-2) {
478 				/* too small to store the character */
479 				ret = EINVAL;
480 				goto err;
481 			}
482 			*out += szrout;
483 			*outbytes -= szrout;
484 		}
485 		*invalids = 0;
486 		init_encoding_state(&sc->sc_src_encoding);
487 		return 0;
488 	}
489 
490 	/* normal case */
491 	for (;;) {
492 		/* save the encoding states for the error recovery */
493 		save_encoding_state(&sc->sc_src_encoding);
494 		save_encoding_state(&sc->sc_dst_encoding);
495 
496 		/* mb -> csid/index */
497 		tmpin = *in;
498 		szrin = szrout = 0;
499 		ret = mbtocsx(&sc->sc_src_encoding, &csid, &idx,
500 			      &tmpin, *inbytes, &szrin);
501 		if (ret)
502 			goto err;
503 
504 		if (szrin == (size_t)-2) {
505 			/* incompleted character */
506 			ret = EINVAL;
507 			goto err;
508 		}
509 		/* convert the character */
510 		ret = do_conv(is, sc, &csid, &idx);
511 		if (ret) {
512 			if (ret == E_NO_CORRESPONDING_CHAR) {
513 				inval ++;
514 				szrout = 0;
515 				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
516 				    is->is_use_invalid) {
517 					ret = wctombx(&sc->sc_dst_encoding,
518 						      *out, *outbytes,
519 						      is->is_invalid,
520 						      &szrout);
521 					if (ret)
522 						goto err;
523 				}
524 				goto next;
525 			} else {
526 				goto err;
527 			}
528 		}
529 		/* csid/index -> mb */
530 		ret = cstombx(&sc->sc_dst_encoding,
531 			      *out, *outbytes, csid, idx, &szrout);
532 		if (ret)
533 			goto err;
534 next:
535 		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
536 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
537 		*in = tmpin;
538 		*outbytes -= szrout;
539 		*out += szrout;
540 		if (*inbytes==0)
541 			break;
542 		if (*outbytes == 0) {
543 			ret = E2BIG;
544 			goto err_norestore;
545 		}
546 	}
547 	*invalids = inval;
548 
549 	return 0;
550 
551 err:
552 	restore_encoding_state(&sc->sc_src_encoding);
553 	restore_encoding_state(&sc->sc_dst_encoding);
554 err_norestore:
555 	*invalids = inval;
556 
557 	return ret;
558 }
559