xref: /netbsd-src/lib/libc/citrus/modules/citrus_iconv_std.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /*	$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $	*/
2 
3 /*-
4  * Copyright (c)2003 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: citrus_iconv_std.c,v 1.1 2003/06/25 09:51:43 tshiozak Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_module.h"
46 #include "citrus_region.h"
47 #include "citrus_mmap.h"
48 #include "citrus_iconv.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_hash.h"
51 #include "citrus_mapper.h"
52 #include "citrus_csmapper.h"
53 #include "citrus_memstream.h"
54 #include "citrus_iconv_std.h"
55 #include "citrus_esdb.h"
56 
57 /* ---------------------------------------------------------------------- */
58 
/*
 * Declarations and operations table for the "iconv_std" module.
 * NOTE(review): the expansions of these macros live in a header that is
 * not visible here.  _citrus_iconv_std_iconv_getops() below copies out an
 * object named _citrus_iconv_std_iconv_ops, which is presumably what
 * _CITRUS_ICONV_DEF_OPS defines -- confirm against citrus_iconv.h.
 */
_CITRUS_ICONV_DECLS(iconv_std);
_CITRUS_ICONV_DEF_OPS(iconv_std);
61 
62 
63 /* ---------------------------------------------------------------------- */
64 
65 int
66 _citrus_iconv_std_iconv_getops(struct _citrus_iconv_ops *ops, size_t lenops,
67 			       u_int32_t expected_version)
68 {
69 	if (expected_version<_CITRUS_ICONV_ABI_VERSION || lenops<sizeof(*ops))
70 		return (EINVAL);
71 
72 	memcpy(ops, &_citrus_iconv_std_iconv_ops,
73 	       sizeof(_citrus_iconv_std_iconv_ops));
74 
75 	return (0);
76 }
77 
78 /* ---------------------------------------------------------------------- */
79 
80 /*
81  * convenience routines for stdenc.
82  */
83 static __inline void
84 save_encoding_state(struct _citrus_iconv_std_encoding *se)
85 {
86 	if (se->se_ps)
87 		memcpy(se->se_pssaved, se->se_ps,
88 		       _stdenc_get_state_size(se->se_handle));
89 }
90 
91 static __inline void
92 restore_encoding_state(struct _citrus_iconv_std_encoding *se)
93 {
94 	if (se->se_ps)
95 		memcpy(se->se_ps, se->se_pssaved,
96 		       _stdenc_get_state_size(se->se_handle));
97 }
98 
99 static __inline void
100 init_encoding_state(struct _citrus_iconv_std_encoding *se)
101 {
102 	if (se->se_ps)
103 		_stdenc_init_state(se->se_handle, se->se_ps);
104 }
105 
106 static __inline int
107 mbtocsx(struct _citrus_iconv_std_encoding *se,
108 	_csid_t *csid, _index_t *idx, const char **s, size_t n,
109 	size_t *nresult)
110 {
111 	return _stdenc_mbtocs(se->se_handle, csid, idx, s, n, se->se_ps,
112 			      nresult);
113 }
114 
115 static __inline int
116 cstombx(struct _citrus_iconv_std_encoding *se,
117 	char *s, size_t n, _csid_t csid, _index_t idx, size_t *nresult)
118 {
119 	return _stdenc_cstomb(se->se_handle, s, n, csid, idx, se->se_ps,
120 			      nresult);
121 }
122 
123 static __inline int
124 wctombx(struct _citrus_iconv_std_encoding *se,
125 	char *s, size_t n, _wc_t wc, size_t *nresult)
126 {
127 	return _stdenc_wctomb(se->se_handle, s, n, wc, se->se_ps, nresult);
128 }
129 
130 
131 /*
132  * open/close an encoding.
133  */
134 static __inline void
135 close_encoding(struct _citrus_iconv_std_encoding *se)
136 {
137 	free(se->se_ps); se->se_ps = NULL;
138 	free(se->se_pssaved); se->se_pssaved = NULL;
139 }
140 
141 static __inline int
142 open_encoding(struct _citrus_iconv_std_encoding *se, struct _esdb *db)
143 {
144 	int ret;
145 
146 	se->se_ps = se->se_pssaved = NULL;
147 	ret = _stdenc_open(&se->se_handle, db->db_encname,
148 			   db->db_variable, db->db_len_variable);
149 	if (ret)
150 		return ret;
151 
152 	if (_stdenc_get_state_size(se->se_handle) == 0)
153 		return 0;
154 
155 	se->se_ps = malloc(_stdenc_get_state_size(se->se_handle));
156 	if (se->se_ps == NULL) {
157 		ret = errno;
158 		goto err;
159 	}
160 	ret = _stdenc_init_state(se->se_handle, se->se_ps);
161 	if (ret)
162 		goto err;
163 	se->se_pssaved = malloc(_stdenc_get_state_size(se->se_handle));
164 	if (se->se_pssaved == NULL) {
165 		ret = errno;
166 		goto err;
167 	}
168 	ret = _stdenc_init_state(se->se_handle, se->se_pssaved);
169 	if (ret)
170 		goto err;
171 	return 0;
172 
173 err:
174 	close_encoding(se);
175 	return ret;
176 }
177 
/*
 * open_csmapper:
 *	Open a charset mapper from `src' to `dst' and accept it only if it
 *	reports a source maximum of 1, a destination maximum of 1 and a
 *	state size of 0.
 *
 *	On success stores the mapper in *rcm and returns 0 (the norm is
 *	reported through `rnorm' by _csmapper_open()).  Returns the error
 *	from _csmapper_open(), or EINVAL after closing a mapper that does
 *	not meet the constraints above; *rcm is left untouched on failure.
 */
static int
open_csmapper(struct _csmapper **rcm, const char *src, const char *dst,
	      unsigned long *rnorm)
{
	struct _csmapper *mapper;
	int error;

	error = _csmapper_open(&mapper, src, dst, 0, rnorm);
	if (error != 0)
		return error;

	if (_csmapper_get_src_max(mapper) == 1 &&
	    _csmapper_get_dst_max(mapper) == 1 &&
	    _csmapper_get_state_size(mapper) == 0) {
		*rcm = mapper;
		return 0;
	}

	_csmapper_close(mapper);
	return EINVAL;
}
198 
199 static void
200 close_dsts(struct _citrus_iconv_std_dst_list *dl)
201 {
202 	struct _citrus_iconv_std_dst *sd;
203 
204 	while ((sd=TAILQ_FIRST(dl)) != NULL) {
205 		TAILQ_REMOVE(dl, sd, sd_entry);
206 		_csmapper_close(sd->sd_mapper);
207 		free(sd);
208 	}
209 }
210 
211 static int
212 open_dsts(struct _citrus_iconv_std_dst_list *dl,
213 	  struct _esdb_charset *ec, struct _esdb *dbdst)
214 {
215 	int i, ret;
216 	struct _citrus_iconv_std_dst *sd, *sdtmp;
217 	unsigned long norm;
218 
219 	sd = malloc(sizeof(*sd));
220 	if (sd == NULL)
221 		return errno;
222 
223 	for (i=0; i<dbdst->db_num_charsets; i++) {
224 		ret = open_csmapper(&sd->sd_mapper,ec->ec_csname,
225 				    dbdst->db_charsets[i].ec_csname, &norm);
226 		if (ret == 0) {
227 			sd->sd_csid = dbdst->db_charsets[i].ec_csid;
228 			sd->sd_norm = norm;
229 			/* insert this mapper by sorted order. */
230 			TAILQ_FOREACH(sdtmp, dl, sd_entry) {
231 				if (sdtmp->sd_norm > norm) {
232 					TAILQ_INSERT_BEFORE(sdtmp, sd,
233 							    sd_entry);
234 					sd = NULL;
235 					break;
236 				}
237 			}
238 			if (sd)
239 				TAILQ_INSERT_TAIL(dl, sd, sd_entry);
240 			sd = malloc(sizeof(*sd));
241 			if (sd == NULL) {
242 				ret = errno;
243 				close_dsts(dl);
244 				return ret;
245 			}
246 		} else if (ret != ENOENT) {
247 			close_dsts(dl);
248 			free(sd);
249 			return ret;
250 		}
251 	}
252 	free(sd);
253 	return 0;
254 }
255 
256 static void
257 close_srcs(struct _citrus_iconv_std_src_list *sl)
258 {
259 	struct _citrus_iconv_std_src *ss;
260 
261 	while ((ss=TAILQ_FIRST(sl)) != NULL) {
262 		TAILQ_REMOVE(sl, ss, ss_entry);
263 		close_dsts(&ss->ss_dsts);
264 		free(ss);
265 	}
266 }
267 
268 static int
269 open_srcs(struct _citrus_iconv_std_src_list *sl,
270 	  struct _esdb *dbsrc, struct _esdb *dbdst)
271 {
272 	int i, ret, count = 0;
273 	struct _citrus_iconv_std_src *ss;
274 
275 	ss = malloc(sizeof(*ss));
276 	if (ss == NULL)
277 		return errno;
278 
279 	TAILQ_INIT(&ss->ss_dsts);
280 
281 	for (i=0; i<dbsrc->db_num_charsets; i++) {
282 		ret = open_dsts(&ss->ss_dsts, &dbsrc->db_charsets[i], dbdst);
283 		if (ret)
284 			goto err;
285 		if (!TAILQ_EMPTY(&ss->ss_dsts)) {
286 			ss->ss_csid = dbsrc->db_charsets[i].ec_csid;
287 			TAILQ_INSERT_TAIL(sl, ss, ss_entry);
288 			ss = malloc(sizeof(*ss));
289 			if (ss == NULL) {
290 				ret = errno;
291 				goto err;
292 			}
293 			count++;
294 			TAILQ_INIT(&ss->ss_dsts);
295 		}
296 	}
297 	free(ss);
298 
299 	return count ? 0 : ENOENT;
300 
301 err:
302 	free(ss);
303 	close_srcs(sl);
304 	return ret;
305 }
306 
307 /* do convert a character */
308 #define E_NO_CORRESPONDING_CHAR ENOENT /* XXX */
309 static int
310 do_conv(struct _citrus_iconv_std *is, _csid_t *csid, _index_t *idx)
311 {
312 	_index_t tmpidx;
313 	int ret;
314 	struct _citrus_iconv_std_src *ss;
315 	struct _citrus_iconv_std_dst *sd;
316 
317 	TAILQ_FOREACH(ss, &is->is_srcs, ss_entry) {
318 		if (ss->ss_csid == *csid) {
319 			TAILQ_FOREACH(sd, &ss->ss_dsts, sd_entry) {
320 				ret = _csmapper_convert(sd->sd_mapper,
321 							&tmpidx, *idx, NULL);
322 				switch (ret) {
323 				case _CITRUS_MAPPER_CONVERT_SUCCESS:
324 					*csid = sd->sd_csid;
325 					*idx = tmpidx;
326 					return 0;
327 				case _CITRUS_MAPPER_CONVERT_INVAL:
328 					break;
329 				case _CITRUS_MAPPER_CONVERT_SRC_MORE:
330 					/*FALLTHROUGH*/
331 				case _CITRUS_MAPPER_CONVERT_DST_MORE:
332 					/*FALLTHROUGH*/
333 				case _CITRUS_MAPPER_CONVERT_FATAL:
334 					return EINVAL;
335 				case _CITRUS_MAPPER_CONVERT_ILSEQ:
336 					return EILSEQ;
337 				}
338 			}
339 			break;
340 		}
341 	}
342 
343 	return E_NO_CORRESPONDING_CHAR;
344 }
345 /* ---------------------------------------------------------------------- */
346 
347 static int
348 /*ARGSUSED*/
349 _citrus_iconv_std_iconv_init(struct _citrus_iconv *ci,
350 			     const char * __restrict curdir,
351 			     const char * __restrict src,
352 			     const char * __restrict dst,
353 			     const void * __restrict var, size_t lenvar)
354 {
355 	int ret;
356 	struct _citrus_iconv_std *is;
357 	struct _citrus_esdb esdbsrc, esdbdst;
358 
359 	is = malloc(sizeof(*is));
360 	if (is==NULL) {
361 		ret = errno;
362 		goto err0;
363 	}
364 	ret = _citrus_esdb_open(&esdbsrc, src);
365 	if (ret)
366 		goto err1;
367 	ret = _citrus_esdb_open(&esdbdst, dst);
368 	if (ret)
369 		goto err2;
370 	ret = open_encoding(&is->is_src_encoding, &esdbsrc);
371 	if (ret)
372 		goto err3;
373 	ret = open_encoding(&is->is_dst_encoding, &esdbdst);
374 	if (ret)
375 		goto err4;
376 	is->is_use_invalid = esdbdst.db_use_invalid;
377 	is->is_invalid = esdbdst.db_invalid;
378 
379 	TAILQ_INIT(&is->is_srcs);
380 	ret = open_srcs(&is->is_srcs, &esdbsrc, &esdbdst);
381 	if (ret)
382 		goto err5;
383 
384 	_esdb_close(&esdbsrc);
385 	_esdb_close(&esdbdst);
386 	ci->ci_closure = is;
387 
388 	return 0;
389 
390 err5:
391 	close_encoding(&is->is_dst_encoding);
392 err4:
393 	close_encoding(&is->is_src_encoding);
394 err3:
395 	_esdb_close(&esdbdst);
396 err2:
397 	_esdb_close(&esdbsrc);
398 err1:
399 	free(is);
400 err0:
401 	return ret;
402 }
403 
404 static void
405 /*ARGSUSED*/
406 _citrus_iconv_std_iconv_uninit(struct _citrus_iconv *ci)
407 {
408 	struct _citrus_iconv_std *is;
409 
410 	if (ci->ci_closure == NULL)
411 		return;
412 
413 	is = ci->ci_closure;
414 	close_encoding(&is->is_src_encoding);
415 	close_encoding(&is->is_dst_encoding);
416 	close_srcs(&is->is_srcs);
417 	free(is);
418 }
419 
420 static int
421 /*ARGSUSED*/
422 _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict ci,
423 				const char * __restrict * __restrict in,
424 				size_t * __restrict inbytes,
425 				char * __restrict * __restrict out,
426 				size_t * __restrict outbytes, u_int32_t flags,
427 				size_t * __restrict invalids)
428 {
429 	struct _citrus_iconv_std *is = ci->ci_closure;
430 	_index_t idx;
431 	_csid_t csid;
432 	int ret;
433 	size_t szrin, szrout;
434 	size_t inval;
435 	const char *tmpin;
436 
437 	inval = 0;
438 	if (in==NULL || *in==NULL) {
439 		/* special cases */
440 		if (out!=NULL && *out!=NULL) {
441 			/* init output state */
442 			save_encoding_state(&is->is_src_encoding);
443 			save_encoding_state(&is->is_dst_encoding);
444 			szrout = 0;
445 
446 			ret = cstombx(&is->is_dst_encoding,
447 				      *out, *outbytes,
448 				      _CITRUS_CSID_INVALID,
449 				      0, &szrout);
450 			if (ret)
451 				goto err;
452 
453 			if (szrout == (size_t)-2) {
454 				/* too small to store the character */
455 				ret = EINVAL;
456 				goto err;
457 			}
458 			*out += szrout;
459 			*outbytes -= szrout;
460 		}
461 		*invalids = 0;
462 		init_encoding_state(&is->is_src_encoding);
463 		return 0;
464 	}
465 
466 	/* normal case */
467 	for (;;) {
468 		/* save the encoding states for the error recovery */
469 		save_encoding_state(&is->is_src_encoding);
470 		save_encoding_state(&is->is_dst_encoding);
471 
472 		/* mb -> csid/index */
473 		tmpin = *in;
474 		szrin = szrout = 0;
475 		ret = mbtocsx(&is->is_src_encoding, &csid, &idx,
476 			     &tmpin, *inbytes, &szrin);
477 		if (ret)
478 			goto err;
479 
480 		if (szrin == (size_t)-2) {
481 			/* incompleted character */
482 			ret = EINVAL;
483 			goto err;
484 		}
485 		/* convert the character */
486 		ret = do_conv(is, &csid, &idx);
487 		if (ret) {
488 			if (ret == E_NO_CORRESPONDING_CHAR) {
489 				inval ++;
490 				if ((flags&_CITRUS_ICONV_F_HIDE_INVALID)==0 &&
491 				    is->is_use_invalid) {
492 					ret = wctombx(&is->is_dst_encoding,
493 						      *out, *outbytes,
494 						      is->is_invalid,
495 						      &szrout);
496 					if (ret)
497 						goto err;
498 				}
499 				goto next;
500 			} else {
501 				goto err;
502 			}
503 		}
504 		/* csid/index -> mb */
505 		ret = cstombx(&is->is_dst_encoding,
506 			      *out, *outbytes, csid, idx, &szrout);
507 		if (ret)
508 			goto err;
509 next:
510 		_DIAGASSERT(*inbytes>=szrin && *outbytes>=szrout);
511 		*inbytes -= tmpin-*in; /* szrin is insufficient on \0. */
512 		*in = tmpin;
513 		*outbytes -= szrout;
514 		*out += szrout;
515 		if (*inbytes==0)
516 			break;
517 		if (*outbytes == 0) {
518 			ret = E2BIG;
519 			goto err_norestore;
520 		}
521 	}
522 	*invalids = inval;
523 
524 	return 0;
525 
526 err:
527 	restore_encoding_state(&is->is_src_encoding);
528 	restore_encoding_state(&is->is_dst_encoding);
529 err_norestore:
530 	*invalids = inval;
531 
532 	return ret;
533 }
534