xref: /minix3/lib/libintl/gettext_iconv.c (revision 36dcc4a4a93f782ada76dce3d52fbeab0e063cf1)
/*	$NetBSD: gettext_iconv.c,v 1.8 2009/02/18 13:08:22 yamt Exp $	*/

/*-
 * Copyright (c) 2004 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Citrus$
 */
30*36dcc4a4SLionel Sambuc 
31*36dcc4a4SLionel Sambuc 
32*36dcc4a4SLionel Sambuc #include <sys/types.h>
33*36dcc4a4SLionel Sambuc #include <sys/param.h>
34*36dcc4a4SLionel Sambuc 
35*36dcc4a4SLionel Sambuc #include <errno.h>
36*36dcc4a4SLionel Sambuc #include <iconv.h>
37*36dcc4a4SLionel Sambuc #include <libintl.h>
38*36dcc4a4SLionel Sambuc #include <langinfo.h>
39*36dcc4a4SLionel Sambuc #include <search.h>
40*36dcc4a4SLionel Sambuc #include <stdlib.h>
41*36dcc4a4SLionel Sambuc #include <string.h>
42*36dcc4a4SLionel Sambuc 
43*36dcc4a4SLionel Sambuc #include "libintl_local.h"
44*36dcc4a4SLionel Sambuc 
/*
 * One entry of the conversion cache: maps an original catalogue message
 * to its already-converted counterpart.
 */
struct cache {
	const char *c_origmsg;		/* lookup key: the original message */
	const char *c_resultmsg;	/* the converted message */
};
49*36dcc4a4SLionel Sambuc 
/* helpers managing the tree of already converted messages */
static const struct cache *cache_find(const char *msg,
    struct domainbinding *db);
static int cache_enter(const char *origmsg, const char *resultmsg);
static int cache_cmp(const void *va, const void *vb);

/* root of the tsearch(3) tree holding the struct cache entries */
static void *cacheroot;
55*36dcc4a4SLionel Sambuc 
56*36dcc4a4SLionel Sambuc /* ARGSUSED1 */
57*36dcc4a4SLionel Sambuc static const struct cache *
cache_find(const char * msg,struct domainbinding * db)58*36dcc4a4SLionel Sambuc cache_find(const char *msg, struct domainbinding *db)
59*36dcc4a4SLionel Sambuc {
60*36dcc4a4SLionel Sambuc 	struct cache key;
61*36dcc4a4SLionel Sambuc 	struct cache **c;
62*36dcc4a4SLionel Sambuc 
63*36dcc4a4SLionel Sambuc 	key.c_origmsg = msg;
64*36dcc4a4SLionel Sambuc 	c = tfind(&key, &cacheroot, cache_cmp);
65*36dcc4a4SLionel Sambuc 
66*36dcc4a4SLionel Sambuc 	return c ? *c : NULL;
67*36dcc4a4SLionel Sambuc }
68*36dcc4a4SLionel Sambuc 
69*36dcc4a4SLionel Sambuc static int
cache_enter(const char * origmsg,const char * resultmsg)70*36dcc4a4SLionel Sambuc cache_enter(const char *origmsg, const char *resultmsg)
71*36dcc4a4SLionel Sambuc {
72*36dcc4a4SLionel Sambuc 	struct cache *c;
73*36dcc4a4SLionel Sambuc 
74*36dcc4a4SLionel Sambuc 	c = malloc(sizeof(*c));
75*36dcc4a4SLionel Sambuc 	if (c == NULL)
76*36dcc4a4SLionel Sambuc 		return -1;
77*36dcc4a4SLionel Sambuc 
78*36dcc4a4SLionel Sambuc 	c->c_origmsg = origmsg;
79*36dcc4a4SLionel Sambuc 	c->c_resultmsg = resultmsg;
80*36dcc4a4SLionel Sambuc 
81*36dcc4a4SLionel Sambuc 	if (tsearch(c, &cacheroot, cache_cmp) == NULL) {
82*36dcc4a4SLionel Sambuc 		free(c);
83*36dcc4a4SLionel Sambuc 		return -1;
84*36dcc4a4SLionel Sambuc 	}
85*36dcc4a4SLionel Sambuc 
86*36dcc4a4SLionel Sambuc 	return 0;
87*36dcc4a4SLionel Sambuc }
88*36dcc4a4SLionel Sambuc 
89*36dcc4a4SLionel Sambuc static int
cache_cmp(const void * va,const void * vb)90*36dcc4a4SLionel Sambuc cache_cmp(const void *va, const void *vb)
91*36dcc4a4SLionel Sambuc {
92*36dcc4a4SLionel Sambuc 	const struct cache *a = va;
93*36dcc4a4SLionel Sambuc 	const struct cache *b = vb;
94*36dcc4a4SLionel Sambuc 	int result;
95*36dcc4a4SLionel Sambuc 
96*36dcc4a4SLionel Sambuc 	if (a->c_origmsg > b->c_origmsg) {
97*36dcc4a4SLionel Sambuc 		result = 1;
98*36dcc4a4SLionel Sambuc 	} else if (a->c_origmsg < b->c_origmsg) {
99*36dcc4a4SLionel Sambuc 		result = -1;
100*36dcc4a4SLionel Sambuc 	} else {
101*36dcc4a4SLionel Sambuc 		result = 0;
102*36dcc4a4SLionel Sambuc 	}
103*36dcc4a4SLionel Sambuc 
104*36dcc4a4SLionel Sambuc 	return result;
105*36dcc4a4SLionel Sambuc }
106*36dcc4a4SLionel Sambuc 
107*36dcc4a4SLionel Sambuc #define	GETTEXT_ICONV_MALLOC_CHUNK	(16 * 1024)
108*36dcc4a4SLionel Sambuc 
109*36dcc4a4SLionel Sambuc const char *
__gettext_iconv(const char * origmsg,struct domainbinding * db)110*36dcc4a4SLionel Sambuc __gettext_iconv(const char *origmsg, struct domainbinding *db)
111*36dcc4a4SLionel Sambuc {
112*36dcc4a4SLionel Sambuc 	const char *tocode;
113*36dcc4a4SLionel Sambuc 	const char *fromcode = db->mohandle.mo.mo_charset;
114*36dcc4a4SLionel Sambuc 	const struct cache *cache;
115*36dcc4a4SLionel Sambuc 	const char *result;
116*36dcc4a4SLionel Sambuc 	iconv_t cd;
117*36dcc4a4SLionel Sambuc 	const char *src;
118*36dcc4a4SLionel Sambuc 	char *dst;
119*36dcc4a4SLionel Sambuc 	size_t origlen;
120*36dcc4a4SLionel Sambuc 	size_t srclen;
121*36dcc4a4SLionel Sambuc 	size_t dstlen;
122*36dcc4a4SLionel Sambuc 	size_t nvalid;
123*36dcc4a4SLionel Sambuc 	int savederrno = errno;
124*36dcc4a4SLionel Sambuc 
125*36dcc4a4SLionel Sambuc 	/*
126*36dcc4a4SLionel Sambuc 	 * static buffer for converted texts.
127*36dcc4a4SLionel Sambuc 	 *
128*36dcc4a4SLionel Sambuc 	 * note:
129*36dcc4a4SLionel Sambuc 	 * we never free buffers once returned to callers.
130*36dcc4a4SLionel Sambuc 	 * because of interface design of gettext, we can't know
131*36dcc4a4SLionel Sambuc 	 * the lifetime of them.
132*36dcc4a4SLionel Sambuc 	 */
133*36dcc4a4SLionel Sambuc 	static char *buffer;
134*36dcc4a4SLionel Sambuc 	static size_t bufferlen;
135*36dcc4a4SLionel Sambuc 
136*36dcc4a4SLionel Sambuc 	/*
137*36dcc4a4SLionel Sambuc 	 * don't convert message if *.mo doesn't specify codeset.
138*36dcc4a4SLionel Sambuc 	 */
139*36dcc4a4SLionel Sambuc 	if (fromcode == NULL)
140*36dcc4a4SLionel Sambuc 		return origmsg;
141*36dcc4a4SLionel Sambuc 
142*36dcc4a4SLionel Sambuc 	tocode = db->codeset;
143*36dcc4a4SLionel Sambuc 	if (tocode == NULL) {
144*36dcc4a4SLionel Sambuc 		/*
145*36dcc4a4SLionel Sambuc 		 * codeset isn't specified explicitly by
146*36dcc4a4SLionel Sambuc 		 * bind_textdomain_codeset().
147*36dcc4a4SLionel Sambuc 		 * use current locale(LC_CTYPE)'s codeset.
148*36dcc4a4SLionel Sambuc 		 *
149*36dcc4a4SLionel Sambuc 		 * XXX maybe wrong; it can mismatch with
150*36dcc4a4SLionel Sambuc 		 * environment variable setting.
151*36dcc4a4SLionel Sambuc 		 */
152*36dcc4a4SLionel Sambuc 		tocode = nl_langinfo(CODESET);
153*36dcc4a4SLionel Sambuc 	}
154*36dcc4a4SLionel Sambuc 
155*36dcc4a4SLionel Sambuc 	/*
156*36dcc4a4SLionel Sambuc 	 * shortcut if possible.
157*36dcc4a4SLionel Sambuc 	 * XXX should handle aliases
158*36dcc4a4SLionel Sambuc 	 */
159*36dcc4a4SLionel Sambuc 	if (!strcasecmp(tocode, fromcode))
160*36dcc4a4SLionel Sambuc 		return origmsg;
161*36dcc4a4SLionel Sambuc 
162*36dcc4a4SLionel Sambuc 	/* XXX LOCK */
163*36dcc4a4SLionel Sambuc 
164*36dcc4a4SLionel Sambuc 	/* XXX should detect change of tocode and purge caches? */
165*36dcc4a4SLionel Sambuc 
166*36dcc4a4SLionel Sambuc 	/*
167*36dcc4a4SLionel Sambuc 	 * see if we have already converted this message.
168*36dcc4a4SLionel Sambuc 	 */
169*36dcc4a4SLionel Sambuc 	cache = cache_find(origmsg, db);
170*36dcc4a4SLionel Sambuc 	if (cache) {
171*36dcc4a4SLionel Sambuc 		result = cache->c_resultmsg;
172*36dcc4a4SLionel Sambuc 		goto out;
173*36dcc4a4SLionel Sambuc 	}
174*36dcc4a4SLionel Sambuc 
175*36dcc4a4SLionel Sambuc 	origlen = strlen(origmsg) + 1;
176*36dcc4a4SLionel Sambuc again:
177*36dcc4a4SLionel Sambuc 	cd = iconv_open(tocode, fromcode);
178*36dcc4a4SLionel Sambuc 	if (cd == (iconv_t)-1) {
179*36dcc4a4SLionel Sambuc 		result = origmsg;
180*36dcc4a4SLionel Sambuc 		goto out;
181*36dcc4a4SLionel Sambuc 	}
182*36dcc4a4SLionel Sambuc 
183*36dcc4a4SLionel Sambuc 	src = origmsg;
184*36dcc4a4SLionel Sambuc 	srclen = origlen;
185*36dcc4a4SLionel Sambuc 	dst = buffer;
186*36dcc4a4SLionel Sambuc 	dstlen = bufferlen;
187*36dcc4a4SLionel Sambuc 	nvalid = iconv(cd, &src, &srclen, &dst, &dstlen);
188*36dcc4a4SLionel Sambuc 	iconv_close(cd);
189*36dcc4a4SLionel Sambuc 
190*36dcc4a4SLionel Sambuc 	if (nvalid == (size_t)-1) {
191*36dcc4a4SLionel Sambuc 		/*
192*36dcc4a4SLionel Sambuc 		 * try to allocate a new buffer.
193*36dcc4a4SLionel Sambuc 		 *
194*36dcc4a4SLionel Sambuc 		 * just give up if GETTEXT_ICONV_MALLOC_CHUNK was not enough.
195*36dcc4a4SLionel Sambuc 		 */
196*36dcc4a4SLionel Sambuc 		if (errno == E2BIG &&
197*36dcc4a4SLionel Sambuc 		    bufferlen != GETTEXT_ICONV_MALLOC_CHUNK) {
198*36dcc4a4SLionel Sambuc 			buffer = malloc(GETTEXT_ICONV_MALLOC_CHUNK);
199*36dcc4a4SLionel Sambuc 			if (buffer) {
200*36dcc4a4SLionel Sambuc 				bufferlen = GETTEXT_ICONV_MALLOC_CHUNK;
201*36dcc4a4SLionel Sambuc 				goto again;
202*36dcc4a4SLionel Sambuc 			}
203*36dcc4a4SLionel Sambuc 		}
204*36dcc4a4SLionel Sambuc 
205*36dcc4a4SLionel Sambuc 		result = origmsg;
206*36dcc4a4SLionel Sambuc 	} else if (cache_enter(origmsg, buffer)) {
207*36dcc4a4SLionel Sambuc 		/*
208*36dcc4a4SLionel Sambuc 		 * failed to enter cache.  give up.
209*36dcc4a4SLionel Sambuc 		 */
210*36dcc4a4SLionel Sambuc 		result = origmsg;
211*36dcc4a4SLionel Sambuc 	} else {
212*36dcc4a4SLionel Sambuc 		size_t resultlen = dst - buffer;
213*36dcc4a4SLionel Sambuc 
214*36dcc4a4SLionel Sambuc 		result = buffer;
215*36dcc4a4SLionel Sambuc 		bufferlen -= resultlen;
216*36dcc4a4SLionel Sambuc 		buffer += resultlen;
217*36dcc4a4SLionel Sambuc 	}
218*36dcc4a4SLionel Sambuc 
219*36dcc4a4SLionel Sambuc out:
220*36dcc4a4SLionel Sambuc 	/* XXX UNLOCK */
221*36dcc4a4SLionel Sambuc 	errno = savederrno;
222*36dcc4a4SLionel Sambuc 
223*36dcc4a4SLionel Sambuc 	return result;
224*36dcc4a4SLionel Sambuc }
225