xref: /netbsd-src/lib/libintl/gettext.c (revision 7cc2f76925f078d01ddc9e640a98f4ccfc9f8c3b)
1 /*	$NetBSD: gettext.c,v 1.7 2000/12/15 06:37:21 itojun Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: gettext.c,v 1.7 2000/12/15 06:37:21 itojun Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <sys/uio.h>
39 
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52 
53 static struct mohandle mohandle;
54 
55 static const char *lookup_category __P((int));
56 static const char *split_locale __P((const char *));
57 static const char *lookup_mofile __P((char *, size_t, const char *,
58 	char *, const char *, const char *));
59 static u_int32_t flip __P((u_int32_t, u_int32_t));
60 static int validate __P((void *));
61 static int mapit __P((const char *));
62 static int unmapit __P((void));
63 static const char *lookup_hash __P((const char *));
64 static const char *lookup_bsearch __P((const char *));
65 static const char *lookup __P((const char *));
66 
67 /*
68  * shortcut functions.  the main implementation resides in dcngettext().
69  */
/*
 * gettext() -
 * singular-only lookup of msgid in the default text domain, using the
 * LC_MESSAGES category.
 */
char *
gettext(msgid)
	const char *msgid;
{
	char *translated;

	translated = dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
77 
/*
 * dgettext() -
 * singular-only lookup of msgid in the text domain "domainname" (the default
 * domain if it is NULL), using the LC_MESSAGES category.
 */
char *
dgettext(domainname, msgid)
	const char *domainname;
	const char *msgid;
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
	return translated;
}
86 
/*
 * dcgettext() -
 * singular-only lookup of msgid in the text domain "domainname" (the default
 * domain if it is NULL), using the given locale category.
 */
char *
dcgettext(domainname, msgid, category)
	const char *domainname;
	const char *msgid;
	int category;
{
	char *translated;

	translated = dcngettext(domainname, msgid, NULL, 1UL, category);
	return translated;
}
96 
/*
 * ngettext() -
 * lookup in the default text domain, using the LC_MESSAGES category.
 * msgid1 is looked up if n is 1, msgid2 otherwise.
 */
char *
ngettext(msgid1, msgid2, n)
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translated;

	translated = dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
106 
/*
 * dngettext() -
 * lookup in the text domain "domainname" (the default domain if it is NULL),
 * using the LC_MESSAGES category.
 * msgid1 is looked up if n is 1, msgid2 otherwise.
 */
char *
dngettext(domainname, msgid1, msgid2, n)
	const char *domainname;
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{
	char *translated;

	translated = dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
	return translated;
}
117 
118 /*
119  * dcngettext() -
120  * lookup internationalized message on database locale/category/domainname
121  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
122  * if n equals to 1, internationalized message will be looked up for msgid1.
123  * otherwise, message will be looked up for msgid2.
124  * if the lookup fails, the function will return msgid1 or msgid2 as is.
125  *
126  * Even though the return type is "char *", caller should not rewrite the
127  * region pointed to by the return value (should be "const char *", but can't
128  * change it for compatibility with other implementations).
129  *
130  * by default (if domainname == NULL), domainname is taken from the value set
131  * by textdomain().  usually name of the application (like "ls") is used as
132  * domainname.  category is usually LC_MESSAGES.
133  *
134  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
135  * endian encoded file.  both endians are supported here, as the files are in
136  * /usr/share/locale! (or we should move those files into /usr/libdata)
137  */
138 
/*
 * lookup_category() -
 * map a locale category constant to the name of the directory component
 * used for it in the catalog pathname.  returns NULL for any category that
 * is not listed below.
 */
static const char *
lookup_category(category)
	int category;
{
	static const struct {
		int		value;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].value == category)
			return categories[i].name;
	}
	return NULL;
}
154 
/*
 * split_locale() -
 * expand a locale name into a colon-separated list of candidate names,
 * ordered from the most specific one to the bare language.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * for example, "de_DE.UTF-8@euro" is expanded to
 * "de_DE.UTF-8@euro:de_DE@euro:de@euro:de_DE.UTF-8:de_DE:de".
 *
 * lname itself is returned if it cannot be split (too long, empty language
 * part, or a codeset without a territory) or if the expanded list does not
 * fit into the result buffer.  otherwise the return value points to a static
 * buffer, which is overwritten by the next call.
 */
static const char *
split_locale(lname)
	const char *lname;
{
	/* which optional components each candidate carries, in output order */
	static const struct {
		int	territory;
		int	codeset;
		int	modifier;
	} forms[] = {
		{ 1, 1, 1 },	/* language_territory.codeset@modifier */
		{ 1, 0, 1 },	/* language_territory@modifier */
		{ 0, 0, 1 },	/* language@modifier */
		{ 1, 1, 0 },	/* language_territory.codeset */
		{ 1, 0, 0 },	/* language_territory */
		{ 0, 0, 0 },	/* language */
	};
	static char result[BUFSIZ];
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	size_t i, len, used;
	int n;

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;

	/* split a private copy in place, rightmost delimiter first */
	memcpy(buf, lname, len + 1);
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (strlen(l) == 0)
		return lname;
	if (c && !t)
		return lname;

	result[0] = '\0';
	used = 0;
	for (i = 0; i < sizeof(forms) / sizeof(forms[0]); i++) {
		/* skip the forms that need a component lname does not have */
		if ((forms[i].territory && !t) || (forms[i].codeset && !c) ||
		    (forms[i].modifier && !m))
			continue;

		n = snprintf(result + used, sizeof(result) - used,
		    "%s%s%s%s%s%s%s%s",
		    used ? ":" : "", l,
		    forms[i].territory ? "_" : "", forms[i].territory ? t : "",
		    forms[i].codeset ? "." : "", forms[i].codeset ? c : "",
		    forms[i].modifier ? "@" : "", forms[i].modifier ? m : "");
		/* a truncated list would end in a bogus locale name */
		if (n < 0 || (size_t)n >= sizeof(result) - used)
			return lname;
		used += (size_t)n;
	}

	return result;
}
219 
220 static const char *
221 lookup_mofile(buf, len, dir, lpath, category, domainname)
222 	char *buf;
223 	size_t len;
224 	const char *dir;
225 	char *lpath;	/* list of locales to be tried */
226 	const char *category;
227 	const char *domainname;
228 {
229 	struct stat st;
230 	char *p, *q;
231 
232 	q = lpath;
233 	while (1) {
234 		p = strsep(&q, ":");
235 		if (!p)
236 			break;
237 		if (!*p)
238 			continue;
239 
240 		/* don't mess with default locales */
241 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
242 			return NULL;
243 
244 		/* validate pathname */
245 		if (strchr(p, '/') || strchr(category, '/'))
246 			continue;
247 #if 1	/*?*/
248 		if (strchr(domainname, '/'))
249 			continue;
250 #endif
251 
252 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
253 		    category, domainname);
254 		if (stat(buf, &st) < 0)
255 			continue;
256 		if ((st.st_mode & S_IFMT) != S_IFREG)
257 			continue;
258 
259 		if (mapit(buf) == 0)
260 			return buf;
261 	}
262 
263 	return NULL;
264 }
265 
266 static u_int32_t
267 flip(v, magic)
268 	u_int32_t v;
269 	u_int32_t magic;
270 {
271 
272 	if (magic == MO_MAGIC)
273 		return v;
274 	else if (magic == MO_MAGIC_SWAPPED) {
275 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
276 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
277 		return v;
278 	} else {
279 		abort();
280 		/*NOTREACHED*/
281 	}
282 }
283 
284 static int
285 validate(arg)
286 	void *arg;
287 {
288 	char *p;
289 
290 	p = (char *)arg;
291 	if (p < (char *)mohandle.addr ||
292 	    p > (char *)mohandle.addr + mohandle.len)
293 		return 0;
294 	else
295 		return 1;
296 }
297 
298 int
299 mapit(path)
300 	const char *path;
301 {
302 	int fd;
303 	struct stat st;
304 	char *base;
305 	u_int32_t magic, revision;
306 	struct moentry *otable, *ttable;
307 	struct moentry_h *p;
308 	struct mo *mo;
309 	size_t l;
310 	int i;
311 	char *v;
312 
313 	if (mohandle.addr && mohandle.addr != MAP_FAILED &&
314 	    strcmp(path, mohandle.path) == 0)
315 		return 0;	/*already opened*/
316 
317 	unmapit();
318 
319 #if 0
320 	if (secure_path(path) != 0)
321 		goto fail;
322 #endif
323 	if (stat(path, &st) < 0)
324 		goto fail;
325 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
326 		goto fail;
327 	fd = open(path, O_RDONLY);
328 	if (fd < 0)
329 		goto fail;
330 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
331 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
332 		close(fd);
333 		goto fail;
334 	}
335 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
336 	    flip(revision, magic) != MO_REVISION) {
337 		close(fd);
338 		goto fail;
339 	}
340 	mohandle.addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
341 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
342 	if (!mohandle.addr || mohandle.addr == MAP_FAILED) {
343 		close(fd);
344 		goto fail;
345 	}
346 	close(fd);
347 	mohandle.len = (size_t)st.st_size;
348 	strlcpy(mohandle.path, path, sizeof(mohandle.path));
349 
350 	base = mohandle.addr;
351 	mo = (struct mo *)mohandle.addr;
352 
353 	/* flip endian.  do not flip magic number! */
354 	mohandle.mo.mo_magic = mo->mo_magic;
355 	mohandle.mo.mo_revision = flip(mo->mo_revision, magic);
356 	mohandle.mo.mo_nstring = flip(mo->mo_nstring, magic);
357 
358 	/* validate otable/ttable */
359 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
360 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
361 	if (!validate(otable) || !validate(&otable[mohandle.mo.mo_nstring])) {
362 		unmapit();
363 		goto fail;
364 	}
365 	if (!validate(ttable) || !validate(&ttable[mohandle.mo.mo_nstring])) {
366 		unmapit();
367 		goto fail;
368 	}
369 
370 	/* allocate [ot]table, and convert to normal pointer representation. */
371 	l = sizeof(struct moentry_h) * mohandle.mo.mo_nstring;
372 	mohandle.mo.mo_otable = (struct moentry_h *)malloc(l);
373 	if (!mohandle.mo.mo_otable) {
374 		unmapit();
375 		goto fail;
376 	}
377 	mohandle.mo.mo_ttable = (struct moentry_h *)malloc(l);
378 	if (!mohandle.mo.mo_ttable) {
379 		unmapit();
380 		goto fail;
381 	}
382 	p = mohandle.mo.mo_otable;
383 	for (i = 0; i < mohandle.mo.mo_nstring; i++) {
384 		p[i].len = flip(otable[i].len, magic);
385 		p[i].off = base + flip(otable[i].off, magic);
386 
387 		if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
388 			unmapit();
389 			goto fail;
390 		}
391 	}
392 	p = mohandle.mo.mo_ttable;
393 	for (i = 0; i < mohandle.mo.mo_nstring; i++) {
394 		p[i].len = flip(ttable[i].len, magic);
395 		p[i].off = base + flip(ttable[i].off, magic);
396 
397 		if (!validate(p[i].off) || !validate(p[i].off + p[i].len + 1)) {
398 			unmapit();
399 			goto fail;
400 		}
401 	}
402 
403 	/* grab MIME-header and charset field */
404 	mohandle.mo.mo_header = lookup("");
405 	if (mohandle.mo.mo_header)
406 		v = strstr(mohandle.mo.mo_header, "charset=");
407 	else
408 		v = NULL;
409 	if (v) {
410 		mohandle.mo.mo_charset = strdup(v + 8);
411 		if (!mohandle.mo.mo_charset)
412 			goto fail;
413 		v = strchr(mohandle.mo.mo_charset, '\n');
414 		if (v)
415 			*v = '\0';
416 	}
417 
418 	/*
419 	 * XXX check charset, reject it if we are unable to support the charset
420 	 * with the current locale.
421 	 * for example, if we are using euc-jp locale and we are looking at
422 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
423 	 * the *.mo file as we cannot support it.
424 	 */
425 
426 	return 0;
427 
428 fail:
429 	return -1;
430 }
431 
432 static int
433 unmapit()
434 {
435 
436 	/* unmap if there's already mapped region */
437 	if (mohandle.addr && mohandle.addr != MAP_FAILED)
438 		munmap(mohandle.addr, mohandle.len);
439 	mohandle.addr = NULL;
440 	mohandle.path[0] = '\0';
441 	if (mohandle.mo.mo_otable)
442 		free(mohandle.mo.mo_otable);
443 	if (mohandle.mo.mo_ttable)
444 		free(mohandle.mo.mo_ttable);
445 	if (mohandle.mo.mo_charset)
446 		free(mohandle.mo.mo_charset);
447 	memset(&mohandle.mo, 0, sizeof(mohandle.mo));
448 	return 0;
449 }
450 
/*
 * lookup_hash() -
 * placeholder for a hashed lookup of msgid.  nothing is looked up for now,
 * and NULL ("not found") is always returned.
 */
static const char *
lookup_hash(msgid)
	const char *msgid;
{

	/*
	 * XXX a hashed lookup belongs here, but writing one means looking
	 * inside the GPL'ed *.c and re-implementing it...
	 */
	(void)msgid;
	return NULL;
}
462 
463 static const char *
464 lookup_bsearch(msgid)
465 	const char *msgid;
466 {
467 	int top, bottom, middle, omiddle;
468 	int n;
469 
470 	top = 0;
471 	bottom = mohandle.mo.mo_nstring;
472 	omiddle = -1;
473 	while (1) {
474 		if (top > bottom)
475 			break;
476 		middle = (top + bottom) / 2;
477 		/* avoid possible infinite loop, when the data is not sorted */
478 		if (omiddle == middle)
479 			break;
480 		if (middle < 0 || middle >= mohandle.mo.mo_nstring)
481 			break;
482 
483 		n = strcmp(msgid, mohandle.mo.mo_otable[middle].off);
484 		if (n == 0)
485 			return (const char *)mohandle.mo.mo_ttable[middle].off;
486 		else if (n < 0)
487 			bottom = middle;
488 		else
489 			top = middle;
490 		omiddle = middle;
491 	}
492 
493 	return NULL;
494 }
495 
/*
 * lookup() -
 * find the translation of msgid in the mapped catalog.  the hashed lookup
 * is tried first, the binary search is used if that finds nothing.
 * returns NULL if msgid is not found.
 */
static const char *
lookup(msgid)
	const char *msgid;
{
	const char *hit;

	hit = lookup_hash(msgid);
	if (hit == NULL)
		hit = lookup_bsearch(msgid);

	return hit;
}
508 
509 char *
510 dcngettext(domainname, msgid1, msgid2, n, category)
511 	const char *domainname;
512 	const char *msgid1;
513 	const char *msgid2;
514 	unsigned long int n;
515 	int category;
516 {
517 	const char *msgid;
518 	char path[PATH_MAX];
519 	static char lpath[PATH_MAX];
520 	static char olpath[PATH_MAX];
521 	const char *locale;
522 	const char *language;
523 	const char *cname = NULL;
524 	const char *v;
525 	static char *ocname = NULL;
526 	static char *odomainname = NULL;
527 	struct domainbinding *db;
528 
529 	msgid = (n == 1) ? msgid1 : msgid2;
530 
531 	if (!domainname)
532 		domainname = __binding.domainname;
533 	cname = lookup_category(category);
534 	if (!domainname || !cname)
535 		goto fail;
536 
537 	language = getenv("LANGUAGE");
538 	locale = setlocale(LC_MESSAGES, NULL);	/*XXX*/
539 	if (locale)
540 		locale = split_locale(locale);
541 	if (language && locale) {
542 		if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
543 			goto fail;
544 		snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
545 	} else if (language) {
546 		if (strlen(language) + 1 > sizeof(lpath))
547 			goto fail;
548 		strlcpy(lpath, language, sizeof(lpath));
549 	} else if (locale) {
550 		if (strlen(locale) + 1 > sizeof(lpath))
551 			goto fail;
552 		strlcpy(lpath, locale, sizeof(lpath));
553 	} else
554 		goto fail;
555 
556 	for (db = __binding.next; db; db = db->next)
557 		if (strcmp(db->domainname, domainname) == 0)
558 			break;
559 	if (!db)
560 		db = &__binding;
561 
562 	/* don't bother looking it up if the values are the same */
563 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
564 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0)
565 		goto found;
566 
567 	/* try to find appropriate file, from $LANGUAGE */
568 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
569 	    domainname) == NULL)
570 		goto fail;
571 
572 	if (odomainname)
573 		free(odomainname);
574 	if (ocname)
575 		free(ocname);
576 	odomainname = strdup(domainname);
577 	ocname = strdup(cname);
578 	if (!odomainname || !ocname) {
579 		if (odomainname)
580 			free(odomainname);
581 		if (ocname)
582 			free(ocname);
583 		odomainname = ocname = NULL;
584 		goto fail;
585 	}
586 
587 	strlcpy(olpath, lpath, sizeof(olpath));
588 
589 found:
590 	v = lookup(msgid);
591 	if (v) {
592 		/*
593 		 * XXX call iconv() here, if translated text is encoded
594 		 * differently from currently-selected encoding (locale).
595 		 * look at Content-type header in *.mo file, in string obtained
596 		 * by gettext("").
597 		 */
598 
599 		/*
600 		 * Given the amount of printf-format security issues, it may
601 		 * be a good idea to validate if the original msgid and the
602 		 * translated message format string carry the same printf-like
603 		 * format identifiers.
604 		 */
605 
606 		msgid = v;
607 	}
608 
609 fail:
610 	/* LINTED const cast */
611 	return (char *)msgid;
612 }
613