xref: /netbsd-src/lib/libintl/gettext.c (revision 5e4c038a45edbc7d63b7c2daa76e29f88b64a4e3)
1 /*	$NetBSD: gettext.c,v 1.13 2002/02/13 08:01:13 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000, 2001 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
29  */
30 
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: gettext.c,v 1.13 2002/02/13 08:01:13 yamt Exp $");
34 #endif /* LIBC_SCCS and not lint */
35 
36 #include <sys/types.h>
37 #include <sys/param.h>
38 #include <sys/stat.h>
39 #include <sys/mman.h>
40 #include <sys/uio.h>
41 
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <string.h>
47 #if 0
48 #include <util.h>
49 #endif
50 #include <libintl.h>
51 #include <locale.h>
52 #include "libintl_local.h"
53 #include "pathnames.h"
54 
55 static const char *lookup_category __P((int));
56 static const char *split_locale __P((const char *));
57 static const char *lookup_mofile __P((char *, size_t, const char *,
58 	const char *, const char *, const char *, struct domainbinding *));
59 static u_int32_t flip __P((u_int32_t, u_int32_t));
60 static int validate __P((void *, struct mohandle *));
61 static int mapit __P((const char *, struct domainbinding *));
62 static int unmapit __P((struct domainbinding *));
63 static const char *lookup_hash __P((const char *, struct domainbinding *));
64 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
65 static const char *lookup __P((const char *, struct domainbinding *));
66 static const char *get_lang_env(const char *);
67 
68 /*
69  * shortcut functions.  the main implementation resides in dcngettext().
70  */
/*
 * gettext() -
 * look up msgid in the default text domain, category LC_MESSAGES.
 * Equivalent to dcngettext(NULL, msgid, NULL, 1, LC_MESSAGES).
 */
char *
gettext(const char *msgid)
{
	/* a NULL domain makes dcngettext() use __current_domainname */
	const char *const domain = NULL;

	return dcngettext(domain, msgid, NULL, 1UL, LC_MESSAGES);
}
78 
/*
 * dgettext() -
 * look up msgid in the given text domain, category LC_MESSAGES.
 * No plural form is involved, so msgid2 is NULL and n is fixed to 1.
 */
char *
dgettext(const char *domainname, const char *msgid)
{
	const unsigned long int singular = 1UL;

	return dcngettext(domainname, msgid, NULL, singular, LC_MESSAGES);
}
87 
/*
 * dcgettext() -
 * look up msgid in the given text domain and locale category.
 * No plural form is involved, so msgid2 is NULL and n is fixed to 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{
	const unsigned long int singular = 1UL;

	return dcngettext(domainname, msgid, NULL, singular, category);
}
97 
/*
 * ngettext() -
 * plural-aware lookup in the default text domain, category LC_MESSAGES.
 * msgid1 is used when n is 1, msgid2 otherwise (see dcngettext()).
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{
	/* a NULL domain makes dcngettext() use __current_domainname */
	const char *const domain = NULL;

	return dcngettext(domain, msgid1, msgid2, n, LC_MESSAGES);
}
107 
/*
 * dngettext() -
 * plural-aware lookup in the given text domain, category LC_MESSAGES.
 * msgid1 is used when n is 1, msgid2 otherwise (see dcngettext()).
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{
	const int category = LC_MESSAGES;

	return dcngettext(domainname, msgid1, msgid2, n, category);
}
118 
119 /*
120  * dcngettext() -
121  * lookup internationalized message on database locale/category/domainname
122  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
123  * if n equals to 1, internationalized message will be looked up for msgid1.
124  * otherwise, message will be looked up for msgid2.
125  * if the lookup fails, the function will return msgid1 or msgid2 as is.
126  *
127  * Even though the return type is "char *", caller should not rewrite the
128  * region pointed to by the return value (should be "const char *", but can't
129  * change it for compatibility with other implementations).
130  *
131  * by default (if domainname == NULL), domainname is taken from the value set
132  * by textdomain().  usually name of the application (like "ls") is used as
133  * domainname.  category is usually LC_MESSAGES.
134  *
135  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
136  * endian encoded file.  both endians are supported here, as the files are in
137  * /usr/share/locale! (or we should move those files into /usr/libdata)
138  */
139 
/*
 * Map a locale category constant (LC_xxx) to its name as a string.
 * Returns NULL for any value not listed below (LC_ALL included).
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		id;
		const char	*name;
	} names[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
		if (names[i].id == category)
			return names[i].name;
	}
	return NULL;
}
155 
/*
 * XPG syntax: language[_territory[.codeset]][@modifier]
 */

/*
 * Append "cand" to the ':'-separated list held in dst (a NUL-terminated
 * string in a buffer of dstlen bytes).  A ':' separator is inserted unless
 * dst is still empty.
 * Returns 0 on success, or -1 (dst left untouched) if cand does not fit.
 */
static int
append_candidate(char *dst, size_t dstlen, const char *cand)
{
	size_t used = strlen(dst);
	size_t clen = strlen(cand);
	size_t need = clen + (used != 0 ? 1 : 0);

	/* need bytes plus the terminating NUL must fit in what is left */
	if (need >= dstlen - used)
		return -1;
	if (used != 0)
		dst[used++] = ':';
	memcpy(dst + used, cand, clen + 1);
	return 0;
}

/*
 * Expand a locale name into a ':'-separated list of names to try, from the
 * most specific to the least specific, e.g.
 *	"ja_JP.eucJP@mod" ->
 *	"ja_JP.eucJP@mod:ja_JP@mod:ja@mod:ja_JP.eucJP:ja_JP:ja"
 *
 * lname is returned unchanged when it cannot be expanded: it is too long
 * for the work buffer, the language part is empty, a codeset is given
 * without a territory, or the expanded list would not fit in the result
 * buffer (a silently clipped list would end in a bogus locale name).
 *
 * Otherwise the return value points to a static buffer that is overwritten
 * by the next call.
 */
static const char *
split_locale(const char *lname)
{
	char buf[BUFSIZ], tmp[BUFSIZ];
	char *l, *t, *c, *m;
	static char result[BUFSIZ];
	size_t len;

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* cut the name apart from the right: @modifier, .codeset, _territory */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	result[0] = '\0';

	/*
	 * tmp cannot overflow: every candidate is built from pieces of buf
	 * and is never longer than lname itself.
	 */
	if (m) {
		if (t) {
			if (c) {
				snprintf(tmp, sizeof(tmp), "%s_%s.%s@%s",
				    l, t, c, m);
				if (append_candidate(result, sizeof(result),
				    tmp) != 0)
					return lname;
			}
			snprintf(tmp, sizeof(tmp), "%s_%s@%s", l, t, m);
			if (append_candidate(result, sizeof(result), tmp) != 0)
				return lname;
		}
		snprintf(tmp, sizeof(tmp), "%s@%s", l, m);
		if (append_candidate(result, sizeof(result), tmp) != 0)
			return lname;
	}
	if (t) {
		if (c) {
			snprintf(tmp, sizeof(tmp), "%s_%s.%s", l, t, c);
			if (append_candidate(result, sizeof(result), tmp) != 0)
				return lname;
		}
		snprintf(tmp, sizeof(tmp), "%s_%s", l, t);
		if (append_candidate(result, sizeof(result), tmp) != 0)
			return lname;
	}
	if (append_candidate(result, sizeof(result), l) != 0)
		return lname;

	return result;
}
221 
222 static const char *
223 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
224 	char *buf;
225 	size_t len;
226 	const char *dir;
227 	const char *lpath;	/* list of locales to be tried */
228 	const char *category;
229 	const char *domainname;
230 	struct domainbinding *db;
231 {
232 	struct stat st;
233 	char *p, *q;
234 	char lpath_tmp[BUFSIZ];
235 
236 	strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
237 	q = lpath_tmp;
238 	/* CONSTCOND */
239 	while (1) {
240 		p = strsep(&q, ":");
241 		if (!p)
242 			break;
243 		if (!*p)
244 			continue;
245 
246 		/* don't mess with default locales */
247 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
248 			return NULL;
249 
250 		/* validate pathname */
251 		if (strchr(p, '/') || strchr(category, '/'))
252 			continue;
253 #if 1	/*?*/
254 		if (strchr(domainname, '/'))
255 			continue;
256 #endif
257 
258 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
259 		    category, domainname);
260 		if (stat(buf, &st) < 0)
261 			continue;
262 		if ((st.st_mode & S_IFMT) != S_IFREG)
263 			continue;
264 
265 		if (mapit(buf, db) == 0)
266 			return buf;
267 	}
268 
269 	return NULL;
270 }
271 
272 static u_int32_t
273 flip(v, magic)
274 	u_int32_t v;
275 	u_int32_t magic;
276 {
277 
278 	if (magic == MO_MAGIC)
279 		return v;
280 	else if (magic == MO_MAGIC_SWAPPED) {
281 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
282 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
283 		return v;
284 	} else {
285 		abort();
286 		/*NOTREACHED*/
287 	}
288 }
289 
290 static int
291 validate(arg, mohandle)
292 	void *arg;
293 	struct mohandle *mohandle;
294 {
295 	char *p;
296 
297 	p = (char *)arg;
298 	if (p < (char *)mohandle->addr ||
299 	    p > (char *)mohandle->addr + mohandle->len)
300 		return 0;
301 	else
302 		return 1;
303 }
304 
305 int
306 mapit(path, db)
307 	const char *path;
308 	struct domainbinding *db;
309 {
310 	int fd;
311 	struct stat st;
312 	char *base;
313 	u_int32_t magic, revision;
314 	struct moentry *otable, *ttable;
315 	struct moentry_h *p;
316 	struct mo *mo;
317 	size_t l;
318 	int i;
319 	char *v;
320 	struct mohandle *mohandle = &db->mohandle;
321 
322 	if (mohandle->addr && mohandle->addr != MAP_FAILED &&
323 	    mohandle->mo.mo_magic)
324 		return 0;	/*already opened*/
325 
326 	unmapit(db);
327 
328 #if 0
329 	if (secure_path(path) != 0)
330 		goto fail;
331 #endif
332 	if (stat(path, &st) < 0)
333 		goto fail;
334 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
335 		goto fail;
336 	fd = open(path, O_RDONLY);
337 	if (fd < 0)
338 		goto fail;
339 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
340 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
341 		close(fd);
342 		goto fail;
343 	}
344 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
345 	    flip(revision, magic) != MO_REVISION) {
346 		close(fd);
347 		goto fail;
348 	}
349 	mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
350 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
351 	if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
352 		close(fd);
353 		goto fail;
354 	}
355 	close(fd);
356 	mohandle->len = (size_t)st.st_size;
357 
358 	base = mohandle->addr;
359 	mo = (struct mo *)mohandle->addr;
360 
361 	/* flip endian.  do not flip magic number! */
362 	mohandle->mo.mo_magic = mo->mo_magic;
363 	mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
364 	mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
365 
366 	/* validate otable/ttable */
367 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
368 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
369 	if (!validate(otable, mohandle) ||
370 	    !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
371 		unmapit(db);
372 		goto fail;
373 	}
374 	if (!validate(ttable, mohandle) ||
375 	    !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
376 		unmapit(db);
377 		goto fail;
378 	}
379 
380 	/* allocate [ot]table, and convert to normal pointer representation. */
381 	l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
382 	mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
383 	if (!mohandle->mo.mo_otable) {
384 		unmapit(db);
385 		goto fail;
386 	}
387 	mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
388 	if (!mohandle->mo.mo_ttable) {
389 		unmapit(db);
390 		goto fail;
391 	}
392 	p = mohandle->mo.mo_otable;
393 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
394 		p[i].len = flip(otable[i].len, magic);
395 		p[i].off = base + flip(otable[i].off, magic);
396 
397 		if (!validate(p[i].off, mohandle) ||
398 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
399 			unmapit(db);
400 			goto fail;
401 		}
402 	}
403 	p = mohandle->mo.mo_ttable;
404 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
405 		p[i].len = flip(ttable[i].len, magic);
406 		p[i].off = base + flip(ttable[i].off, magic);
407 
408 		if (!validate(p[i].off, mohandle) ||
409 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
410 			unmapit(db);
411 			goto fail;
412 		}
413 	}
414 
415 	/* grab MIME-header and charset field */
416 	mohandle->mo.mo_header = lookup("", db);
417 	if (mohandle->mo.mo_header)
418 		v = strstr(mohandle->mo.mo_header, "charset=");
419 	else
420 		v = NULL;
421 	if (v) {
422 		mohandle->mo.mo_charset = strdup(v + 8);
423 		if (!mohandle->mo.mo_charset)
424 			goto fail;
425 		v = strchr(mohandle->mo.mo_charset, '\n');
426 		if (v)
427 			*v = '\0';
428 	}
429 
430 	/*
431 	 * XXX check charset, reject it if we are unable to support the charset
432 	 * with the current locale.
433 	 * for example, if we are using euc-jp locale and we are looking at
434 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
435 	 * the *.mo file as we cannot support it.
436 	 */
437 
438 	return 0;
439 
440 fail:
441 	return -1;
442 }
443 
444 static int
445 unmapit(db)
446 	struct domainbinding *db;
447 {
448 	struct mohandle *mohandle = &db->mohandle;
449 
450 	/* unmap if there's already mapped region */
451 	if (mohandle->addr && mohandle->addr != MAP_FAILED)
452 		munmap(mohandle->addr, mohandle->len);
453 	mohandle->addr = NULL;
454 	if (mohandle->mo.mo_otable)
455 		free(mohandle->mo.mo_otable);
456 	if (mohandle->mo.mo_ttable)
457 		free(mohandle->mo.mo_ttable);
458 	if (mohandle->mo.mo_charset)
459 		free(mohandle->mo.mo_charset);
460 	memset(&mohandle->mo, 0, sizeof(mohandle->mo));
461 	return 0;
462 }
463 
/*
 * Placeholder for a hash table based lookup.  Not implemented: both
 * arguments are ignored and NULL is always returned, which makes lookup()
 * fall back to lookup_bsearch().
 */
/* ARGSUSED */
static const char *
lookup_hash(const char *msgid, struct domainbinding *db)
{

	(void)msgid;
	(void)db;

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	return NULL;
}
477 
478 static const char *
479 lookup_bsearch(msgid, db)
480 	const char *msgid;
481 	struct domainbinding *db;
482 {
483 	int top, bottom, middle, omiddle;
484 	int n;
485 	struct mohandle *mohandle = &db->mohandle;
486 
487 	top = 0;
488 	bottom = mohandle->mo.mo_nstring;
489 	omiddle = -1;
490 	/* CONSTCOND */
491 	while (1) {
492 		if (top > bottom)
493 			break;
494 		middle = (top + bottom) / 2;
495 		/* avoid possible infinite loop, when the data is not sorted */
496 		if (omiddle == middle)
497 			break;
498 		if (middle < 0 || middle >= mohandle->mo.mo_nstring)
499 			break;
500 
501 		n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
502 		if (n == 0)
503 			return (const char *)mohandle->mo.mo_ttable[middle].off;
504 		else if (n < 0)
505 			bottom = middle;
506 		else
507 			top = middle;
508 		omiddle = middle;
509 	}
510 
511 	return NULL;
512 }
513 
/*
 * Find the translation of msgid in db: the hashed lookup is tried first,
 * the binary search is used when it yields nothing.
 * Returns NULL if neither finds msgid.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return (hit != NULL) ? hit : lookup_bsearch(msgid, db);
}
527 
/*
 * Work out the list of locales to try from the environment.
 *
 * LANGUAGE, when set, is returned as is (it is expected to be a
 * ':'-separated list already).  Otherwise the first of LC_ALL,
 * category_name (e.g. "LC_MESSAGES") and LANG that is set is expanded
 * by split_locale().
 *
 * A variable set to the empty string is treated like an unset one, so
 * that e.g. LC_ALL="" does not hide a usable LANG.
 *
 * Returns NULL if none of the variables provides a value.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first. */
	lang = getenv("LANGUAGE");
	if (lang && *lang)
		return lang;

	/* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */
	lang = getenv("LC_ALL");
	if (!lang || !*lang)
		lang = getenv(category_name);
	if (!lang || !*lang)
		lang = getenv("LANG");

	if (!lang || !*lang)
		return NULL; /* error */

	return split_locale(lang);
}
549 
550 char *
551 dcngettext(domainname, msgid1, msgid2, n, category)
552 	const char *domainname;
553 	const char *msgid1;
554 	const char *msgid2;
555 	unsigned long int n;
556 	int category;
557 {
558 	const char *msgid;
559 	char path[PATH_MAX];
560 	const char *lpath;
561 	static char olpath[PATH_MAX];
562 	const char *cname = NULL;
563 	const char *v;
564 	static char *ocname = NULL;
565 	static char *odomainname = NULL;
566 	struct domainbinding *db;
567 
568 	msgid = (n == 1) ? msgid1 : msgid2;
569 	if (msgid == NULL)
570 		return NULL;
571 
572 	if (!domainname)
573 		domainname = __current_domainname;
574 	cname = lookup_category(category);
575 	if (!domainname || !cname)
576 		goto fail;
577 
578 	lpath = get_lang_env(cname);
579 	if (!lpath)
580 		goto fail;
581 
582 	for (db = __bindings; db; db = db->next)
583 		if (strcmp(db->domainname, domainname) == 0)
584 			break;
585 	if (!db) {
586 		if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
587 			goto fail;
588 		db = __bindings;
589 	}
590 
591 	/* resolve relative path */
592 	/* XXX not necessary? */
593 	if (db->path[0] != '/') {
594 		char buf[PATH_MAX];
595 
596 		if (getcwd(buf, sizeof(buf)) == 0)
597 			goto fail;
598 		if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
599 			goto fail;
600 		if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
601 			goto fail;
602 		strcpy(db->path, buf);
603 	}
604 
605 	/* don't bother looking it up if the values are the same */
606 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
607 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
608 	    db->mohandle.mo.mo_magic)
609 		goto found;
610 
611 	/* try to find appropriate file, from $LANGUAGE */
612 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
613 	    domainname, db) == NULL)
614 		goto fail;
615 
616 	if (odomainname)
617 		free(odomainname);
618 	if (ocname)
619 		free(ocname);
620 	odomainname = strdup(domainname);
621 	ocname = strdup(cname);
622 	if (!odomainname || !ocname) {
623 		if (odomainname)
624 			free(odomainname);
625 		if (ocname)
626 			free(ocname);
627 		odomainname = ocname = NULL;
628 	}
629 	else
630 		strlcpy(olpath, lpath, sizeof(olpath));
631 
632 found:
633 	v = lookup(msgid, db);
634 	if (v) {
635 		/*
636 		 * XXX call iconv() here, if translated text is encoded
637 		 * differently from currently-selected encoding (locale).
638 		 * look at Content-type header in *.mo file, in string obtained
639 		 * by gettext("").
640 		 */
641 
642 		/*
643 		 * Given the amount of printf-format security issues, it may
644 		 * be a good idea to validate if the original msgid and the
645 		 * translated message format string carry the same printf-like
646 		 * format identifiers.
647 		 */
648 
649 		msgid = v;
650 	}
651 
652 fail:
653 	/* LINTED const cast */
654 	return (char *)msgid;
655 }
656