xref: /netbsd-src/lib/libintl/gettext.c (revision 001c68bd94f75ce9270b69227c4199fbf34ee396)
1 /*	$NetBSD: gettext.c,v 1.14 2003/03/09 01:02:34 lukem Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000, 2001 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $Citrus: xpg4dl/FreeBSD/lib/libintl/gettext.c,v 1.31 2001/09/27 15:18:45 yamt Exp $
29  */
30 
31 #include <sys/cdefs.h>
32 __RCSID("$NetBSD: gettext.c,v 1.14 2003/03/09 01:02:34 lukem Exp $");
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <sys/uio.h>
39 
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52 
53 static const char *lookup_category __P((int));
54 static const char *split_locale __P((const char *));
55 static const char *lookup_mofile __P((char *, size_t, const char *,
56 	const char *, const char *, const char *, struct domainbinding *));
57 static u_int32_t flip __P((u_int32_t, u_int32_t));
58 static int validate __P((void *, struct mohandle *));
59 static int mapit __P((const char *, struct domainbinding *));
60 static int unmapit __P((struct domainbinding *));
61 static const char *lookup_hash __P((const char *, struct domainbinding *));
62 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
63 static const char *lookup __P((const char *, struct domainbinding *));
64 static const char *get_lang_env(const char *);
65 
66 /*
67  * shortcut functions.  the main implementation resides in dcngettext().
68  */
/*
 * gettext() - look up msgid in the current default domain, using the
 * LC_MESSAGES category.  Forwards to dcngettext() with a NULL domain,
 * no second message and n == 1.
 */
char *
gettext(const char *msgid)
{

	return dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES);
}
76 
/*
 * dgettext() - look up msgid in the given domain, using the LC_MESSAGES
 * category.  Forwards to dcngettext() with no second message and n == 1.
 */
char *
dgettext(const char *domainname, const char *msgid)
{

	return dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES);
}
85 
/*
 * dcgettext() - look up msgid in the given domain and locale category.
 * Forwards to dcngettext() with no second message and n == 1.
 */
char *
dcgettext(const char *domainname, const char *msgid, int category)
{

	return dcngettext(domainname, msgid, NULL, 1UL, category);
}
95 
/*
 * ngettext() - count-dependent lookup in the current default domain, using
 * the LC_MESSAGES category.  Forwards to dcngettext() with a NULL domain.
 */
char *
ngettext(const char *msgid1, const char *msgid2, unsigned long int n)
{

	return dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES);
}
105 
/*
 * dngettext() - count-dependent lookup in the given domain, using the
 * LC_MESSAGES category.  Forwards to dcngettext().
 */
char *
dngettext(const char *domainname, const char *msgid1, const char *msgid2,
    unsigned long int n)
{

	return dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES);
}
116 
117 /*
118  * dcngettext() -
119  * lookup internationalized message on database locale/category/domainname
120  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
121  * if n equals to 1, internationalized message will be looked up for msgid1.
122  * otherwise, message will be looked up for msgid2.
123  * if the lookup fails, the function will return msgid1 or msgid2 as is.
124  *
125  * Even though the return type is "char *", caller should not rewrite the
126  * region pointed to by the return value (should be "const char *", but can't
127  * change it for compatibility with other implementations).
128  *
129  * by default (if domainname == NULL), domainname is taken from the value set
130  * by textdomain().  usually name of the application (like "ls") is used as
131  * domainname.  category is usually LC_MESSAGES.
132  *
133  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
134  * endian encoded file.  both endians are supported here, as the files are in
135  * /usr/share/locale! (or we should move those files into /usr/libdata)
136  */
137 
/*
 * lookup_category() - map an LC_xxx category constant to its name.
 *
 * Returns the category name as a string literal ("LC_MESSAGES", ...) for
 * the six categories listed in the table, or NULL for any other value.
 */
static const char *
lookup_category(int category)
{
	static const struct {
		int		 id;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" },
	};
	size_t i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].id == category)
			return categories[i].name;
	}
	return NULL;
}
153 
/*
 * Append one candidate locale name, followed by sep, to the list being
 * built in dst.  The candidate is "l[_t][.c][@m]"; a NULL t, c or m omits
 * that component together with its delimiter.  *used is the number of
 * bytes already in dst and is advanced on success.
 *
 * Returns 0 on success, or -1 if the candidate would not fit completely
 * (dst may then hold a partial candidate and must not be used).
 */
static int
split_locale_append(char *dst, size_t dstlen, size_t *used, const char *l,
    const char *t, const char *c, const char *m, const char *sep)
{
	int n;

	n = snprintf(dst + *used, dstlen - *used, "%s%s%s%s%s%s%s%s", l,
	    t ? "_" : "", t ? t : "",
	    c ? "." : "", c ? c : "",
	    m ? "@" : "", m ? m : "",
	    sep);
	if (n < 0 || (size_t)n >= dstlen - *used)
		return -1;
	*used += (size_t)n;
	return 0;
}

/*
 * split_locale() - expand a locale name into a colon-separated list of
 * names to try, from the most specific to the least specific.
 *
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * For example "de_DE.UTF-8@euro" becomes
 * "de_DE.UTF-8@euro:de_DE@euro:de@euro:de_DE.UTF-8:de_DE:de".
 *
 * lname is returned unchanged when it cannot be expanded: it is too long
 * for the work buffer, the language part is empty, a codeset is given
 * without a territory, or the expanded list would not fit in the result
 * buffer (a silently truncated list could end in a clipped, bogus name).
 *
 * On success the return value points to a static buffer that is
 * overwritten by the next call.
 */
static const char *
split_locale(const char *lname)
{
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	static char result[BUFSIZ];
	size_t len, used;

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* peel the components off from the right: @modifier, .codeset, _territory */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	result[0] = '\0';
	used = 0;

	/* candidates that keep the modifier come first */
	if (m) {
		if (t) {
			if (c && split_locale_append(result, sizeof(result),
			    &used, l, t, c, m, ":") < 0)
				return lname;
			if (split_locale_append(result, sizeof(result),
			    &used, l, t, NULL, m, ":") < 0)
				return lname;
		}
		if (split_locale_append(result, sizeof(result), &used,
		    l, NULL, NULL, m, ":") < 0)
			return lname;
	}
	if (t) {
		if (c && split_locale_append(result, sizeof(result), &used,
		    l, t, c, NULL, ":") < 0)
			return lname;
		if (split_locale_append(result, sizeof(result), &used,
		    l, t, NULL, NULL, ":") < 0)
			return lname;
	}
	/* the bare language is always the last candidate */
	if (split_locale_append(result, sizeof(result), &used,
	    l, NULL, NULL, NULL, "") < 0)
		return lname;

	return result;
}
219 
220 static const char *
221 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
222 	char *buf;
223 	size_t len;
224 	const char *dir;
225 	const char *lpath;	/* list of locales to be tried */
226 	const char *category;
227 	const char *domainname;
228 	struct domainbinding *db;
229 {
230 	struct stat st;
231 	char *p, *q;
232 	char lpath_tmp[BUFSIZ];
233 
234 	strlcpy(lpath_tmp, lpath, sizeof(lpath_tmp));
235 	q = lpath_tmp;
236 	/* CONSTCOND */
237 	while (1) {
238 		p = strsep(&q, ":");
239 		if (!p)
240 			break;
241 		if (!*p)
242 			continue;
243 
244 		/* don't mess with default locales */
245 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
246 			return NULL;
247 
248 		/* validate pathname */
249 		if (strchr(p, '/') || strchr(category, '/'))
250 			continue;
251 #if 1	/*?*/
252 		if (strchr(domainname, '/'))
253 			continue;
254 #endif
255 
256 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
257 		    category, domainname);
258 		if (stat(buf, &st) < 0)
259 			continue;
260 		if ((st.st_mode & S_IFMT) != S_IFREG)
261 			continue;
262 
263 		if (mapit(buf, db) == 0)
264 			return buf;
265 	}
266 
267 	return NULL;
268 }
269 
270 static u_int32_t
271 flip(v, magic)
272 	u_int32_t v;
273 	u_int32_t magic;
274 {
275 
276 	if (magic == MO_MAGIC)
277 		return v;
278 	else if (magic == MO_MAGIC_SWAPPED) {
279 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
280 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
281 		return v;
282 	} else {
283 		abort();
284 		/*NOTREACHED*/
285 	}
286 }
287 
288 static int
289 validate(arg, mohandle)
290 	void *arg;
291 	struct mohandle *mohandle;
292 {
293 	char *p;
294 
295 	p = (char *)arg;
296 	if (p < (char *)mohandle->addr ||
297 	    p > (char *)mohandle->addr + mohandle->len)
298 		return 0;
299 	else
300 		return 1;
301 }
302 
303 int
304 mapit(path, db)
305 	const char *path;
306 	struct domainbinding *db;
307 {
308 	int fd;
309 	struct stat st;
310 	char *base;
311 	u_int32_t magic, revision;
312 	struct moentry *otable, *ttable;
313 	struct moentry_h *p;
314 	struct mo *mo;
315 	size_t l;
316 	int i;
317 	char *v;
318 	struct mohandle *mohandle = &db->mohandle;
319 
320 	if (mohandle->addr && mohandle->addr != MAP_FAILED &&
321 	    mohandle->mo.mo_magic)
322 		return 0;	/*already opened*/
323 
324 	unmapit(db);
325 
326 #if 0
327 	if (secure_path(path) != 0)
328 		goto fail;
329 #endif
330 	if (stat(path, &st) < 0)
331 		goto fail;
332 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
333 		goto fail;
334 	fd = open(path, O_RDONLY);
335 	if (fd < 0)
336 		goto fail;
337 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
338 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
339 		close(fd);
340 		goto fail;
341 	}
342 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
343 	    flip(revision, magic) != MO_REVISION) {
344 		close(fd);
345 		goto fail;
346 	}
347 	mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
348 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
349 	if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
350 		close(fd);
351 		goto fail;
352 	}
353 	close(fd);
354 	mohandle->len = (size_t)st.st_size;
355 
356 	base = mohandle->addr;
357 	mo = (struct mo *)mohandle->addr;
358 
359 	/* flip endian.  do not flip magic number! */
360 	mohandle->mo.mo_magic = mo->mo_magic;
361 	mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
362 	mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
363 
364 	/* validate otable/ttable */
365 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
366 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
367 	if (!validate(otable, mohandle) ||
368 	    !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
369 		unmapit(db);
370 		goto fail;
371 	}
372 	if (!validate(ttable, mohandle) ||
373 	    !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
374 		unmapit(db);
375 		goto fail;
376 	}
377 
378 	/* allocate [ot]table, and convert to normal pointer representation. */
379 	l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
380 	mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
381 	if (!mohandle->mo.mo_otable) {
382 		unmapit(db);
383 		goto fail;
384 	}
385 	mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
386 	if (!mohandle->mo.mo_ttable) {
387 		unmapit(db);
388 		goto fail;
389 	}
390 	p = mohandle->mo.mo_otable;
391 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
392 		p[i].len = flip(otable[i].len, magic);
393 		p[i].off = base + flip(otable[i].off, magic);
394 
395 		if (!validate(p[i].off, mohandle) ||
396 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
397 			unmapit(db);
398 			goto fail;
399 		}
400 	}
401 	p = mohandle->mo.mo_ttable;
402 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
403 		p[i].len = flip(ttable[i].len, magic);
404 		p[i].off = base + flip(ttable[i].off, magic);
405 
406 		if (!validate(p[i].off, mohandle) ||
407 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
408 			unmapit(db);
409 			goto fail;
410 		}
411 	}
412 
413 	/* grab MIME-header and charset field */
414 	mohandle->mo.mo_header = lookup("", db);
415 	if (mohandle->mo.mo_header)
416 		v = strstr(mohandle->mo.mo_header, "charset=");
417 	else
418 		v = NULL;
419 	if (v) {
420 		mohandle->mo.mo_charset = strdup(v + 8);
421 		if (!mohandle->mo.mo_charset)
422 			goto fail;
423 		v = strchr(mohandle->mo.mo_charset, '\n');
424 		if (v)
425 			*v = '\0';
426 	}
427 
428 	/*
429 	 * XXX check charset, reject it if we are unable to support the charset
430 	 * with the current locale.
431 	 * for example, if we are using euc-jp locale and we are looking at
432 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
433 	 * the *.mo file as we cannot support it.
434 	 */
435 
436 	return 0;
437 
438 fail:
439 	return -1;
440 }
441 
442 static int
443 unmapit(db)
444 	struct domainbinding *db;
445 {
446 	struct mohandle *mohandle = &db->mohandle;
447 
448 	/* unmap if there's already mapped region */
449 	if (mohandle->addr && mohandle->addr != MAP_FAILED)
450 		munmap(mohandle->addr, mohandle->len);
451 	mohandle->addr = NULL;
452 	if (mohandle->mo.mo_otable)
453 		free(mohandle->mo.mo_otable);
454 	if (mohandle->mo.mo_ttable)
455 		free(mohandle->mo.mo_ttable);
456 	if (mohandle->mo.mo_charset)
457 		free(mohandle->mo.mo_charset);
458 	memset(&mohandle->mo, 0, sizeof(mohandle->mo));
459 	return 0;
460 }
461 
/*
 * lookup_hash() - placeholder for a hash-table lookup of msgid.
 *
 * Not implemented: both arguments are ignored and NULL ("not found") is
 * always returned, so callers fall back to another lookup method.
 */
/* ARGSUSED */
static const char *
lookup_hash(const char *msgid, struct domainbinding *db)
{

	/*
	 * XXX a hashed lookup belongs here, but writing one means looking
	 * inside the GPL'ed *.c and re-implementing it...
	 */
	return NULL;
}
475 
476 static const char *
477 lookup_bsearch(msgid, db)
478 	const char *msgid;
479 	struct domainbinding *db;
480 {
481 	int top, bottom, middle, omiddle;
482 	int n;
483 	struct mohandle *mohandle = &db->mohandle;
484 
485 	top = 0;
486 	bottom = mohandle->mo.mo_nstring;
487 	omiddle = -1;
488 	/* CONSTCOND */
489 	while (1) {
490 		if (top > bottom)
491 			break;
492 		middle = (top + bottom) / 2;
493 		/* avoid possible infinite loop, when the data is not sorted */
494 		if (omiddle == middle)
495 			break;
496 		if (middle < 0 || middle >= mohandle->mo.mo_nstring)
497 			break;
498 
499 		n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
500 		if (n == 0)
501 			return (const char *)mohandle->mo.mo_ttable[middle].off;
502 		else if (n < 0)
503 			bottom = middle;
504 		else
505 			top = middle;
506 		omiddle = middle;
507 	}
508 
509 	return NULL;
510 }
511 
/*
 * lookup() - find the translation of msgid in db's catalog.
 *
 * Tries lookup_hash() first and falls back to lookup_bsearch() when that
 * yields nothing.  Returns the translated string, or NULL if not found.
 */
static const char *
lookup(const char *msgid, struct domainbinding *db)
{
	const char *hit = lookup_hash(msgid, db);

	return (hit != NULL) ? hit : lookup_bsearch(msgid, db);
}
525 
/*
 * get_lang_env() - work out the list of locales to try, from the
 * environment.
 *
 * category_name is the name of the category being looked up (e.g.
 * "LC_MESSAGES"); it is also the name of the environment variable
 * consulted for that category.
 *
 * A variable that is set to the empty string is treated as if it were not
 * set, so that e.g. LC_ALL="" does not hide a usable LANG.
 *
 * Returns $LANGUAGE as is when it is set, otherwise the first of $LC_ALL,
 * $<category_name> and $LANG expanded by split_locale().  Returns NULL when
 * none of them supplies a value.
 */
static const char *
get_lang_env(const char *category_name)
{
	const char *lang;

	/* 1. see LANGUAGE variable first. */
	lang = getenv("LANGUAGE");
	if (lang && *lang)
		return lang;

	/* 2. if LANGUAGE isn't set, see LC_ALL, LC_xxx, LANG. */
	lang = getenv("LC_ALL");
	if (!lang || !*lang)
		lang = getenv(category_name);
	if (!lang || !*lang)
		lang = getenv("LANG");

	if (!lang || !*lang)
		return NULL; /* error */

	return split_locale(lang);
}
547 
548 char *
549 dcngettext(domainname, msgid1, msgid2, n, category)
550 	const char *domainname;
551 	const char *msgid1;
552 	const char *msgid2;
553 	unsigned long int n;
554 	int category;
555 {
556 	const char *msgid;
557 	char path[PATH_MAX];
558 	const char *lpath;
559 	static char olpath[PATH_MAX];
560 	const char *cname = NULL;
561 	const char *v;
562 	static char *ocname = NULL;
563 	static char *odomainname = NULL;
564 	struct domainbinding *db;
565 
566 	msgid = (n == 1) ? msgid1 : msgid2;
567 	if (msgid == NULL)
568 		return NULL;
569 
570 	if (!domainname)
571 		domainname = __current_domainname;
572 	cname = lookup_category(category);
573 	if (!domainname || !cname)
574 		goto fail;
575 
576 	lpath = get_lang_env(cname);
577 	if (!lpath)
578 		goto fail;
579 
580 	for (db = __bindings; db; db = db->next)
581 		if (strcmp(db->domainname, domainname) == 0)
582 			break;
583 	if (!db) {
584 		if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
585 			goto fail;
586 		db = __bindings;
587 	}
588 
589 	/* resolve relative path */
590 	/* XXX not necessary? */
591 	if (db->path[0] != '/') {
592 		char buf[PATH_MAX];
593 
594 		if (getcwd(buf, sizeof(buf)) == 0)
595 			goto fail;
596 		if (strlcat(buf, "/", sizeof(buf)) >= sizeof(buf))
597 			goto fail;
598 		if (strlcat(buf, db->path, sizeof(buf)) >= sizeof(buf))
599 			goto fail;
600 		strcpy(db->path, buf);
601 	}
602 
603 	/* don't bother looking it up if the values are the same */
604 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
605 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
606 	    db->mohandle.mo.mo_magic)
607 		goto found;
608 
609 	/* try to find appropriate file, from $LANGUAGE */
610 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
611 	    domainname, db) == NULL)
612 		goto fail;
613 
614 	if (odomainname)
615 		free(odomainname);
616 	if (ocname)
617 		free(ocname);
618 	odomainname = strdup(domainname);
619 	ocname = strdup(cname);
620 	if (!odomainname || !ocname) {
621 		if (odomainname)
622 			free(odomainname);
623 		if (ocname)
624 			free(ocname);
625 		odomainname = ocname = NULL;
626 	}
627 	else
628 		strlcpy(olpath, lpath, sizeof(olpath));
629 
630 found:
631 	v = lookup(msgid, db);
632 	if (v) {
633 		/*
634 		 * XXX call iconv() here, if translated text is encoded
635 		 * differently from currently-selected encoding (locale).
636 		 * look at Content-type header in *.mo file, in string obtained
637 		 * by gettext("").
638 		 */
639 
640 		/*
641 		 * Given the amount of printf-format security issues, it may
642 		 * be a good idea to validate if the original msgid and the
643 		 * translated message format string carry the same printf-like
644 		 * format identifiers.
645 		 */
646 
647 		msgid = v;
648 	}
649 
650 fail:
651 	/* LINTED const cast */
652 	return (char *)msgid;
653 }
654