xref: /netbsd-src/lib/libintl/gettext.c (revision 3b01aba77a7a698587faaae455bbfe740923c1f5)
1 /*	$NetBSD: gettext.c,v 1.9 2001/02/16 07:20:35 minoura Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000, 2001 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #if defined(LIBC_SCCS) && !defined(lint)
31 __RCSID("$NetBSD: gettext.c,v 1.9 2001/02/16 07:20:35 minoura Exp $");
32 #endif /* LIBC_SCCS and not lint */
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <sys/mman.h>
38 #include <sys/uio.h>
39 
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <unistd.h>
44 #include <string.h>
45 #if 0
46 #include <util.h>
47 #endif
48 #include <libintl.h>
49 #include <locale.h>
50 #include "libintl_local.h"
51 #include "pathnames.h"
52 
53 static const char *lookup_category __P((int));
54 static const char *split_locale __P((const char *));
55 static const char *lookup_mofile __P((char *, size_t, const char *,
56 	char *, const char *, const char *, struct domainbinding *));
57 static u_int32_t flip __P((u_int32_t, u_int32_t));
58 static int validate __P((void *, struct mohandle *));
59 static int mapit __P((const char *, struct domainbinding *));
60 static int unmapit __P((struct domainbinding *));
61 static const char *lookup_hash __P((const char *, struct domainbinding *));
62 static const char *lookup_bsearch __P((const char *, struct domainbinding *));
63 static const char *lookup __P((const char *, struct domainbinding *));
64 
65 /*
66  * shortcut functions.  the main implementation resides in dcngettext().
67  */
/*
 * gettext() -
 * look up msgid in the default domain (domainname == NULL) for LC_MESSAGES.
 * singular-only convenience wrapper: no plural msgid, n fixed to 1.
 */
char *
gettext(msgid)
	const char *msgid;
{

	return (dcngettext(NULL, msgid, NULL, 1UL, LC_MESSAGES));
}
75 
/*
 * dgettext() -
 * look up msgid in the given domain for LC_MESSAGES.
 * singular-only convenience wrapper: no plural msgid, n fixed to 1.
 */
char *
dgettext(domainname, msgid)
	const char *domainname;
	const char *msgid;
{

	return (dcngettext(domainname, msgid, NULL, 1UL, LC_MESSAGES));
}
84 
/*
 * dcgettext() -
 * look up msgid in the given domain and locale category.
 * singular-only convenience wrapper: no plural msgid, n fixed to 1.
 */
char *
dcgettext(domainname, msgid, category)
	const char *domainname;
	const char *msgid;
	int category;
{

	return (dcngettext(domainname, msgid, NULL, 1UL, category));
}
94 
/*
 * ngettext() -
 * plural-aware lookup in the default domain (domainname == NULL) for
 * LC_MESSAGES.  msgid1/msgid2/n are handed to dcngettext() unchanged.
 */
char *
ngettext(msgid1, msgid2, n)
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{

	return (dcngettext(NULL, msgid1, msgid2, n, LC_MESSAGES));
}
104 
/*
 * dngettext() -
 * plural-aware lookup in the given domain for LC_MESSAGES.
 * msgid1/msgid2/n are handed to dcngettext() unchanged.
 */
char *
dngettext(domainname, msgid1, msgid2, n)
	const char *domainname;
	const char *msgid1;
	const char *msgid2;
	unsigned long int n;
{

	return (dcngettext(domainname, msgid1, msgid2, n, LC_MESSAGES));
}
115 
116 /*
117  * dcngettext() -
118  * lookup internationalized message on database locale/category/domainname
119  * (like ja_JP.eucJP/LC_MESSAGES/domainname).
120  * if n equals to 1, internationalized message will be looked up for msgid1.
121  * otherwise, message will be looked up for msgid2.
122  * if the lookup fails, the function will return msgid1 or msgid2 as is.
123  *
124  * Even though the return type is "char *", caller should not rewrite the
125  * region pointed to by the return value (should be "const char *", but can't
126  * change it for compatibility with other implementations).
127  *
128  * by default (if domainname == NULL), domainname is taken from the value set
129  * by textdomain().  usually name of the application (like "ls") is used as
130  * domainname.  category is usually LC_MESSAGES.
131  *
132  * the code reads in *.mo files generated by GNU gettext.  *.mo is a host-
133  * endian encoded file.  both endians are supported here, as the files are in
134  * /usr/share/locale! (or we should move those files into /usr/libdata)
135  */
136 
/*
 * lookup_category() -
 * map an LC_* category constant to its name (which is also the directory
 * component used in the *.mo pathname).  returns NULL for any category
 * that is not listed in the table below.
 */
static const char *
lookup_category(category)
	int category;
{
	static const struct {
		int		id;
		const char	*name;
	} categories[] = {
		{ LC_COLLATE,	"LC_COLLATE" },
		{ LC_CTYPE,	"LC_CTYPE" },
		{ LC_MONETARY,	"LC_MONETARY" },
		{ LC_NUMERIC,	"LC_NUMERIC" },
		{ LC_TIME,	"LC_TIME" },
		{ LC_MESSAGES,	"LC_MESSAGES" }
	};
	unsigned int i;

	for (i = 0; i < sizeof(categories) / sizeof(categories[0]); i++) {
		if (categories[i].id == category)
			return categories[i].name;
	}
	return NULL;
}
152 
/*
 * XPG syntax: language[_territory[.codeset]][@modifier]
 *
 * split_locale() -
 * expand a locale name into a colon-separated list of candidate names,
 * most specific first, e.g. "ja_JP.eucJP" -> "ja_JP.eucJP:ja_JP:ja".
 * candidates carrying the modifier come before those without it.
 *
 * lname is returned unchanged when it cannot be parsed: it does not fit
 * the work buffer, the language part is empty, or a codeset is given
 * without a territory.
 *
 * otherwise the return value points to a static buffer that is
 * overwritten by the next call.  a candidate that would not fit in that
 * buffer is dropped as a whole (together with the less specific ones after
 * it) so the list never ends in a chopped locale name.
 */
static const char *
split_locale(lname)
	const char *lname;
{
	/* which optional parts each candidate carries, in output order */
	static const struct {
		int	use_t;		/* "_territory" */
		int	use_c;		/* ".codeset" */
		int	use_m;		/* "@modifier" */
	} forms[] = {
		{ 1, 1, 1 },
		{ 1, 0, 1 },
		{ 0, 0, 1 },
		{ 1, 1, 0 },
		{ 1, 0, 0 },
		{ 0, 0, 0 }
	};
	static char result[BUFSIZ];
	char buf[BUFSIZ];
	char *l, *t, *c, *m;
	size_t len, used, room;
	unsigned int i;
	int n;

	memset(result, 0, sizeof(result));

	len = strlen(lname);
	if (len + 1 > sizeof(buf))
		return lname;
	memcpy(buf, lname, len + 1);

	/* chop the name from the right: @modifier, .codeset, _territory */
	m = strrchr(buf, '@');
	if (m)
		*m++ = '\0';
	c = strrchr(buf, '.');
	if (c)
		*c++ = '\0';
	t = strrchr(buf, '_');
	if (t)
		*t++ = '\0';
	l = buf;
	if (*l == '\0')
		return lname;
	if (c && !t)
		return lname;

	used = 0;
	for (i = 0; i < sizeof(forms) / sizeof(forms[0]); i++) {
		/* skip forms that need a part this name does not have */
		if ((forms[i].use_t && !t) || (forms[i].use_c && !c) ||
		    (forms[i].use_m && !m))
			continue;

		room = sizeof(result) - used;
		n = snprintf(result + used, room, "%s%s%s%s%s%s%s%s",
		    used ? ":" : "", l,
		    forms[i].use_t ? "_" : "", forms[i].use_t ? t : "",
		    forms[i].use_c ? "." : "", forms[i].use_c ? c : "",
		    forms[i].use_m ? "@" : "", forms[i].use_m ? m : "");
		if (n < 0 || (size_t)n >= room) {
			/* does not fit: discard the partial candidate */
			result[used] = '\0';
			break;
		}
		used += (size_t)n;
	}

	return result;
}
217 
218 static const char *
219 lookup_mofile(buf, len, dir, lpath, category, domainname, db)
220 	char *buf;
221 	size_t len;
222 	const char *dir;
223 	char *lpath;	/* list of locales to be tried */
224 	const char *category;
225 	const char *domainname;
226 	struct domainbinding *db;
227 {
228 	struct stat st;
229 	char *p, *q;
230 
231 	q = lpath;
232 	/* CONSTCOND */
233 	while (1) {
234 		p = strsep(&q, ":");
235 		if (!p)
236 			break;
237 		if (!*p)
238 			continue;
239 
240 		/* don't mess with default locales */
241 		if (strcmp(p, "C") == 0 || strcmp(p, "POSIX") == 0)
242 			return NULL;
243 
244 		/* validate pathname */
245 		if (strchr(p, '/') || strchr(category, '/'))
246 			continue;
247 #if 1	/*?*/
248 		if (strchr(domainname, '/'))
249 			continue;
250 #endif
251 
252 		snprintf(buf, len, "%s/%s/%s/%s.mo", dir, p,
253 		    category, domainname);
254 		if (stat(buf, &st) < 0)
255 			continue;
256 		if ((st.st_mode & S_IFMT) != S_IFREG)
257 			continue;
258 
259 		if (mapit(buf, db) == 0)
260 			return buf;
261 	}
262 
263 	return NULL;
264 }
265 
266 static u_int32_t
267 flip(v, magic)
268 	u_int32_t v;
269 	u_int32_t magic;
270 {
271 
272 	if (magic == MO_MAGIC)
273 		return v;
274 	else if (magic == MO_MAGIC_SWAPPED) {
275 		v = ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) |
276 		    ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000);
277 		return v;
278 	} else {
279 		abort();
280 		/*NOTREACHED*/
281 	}
282 }
283 
284 static int
285 validate(arg, mohandle)
286 	void *arg;
287 	struct mohandle *mohandle;
288 {
289 	char *p;
290 
291 	p = (char *)arg;
292 	if (p < (char *)mohandle->addr ||
293 	    p > (char *)mohandle->addr + mohandle->len)
294 		return 0;
295 	else
296 		return 1;
297 }
298 
299 int
300 mapit(path, db)
301 	const char *path;
302 	struct domainbinding *db;
303 {
304 	int fd;
305 	struct stat st;
306 	char *base;
307 	u_int32_t magic, revision;
308 	struct moentry *otable, *ttable;
309 	struct moentry_h *p;
310 	struct mo *mo;
311 	size_t l;
312 	int i;
313 	char *v;
314 	struct mohandle *mohandle = &db->mohandle;
315 
316 	if (mohandle->addr && mohandle->addr != MAP_FAILED &&
317 	    mohandle->mo.mo_magic)
318 		return 0;	/*already opened*/
319 
320 	unmapit(db);
321 
322 #if 0
323 	if (secure_path(path) != 0)
324 		goto fail;
325 #endif
326 	if (stat(path, &st) < 0)
327 		goto fail;
328 	if ((st.st_mode & S_IFMT) != S_IFREG || st.st_size > GETTEXT_MMAP_MAX)
329 		goto fail;
330 	fd = open(path, O_RDONLY);
331 	if (fd < 0)
332 		goto fail;
333 	if (read(fd, &magic, sizeof(magic)) != sizeof(magic) ||
334 	    (magic != MO_MAGIC && magic != MO_MAGIC_SWAPPED)) {
335 		close(fd);
336 		goto fail;
337 	}
338 	if (read(fd, &revision, sizeof(revision)) != sizeof(revision) ||
339 	    flip(revision, magic) != MO_REVISION) {
340 		close(fd);
341 		goto fail;
342 	}
343 	mohandle->addr = mmap(NULL, (size_t)st.st_size, PROT_READ,
344 	    MAP_FILE | MAP_SHARED, fd, (off_t)0);
345 	if (!mohandle->addr || mohandle->addr == MAP_FAILED) {
346 		close(fd);
347 		goto fail;
348 	}
349 	close(fd);
350 	mohandle->len = (size_t)st.st_size;
351 
352 	base = mohandle->addr;
353 	mo = (struct mo *)mohandle->addr;
354 
355 	/* flip endian.  do not flip magic number! */
356 	mohandle->mo.mo_magic = mo->mo_magic;
357 	mohandle->mo.mo_revision = flip(mo->mo_revision, magic);
358 	mohandle->mo.mo_nstring = flip(mo->mo_nstring, magic);
359 
360 	/* validate otable/ttable */
361 	otable = (struct moentry *)(base + flip(mo->mo_otable, magic));
362 	ttable = (struct moentry *)(base + flip(mo->mo_ttable, magic));
363 	if (!validate(otable, mohandle) ||
364 	    !validate(&otable[mohandle->mo.mo_nstring], mohandle)) {
365 		unmapit(db);
366 		goto fail;
367 	}
368 	if (!validate(ttable, mohandle) ||
369 	    !validate(&ttable[mohandle->mo.mo_nstring], mohandle)) {
370 		unmapit(db);
371 		goto fail;
372 	}
373 
374 	/* allocate [ot]table, and convert to normal pointer representation. */
375 	l = sizeof(struct moentry_h) * mohandle->mo.mo_nstring;
376 	mohandle->mo.mo_otable = (struct moentry_h *)malloc(l);
377 	if (!mohandle->mo.mo_otable) {
378 		unmapit(db);
379 		goto fail;
380 	}
381 	mohandle->mo.mo_ttable = (struct moentry_h *)malloc(l);
382 	if (!mohandle->mo.mo_ttable) {
383 		unmapit(db);
384 		goto fail;
385 	}
386 	p = mohandle->mo.mo_otable;
387 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
388 		p[i].len = flip(otable[i].len, magic);
389 		p[i].off = base + flip(otable[i].off, magic);
390 
391 		if (!validate(p[i].off, mohandle) ||
392 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
393 			unmapit(db);
394 			goto fail;
395 		}
396 	}
397 	p = mohandle->mo.mo_ttable;
398 	for (i = 0; i < mohandle->mo.mo_nstring; i++) {
399 		p[i].len = flip(ttable[i].len, magic);
400 		p[i].off = base + flip(ttable[i].off, magic);
401 
402 		if (!validate(p[i].off, mohandle) ||
403 		    !validate(p[i].off + p[i].len + 1, mohandle)) {
404 			unmapit(db);
405 			goto fail;
406 		}
407 	}
408 
409 	/* grab MIME-header and charset field */
410 	mohandle->mo.mo_header = lookup("", db);
411 	if (mohandle->mo.mo_header)
412 		v = strstr(mohandle->mo.mo_header, "charset=");
413 	else
414 		v = NULL;
415 	if (v) {
416 		mohandle->mo.mo_charset = strdup(v + 8);
417 		if (!mohandle->mo.mo_charset)
418 			goto fail;
419 		v = strchr(mohandle->mo.mo_charset, '\n');
420 		if (v)
421 			*v = '\0';
422 	}
423 
424 	/*
425 	 * XXX check charset, reject it if we are unable to support the charset
426 	 * with the current locale.
427 	 * for example, if we are using euc-jp locale and we are looking at
428 	 * *.mo file encoded by euc-kr (charset=euc-kr), we should reject
429 	 * the *.mo file as we cannot support it.
430 	 */
431 
432 	return 0;
433 
434 fail:
435 	return -1;
436 }
437 
438 static int
439 unmapit(db)
440 	struct domainbinding *db;
441 {
442 	struct mohandle *mohandle = &db->mohandle;
443 
444 	/* unmap if there's already mapped region */
445 	if (mohandle->addr && mohandle->addr != MAP_FAILED)
446 		munmap(mohandle->addr, mohandle->len);
447 	mohandle->addr = NULL;
448 	if (mohandle->mo.mo_otable)
449 		free(mohandle->mo.mo_otable);
450 	if (mohandle->mo.mo_ttable)
451 		free(mohandle->mo.mo_ttable);
452 	if (mohandle->mo.mo_charset)
453 		free(mohandle->mo.mo_charset);
454 	memset(&mohandle->mo, 0, sizeof(mohandle->mo));
455 	return 0;
456 }
457 
/*
 * lookup_hash() -
 * placeholder for a hash-table based lookup.  not implemented: it never
 * finds anything and always returns NULL, which makes lookup() fall back
 * to the binary search.
 */
/* ARGSUSED */
static const char *
lookup_hash(msgid, db)
	const char *msgid;
	struct domainbinding *db;
{

	/*
	 * XXX should try a hashed lookup here, but to do so, we need to
	 * look inside the GPL'ed *.c and re-implement...
	 */
	(void)msgid;
	(void)db;
	return NULL;
}
471 
472 static const char *
473 lookup_bsearch(msgid, db)
474 	const char *msgid;
475 	struct domainbinding *db;
476 {
477 	int top, bottom, middle, omiddle;
478 	int n;
479 	struct mohandle *mohandle = &db->mohandle;
480 
481 	top = 0;
482 	bottom = mohandle->mo.mo_nstring;
483 	omiddle = -1;
484 	/* CONSTCOND */
485 	while (1) {
486 		if (top > bottom)
487 			break;
488 		middle = (top + bottom) / 2;
489 		/* avoid possible infinite loop, when the data is not sorted */
490 		if (omiddle == middle)
491 			break;
492 		if (middle < 0 || middle >= mohandle->mo.mo_nstring)
493 			break;
494 
495 		n = strcmp(msgid, mohandle->mo.mo_otable[middle].off);
496 		if (n == 0)
497 			return (const char *)mohandle->mo.mo_ttable[middle].off;
498 		else if (n < 0)
499 			bottom = middle;
500 		else
501 			top = middle;
502 		omiddle = middle;
503 	}
504 
505 	return NULL;
506 }
507 
/*
 * lookup() -
 * find the translation of msgid in db's catalog: hashed lookup first,
 * binary search when that yields nothing.
 * returns NULL if neither finds msgid.
 */
static const char *
lookup(msgid, db)
	const char *msgid;
	struct domainbinding *db;
{
	const char *hit = lookup_hash(msgid, db);

	return hit ? hit : lookup_bsearch(msgid, db);
}
521 
522 char *
523 dcngettext(domainname, msgid1, msgid2, n, category)
524 	const char *domainname;
525 	const char *msgid1;
526 	const char *msgid2;
527 	unsigned long int n;
528 	int category;
529 {
530 	const char *msgid;
531 	char path[PATH_MAX];
532 	static char lpath[PATH_MAX];
533 	static char olpath[PATH_MAX];
534 	const char *locale;
535 	const char *language;
536 	const char *cname = NULL;
537 	const char *v;
538 	static char *ocname = NULL;
539 	static char *odomainname = NULL;
540 	struct domainbinding *db;
541 
542 	msgid = (n == 1) ? msgid1 : msgid2;
543 	if (msgid == NULL)
544 		return NULL;
545 
546 	if (!domainname)
547 		domainname = __current_domainname;
548 	cname = lookup_category(category);
549 	if (!domainname || !cname)
550 		goto fail;
551 
552 	language = getenv("LANGUAGE");
553 	locale = setlocale(LC_MESSAGES, NULL);	/*XXX*/
554 	if (locale)
555 		locale = split_locale(locale);
556 	if (language && locale) {
557 		if (strlen(language) + strlen(locale) + 2 > sizeof(lpath))
558 			goto fail;
559 		snprintf(lpath, sizeof(lpath), "%s:%s", language, locale);
560 	} else if (language) {
561 		if (strlen(language) + 1 > sizeof(lpath))
562 			goto fail;
563 		strlcpy(lpath, language, sizeof(lpath));
564 	} else if (locale) {
565 		if (strlen(locale) + 1 > sizeof(lpath))
566 			goto fail;
567 		strlcpy(lpath, locale, sizeof(lpath));
568 	} else
569 		goto fail;
570 
571 	for (db = __bindings; db; db = db->next)
572 		if (strcmp(db->domainname, domainname) == 0)
573 			break;
574 	if (!db) {
575 		if (!bindtextdomain(domainname, _PATH_TEXTDOMAIN))
576 			goto fail;
577 		db = __bindings;
578 	}
579 
580 	/* don't bother looking it up if the values are the same */
581 	if (odomainname && strcmp(domainname, odomainname) == 0 &&
582 	    ocname && strcmp(cname, ocname) == 0 && strcmp(lpath, olpath) == 0 &&
583 	    db->mohandle.mo.mo_magic)
584 		goto found;
585 
586 	/* try to find appropriate file, from $LANGUAGE */
587 	if (lookup_mofile(path, sizeof(path), db->path, lpath, cname,
588 	    domainname, db) == NULL)
589 		goto fail;
590 
591 	if (odomainname)
592 		free(odomainname);
593 	if (ocname)
594 		free(ocname);
595 	odomainname = strdup(domainname);
596 	ocname = strdup(cname);
597 	if (!odomainname || !ocname) {
598 		if (odomainname)
599 			free(odomainname);
600 		if (ocname)
601 			free(ocname);
602 		odomainname = ocname = NULL;
603 		goto fail;
604 	}
605 
606 	strlcpy(olpath, lpath, sizeof(olpath));
607 
608 found:
609 	v = lookup(msgid, db);
610 	if (v) {
611 		/*
612 		 * XXX call iconv() here, if translated text is encoded
613 		 * differently from currently-selected encoding (locale).
614 		 * look at Content-type header in *.mo file, in string obtained
615 		 * by gettext("").
616 		 */
617 
618 		/*
619 		 * Given the amount of printf-format security issues, it may
620 		 * be a good idea to validate if the original msgid and the
621 		 * translated message format string carry the same printf-like
622 		 * format identifiers.
623 		 */
624 
625 		msgid = v;
626 	}
627 
628 fail:
629 	/* LINTED const cast */
630 	return (char *)msgid;
631 }
632