xref: /netbsd-src/lib/libc/locale/rune.c (revision 52ed7b035ff6e5fc73eada9c5ce08f7fa154eb8c)
1 /*	$NetBSD: rune.c,v 1.31 2009/01/02 00:20:20 tnozaki Exp $	*/
2 
3 /*-
4  * Copyright (c)1999 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 #include <sys/cdefs.h>
62 #if defined(LIBC_SCCS) && !defined(lint)
63 #if 0
64 static char sccsid[] = "@(#)rune.c	8.1 (Berkeley) 6/4/93";
65 #else
66 __RCSID("$NetBSD: rune.c,v 1.31 2009/01/02 00:20:20 tnozaki Exp $");
67 #endif
68 #endif /* LIBC_SCCS and not lint */
69 
70 #include "namespace.h"
71 #include <assert.h>
72 #include <stdio.h>
73 #include <string.h>
74 #include <stdlib.h>
75 #include <errno.h>
76 #include <wchar.h>
77 #include <sys/types.h>
78 #include <sys/stat.h>
79 
80 #include "citrus_module.h"
81 #include "citrus_ctype.h"
82 
83 #include "bsdctype.h"
84 #include "rune.h"
85 #include "rune_local.h"
86 
87 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *));
88 static void _freeentry __P((_RuneRange *));
89 static void _wctype_init __P((_RuneLocale *rl));
90 
91 static int
92 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
93 	FILE *fp)
94 {
95 	uint32_t i;
96 	_RuneEntry *re;
97 	_FileRuneEntry fre;
98 
99 	_DIAGASSERT(rl != NULL);
100 	_DIAGASSERT(rr != NULL);
101 	_DIAGASSERT(frr != NULL);
102 	_DIAGASSERT(lastp != NULL);
103 	_DIAGASSERT(fp != NULL);
104 
105 	re = (_RuneEntry *)rl->rl_variable;
106 
107 	rr->rr_nranges = ntohl(frr->frr_nranges);
108 	if (rr->rr_nranges == 0) {
109 		rr->rr_rune_ranges = NULL;
110 		return 0;
111 	}
112 
113 	rr->rr_rune_ranges = re;
114 	for (i = 0; i < rr->rr_nranges; i++) {
115 		if (fread(&fre, sizeof(fre), 1, fp) != 1)
116 			return -1;
117 
118 		re->re_min = ntohl((u_int32_t)fre.fre_min);
119 		re->re_max = ntohl((u_int32_t)fre.fre_max);
120 		re->re_map = ntohl((u_int32_t)fre.fre_map);
121 		re++;
122 
123 		if ((void *)re > lastp)
124 			return -1;
125 	}
126 	rl->rl_variable = re;
127 	return 0;
128 }
129 
130 static int
131 readentry(_RuneRange *rr, FILE *fp)
132 {
133 	_RuneEntry *re;
134 	size_t l, i, j;
135 	int error;
136 
137 	_DIAGASSERT(rr != NULL);
138 	_DIAGASSERT(fp != NULL);
139 
140 	re = rr->rr_rune_ranges;
141 	for (i = 0; i < rr->rr_nranges; i++) {
142 		if (re[i].re_map != 0) {
143 			re[i].re_rune_types = NULL;
144 			continue;
145 		}
146 
147 		l = re[i].re_max - re[i].re_min + 1;
148 		re[i].re_rune_types = malloc(l * sizeof(_RuneType));
149 		if (!re[i].re_rune_types) {
150 			error = ENOMEM;
151 			goto fail;
152 		}
153 		memset(re[i].re_rune_types, 0, l * sizeof(_RuneType));
154 
155 		if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l)
156 			goto fail2;
157 
158 		for (j = 0; j < l; j++)
159 			re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]);
160 	}
161 	return 0;
162 
163 fail:
164 	for (j = 0; j < i; j++) {
165 		free(re[j].re_rune_types);
166 		re[j].re_rune_types = NULL;
167 	}
168 	return error;
169 fail2:
170 	for (j = 0; j <= i; j++) {
171 		free(re[j].re_rune_types);
172 		re[j].re_rune_types = NULL;
173 	}
174 	return errno;
175 }
176 
177 /* XXX: temporary implementation */
178 static void
179 find_codeset(_RuneLocale *rl)
180 {
181 	char *top, *codeset, *tail, *ep;
182 
183 	/* end of rl_variable region */
184 	ep = (char *)rl->rl_variable;
185 	ep += rl->rl_variable_len;
186 	rl->rl_codeset = NULL;
187 	if (!(top = strstr(rl->rl_variable, _RUNE_CODESET)))
188 		return;
189 	tail = strpbrk(top, " \t");
190 	codeset = top + sizeof(_RUNE_CODESET) - 1;
191 	if (tail) {
192 		*top = *tail;
193 		*tail = '\0';
194 		rl->rl_codeset = strdup(codeset);
195 		strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1)));
196 	} else {
197 		*top = '\0';
198 		rl->rl_codeset = strdup(codeset);
199 	}
200 }
201 
202 void
203 _freeentry(_RuneRange *rr)
204 {
205 	_RuneEntry *re;
206 	uint32_t i;
207 
208 	_DIAGASSERT(rr != NULL);
209 
210 	re = rr->rr_rune_ranges;
211 	for (i = 0; i < rr->rr_nranges; i++) {
212 		if (re[i].re_rune_types)
213 			free(re[i].re_rune_types);
214 		re[i].re_rune_types = NULL;
215 	}
216 }
217 
218 void
219 _wctype_init(_RuneLocale *rl)
220 {
221 	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
222 	       sizeof(rl->rl_wctype));
223 }
224 
225 
226 _RuneLocale *
227 _Read_RuneMagi(fp)
228 	FILE *fp;
229 {
230 	/* file */
231 	_FileRuneLocale frl;
232 	/* host data */
233 	char *hostdata;
234 	size_t hostdatalen;
235 	void *lastp;
236 	_RuneLocale *rl;
237 	struct stat sb;
238 	int x;
239 
240 	_DIAGASSERT(fp != NULL);
241 
242 	if (fstat(fileno(fp), &sb) < 0)
243 		return NULL;
244 
245 	if (sb.st_size < sizeof(_FileRuneLocale))
246 		return NULL;
247 	/* XXX more validation? */
248 
249 	/* Someone might have read the magic number once already */
250 	rewind(fp);
251 
252 	if (fread(&frl, sizeof(frl), 1, fp) != 1)
253 		return NULL;
254 	if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
255 		return NULL;
256 
257 	hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) +
258 	    ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) +
259 	    ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) +
260 	    ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry);
261 
262 	if ((hostdata = malloc(hostdatalen)) == NULL)
263 		return NULL;
264 	memset(hostdata, 0, hostdatalen);
265 	lastp = hostdata + hostdatalen;
266 
267 	rl = (_RuneLocale *)(void *)hostdata;
268 	rl->rl_variable = rl + 1;
269 
270 	memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic));
271 	memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding));
272 
273 	rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune);
274 	rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len);
275 
276 	for (x = 0; x < _CACHED_RUNES; ++x) {
277 		rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]);
278 
279 		/* XXX assumes rune_t = u_int32_t */
280 		rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]);
281 		rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]);
282 	}
283 
284 	if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp))
285 	{
286 		free(hostdata);
287 		return NULL;
288 	}
289 	if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp))
290 	{
291 		free(hostdata);
292 		return NULL;
293 	}
294 	if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp))
295 	{
296 		free(hostdata);
297 		return NULL;
298 	}
299 
300 	if (readentry(&rl->rl_runetype_ext, fp) != 0) {
301 		free(hostdata);
302 		return NULL;
303 	}
304 
305 	if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len >
306 	    (u_int8_t *)lastp) {
307 		_freeentry(&rl->rl_runetype_ext);
308 		free(hostdata);
309 		return NULL;
310 	}
311 	if (rl->rl_variable_len == 0)
312 		rl->rl_variable = NULL;
313 	if (rl->rl_variable == NULL ||
314 	    fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) {
315 		_freeentry(&rl->rl_runetype_ext);
316 		free(hostdata);
317 		return NULL;
318 	}
319 	find_codeset(rl);
320 	_wctype_init(rl);
321 
322 	/* error if we have junk at the tail */
323 	if (ftell(fp) != sb.st_size) {
324 		_freeentry(&rl->rl_runetype_ext);
325 		free(hostdata);
326 		return NULL;
327 	}
328 
329 	return(rl);
330 }
331 
332 void
333 _NukeRune(rl)
334 	_RuneLocale *rl;
335 {
336 
337 	_DIAGASSERT(rl != NULL);
338 
339 	if (rl != &_DefaultRuneLocale) {
340 		_freeentry(&rl->rl_runetype_ext);
341 		if (rl->rl_codeset)
342 			free(__UNCONST(rl->rl_codeset));
343 		if (rl->rl_citrus_ctype)
344 			_citrus_ctype_close(rl->rl_citrus_ctype);
345 		free(__UNCONST(rl->rl_ctype_tab));
346 		free(__UNCONST(rl->rl_tolower_tab));
347 		free(__UNCONST(rl->rl_toupper_tab));
348 		free(rl);
349 	}
350 }
351 
352 /*
353  * read in old LC_CTYPE declaration file, convert into runelocale info
354  */
355 #define _CTYPE_PRIVATE
356 #include <limits.h>
357 #include <ctype.h>
358 
359 _RuneLocale *
360 _Read_CTypeAsRune(fp)
361 	FILE *fp;
362 {
363 	char id[sizeof(_CTYPE_ID) - 1];
364 	u_int32_t i, len;
365 	u_int8_t *new_ctype = NULL;
366 	int16_t *new_toupper = NULL, *new_tolower = NULL;
367 	/* host data */
368 	char *hostdata = NULL;
369 	size_t hostdatalen;
370 	_RuneLocale *rl;
371 	struct stat sb;
372 	int x;
373 
374 	_DIAGASSERT(fp != NULL);
375 
376 	if (fstat(fileno(fp), &sb) < 0)
377 		return NULL;
378 
379 	if (sb.st_size < sizeof(id))
380 		return NULL;
381 	/* XXX more validation? */
382 
383 	/* Someone might have read the magic number once already */
384 	rewind(fp);
385 
386 	if (fread(id, sizeof(id), 1, fp) != 1)
387 		goto bad;
388 	if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
389 		goto bad;
390 
391 	if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
392 		goto bad;
393 	if ((i = ntohl(i)) != _CTYPE_REV)
394 		goto bad;
395 
396 	if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
397 		goto bad;
398 	if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
399 		goto bad;
400 
401 	if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL ||
402 	    (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL ||
403 	    (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
404 		goto bad;
405 	new_ctype[0] = 0;
406 	if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
407 		goto bad;
408 	new_toupper[0] = EOF;
409 	if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
410 		goto bad;
411 	new_tolower[0] = EOF;
412 	if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
413 		goto bad;
414 
415 	hostdatalen = sizeof(*rl);
416 
417 	if ((hostdata = malloc(hostdatalen)) == NULL)
418 		goto bad;
419 	memset(hostdata, 0, hostdatalen);
420 	rl = (_RuneLocale *)(void *)hostdata;
421 	rl->rl_variable = NULL;
422 
423 	memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic));
424 	memcpy(rl->rl_encoding, "NONE", 4);
425 
426 	rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune;	/*XXX*/
427 	rl->rl_variable_len = 0;
428 
429 	for (x = 0; x < _CACHED_RUNES; ++x) {
430 		if ((uint32_t) x > len)
431 			continue;
432 
433 		/*
434 		 * TWEAKS!
435 		 * - old locale file declarations do not have proper _B
436 		 *   in many cases.
437 		 * - isprint() declaration in ctype.h incorrectly uses _B.
438 		 *   _B means "isprint but !isgraph", not "isblank" with the
439 		 *   declaration.
440 		 * - _X and _CTYPE_X have negligible difference in meaning.
441 		 * - we don't set digit value, fearing that it would be
442 		 *   too much of hardcoding.  we may need to revisit it.
443 		 */
444 
445 		if (new_ctype[1 + x] & _U)
446 			rl->rl_runetype[x] |= _CTYPE_U;
447 		if (new_ctype[1 + x] & _L)
448 			rl->rl_runetype[x] |= _CTYPE_L;
449 		if (new_ctype[1 + x] & _N)
450 			rl->rl_runetype[x] |= _CTYPE_D;
451 		if (new_ctype[1 + x] & _S)
452 			rl->rl_runetype[x] |= _CTYPE_S;
453 		if (new_ctype[1 + x] & _P)
454 			rl->rl_runetype[x] |= _CTYPE_P;
455 		if (new_ctype[1 + x] & _C)
456 			rl->rl_runetype[x] |= _CTYPE_C;
457 		/* derived flag bits, duplicate of ctype.h */
458 		if (new_ctype[1 + x] & (_U | _L))
459 			rl->rl_runetype[x] |= _CTYPE_A;
460 		if (new_ctype[1 + x] & (_N | _X))
461 			rl->rl_runetype[x] |= _CTYPE_X;
462 		if (new_ctype[1 + x] & (_P|_U|_L|_N))
463 			rl->rl_runetype[x] |= _CTYPE_G;
464 		/* we don't really trust _B in the file.  see above. */
465 		if (new_ctype[1 + x] & _B)
466 			rl->rl_runetype[x] |= _CTYPE_B;
467 		if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ')
468 			rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1);
469 		if (x == ' ' || x == '\t')
470 			rl->rl_runetype[x] |= _CTYPE_B;
471 
472 		/* XXX may fail on non-8bit encoding only */
473 		rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]);
474 		rl->rl_maplower[x] = ntohs(new_tolower[1 + x]);
475 	}
476 
477 	_wctype_init(rl);
478 
479 	/*
480 	 * __runetable_to_netbsd_ctype() will be called from
481 	 * setrunelocale.c:_newrunelocale(), and fill old ctype table.
482 	 */
483 
484 	free(new_ctype);
485 	free(new_toupper);
486 	free(new_tolower);
487 	return(rl);
488 
489 bad:
490 	if (new_ctype)
491 		free(new_ctype);
492 	if (new_toupper)
493 		free(new_toupper);
494 	if (new_tolower)
495 		free(new_tolower);
496 	return NULL;
497 }
498