xref: /netbsd-src/lib/libc/locale/rune.c (revision 39ff64855372f5de717544e67fc3050907fb2dba)
1 /*	$NetBSD: rune.c,v 1.28 2006/03/19 02:44:27 christos Exp $	*/
2 
3 /*-
4  * Copyright (c)1999 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  */
60 
61 #include <sys/cdefs.h>
62 #if defined(LIBC_SCCS) && !defined(lint)
63 #if 0
64 static char sccsid[] = "@(#)rune.c	8.1 (Berkeley) 6/4/93";
65 #else
66 __RCSID("$NetBSD: rune.c,v 1.28 2006/03/19 02:44:27 christos Exp $");
67 #endif
68 #endif /* LIBC_SCCS and not lint */
69 
70 #include "namespace.h"
71 #include <assert.h>
72 #include <stdio.h>
73 #include <string.h>
74 #include <stdlib.h>
75 #include <errno.h>
76 #include <wchar.h>
77 #include <sys/types.h>
78 #include <sys/stat.h>
79 #include <citrus/citrus_module.h>
80 #include <citrus/citrus_ctype.h>
81 #include "rune.h"
82 #include "rune_local.h"
83 
84 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *));
85 static void _freeentry __P((_RuneRange *));
86 static void _wctype_init __P((_RuneLocale *rl));
87 
88 static int
89 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
90 	FILE *fp)
91 {
92 	uint32_t i;
93 	_RuneEntry *re;
94 	_FileRuneEntry fre;
95 
96 	_DIAGASSERT(rl != NULL);
97 	_DIAGASSERT(rr != NULL);
98 	_DIAGASSERT(frr != NULL);
99 	_DIAGASSERT(lastp != NULL);
100 	_DIAGASSERT(fp != NULL);
101 
102 	re = (_RuneEntry *)rl->rl_variable;
103 
104 	rr->rr_nranges = ntohl(frr->frr_nranges);
105 	if (rr->rr_nranges == 0) {
106 		rr->rr_rune_ranges = NULL;
107 		return 0;
108 	}
109 
110 	rr->rr_rune_ranges = re;
111 	for (i = 0; i < rr->rr_nranges; i++) {
112 		if (fread(&fre, sizeof(fre), 1, fp) != 1)
113 			return -1;
114 
115 		re->re_min = ntohl((u_int32_t)fre.fre_min);
116 		re->re_max = ntohl((u_int32_t)fre.fre_max);
117 		re->re_map = ntohl((u_int32_t)fre.fre_map);
118 		re++;
119 
120 		if ((void *)re > lastp)
121 			return -1;
122 	}
123 	rl->rl_variable = re;
124 	return 0;
125 }
126 
127 static int
128 readentry(_RuneRange *rr, FILE *fp)
129 {
130 	_RuneEntry *re;
131 	size_t l, i, j;
132 	int error;
133 
134 	_DIAGASSERT(rr != NULL);
135 	_DIAGASSERT(fp != NULL);
136 
137 	re = rr->rr_rune_ranges;
138 	for (i = 0; i < rr->rr_nranges; i++) {
139 		if (re[i].re_map != 0) {
140 			re[i].re_rune_types = NULL;
141 			continue;
142 		}
143 
144 		l = re[i].re_max - re[i].re_min + 1;
145 		re[i].re_rune_types = malloc(l * sizeof(_RuneType));
146 		if (!re[i].re_rune_types) {
147 			error = ENOMEM;
148 			goto fail;
149 		}
150 		memset(re[i].re_rune_types, 0, l * sizeof(_RuneType));
151 
152 		if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l)
153 			goto fail2;
154 
155 		for (j = 0; j < l; j++)
156 			re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]);
157 	}
158 	return 0;
159 
160 fail:
161 	for (j = 0; j < i; j++) {
162 		free(re[j].re_rune_types);
163 		re[j].re_rune_types = NULL;
164 	}
165 	return error;
166 fail2:
167 	for (j = 0; j <= i; j++) {
168 		free(re[j].re_rune_types);
169 		re[j].re_rune_types = NULL;
170 	}
171 	return errno;
172 }
173 
174 /* XXX: temporary implementation */
175 static void
176 find_codeset(_RuneLocale *rl)
177 {
178 	char *top, *codeset, *tail, *ep;
179 
180 	/* end of rl_variable region */
181 	ep = (char *)rl->rl_variable;
182 	ep += rl->rl_variable_len;
183 	rl->rl_codeset = NULL;
184 	if (!(top = strstr(rl->rl_variable, _RUNE_CODESET)))
185 		return;
186 	tail = strpbrk(top, " \t");
187 	codeset = top + sizeof(_RUNE_CODESET) - 1;
188 	if (tail) {
189 		*top = *tail;
190 		*tail = '\0';
191 		rl->rl_codeset = strdup(codeset);
192 		strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1)));
193 	} else {
194 		*top = '\0';
195 		rl->rl_codeset = strdup(codeset);
196 	}
197 }
198 
199 void
200 _freeentry(_RuneRange *rr)
201 {
202 	_RuneEntry *re;
203 	uint32_t i;
204 
205 	_DIAGASSERT(rr != NULL);
206 
207 	re = rr->rr_rune_ranges;
208 	for (i = 0; i < rr->rr_nranges; i++) {
209 		if (re[i].re_rune_types)
210 			free(re[i].re_rune_types);
211 		re[i].re_rune_types = NULL;
212 	}
213 }
214 
215 void
216 _wctype_init(_RuneLocale *rl)
217 {
218 	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
219 	       sizeof(rl->rl_wctype));
220 }
221 
222 
223 _RuneLocale *
224 _Read_RuneMagi(fp)
225 	FILE *fp;
226 {
227 	/* file */
228 	_FileRuneLocale frl;
229 	/* host data */
230 	char *hostdata;
231 	size_t hostdatalen;
232 	void *lastp;
233 	_RuneLocale *rl;
234 	struct stat sb;
235 	int x;
236 
237 	_DIAGASSERT(fp != NULL);
238 
239 	if (fstat(fileno(fp), &sb) < 0)
240 		return NULL;
241 
242 	if (sb.st_size < sizeof(_FileRuneLocale))
243 		return NULL;
244 	/* XXX more validation? */
245 
246 	/* Someone might have read the magic number once already */
247 	rewind(fp);
248 
249 	if (fread(&frl, sizeof(frl), 1, fp) != 1)
250 		return NULL;
251 	if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
252 		return NULL;
253 
254 	hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) +
255 	    ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) +
256 	    ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) +
257 	    ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry);
258 
259 	if ((hostdata = malloc(hostdatalen)) == NULL)
260 		return NULL;
261 	memset(hostdata, 0, hostdatalen);
262 	lastp = hostdata + hostdatalen;
263 
264 	rl = (_RuneLocale *)(void *)hostdata;
265 	rl->rl_variable = rl + 1;
266 
267 	memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic));
268 	memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding));
269 
270 	rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune);
271 	rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len);
272 
273 	for (x = 0; x < _CACHED_RUNES; ++x) {
274 		rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]);
275 
276 		/* XXX assumes rune_t = u_int32_t */
277 		rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]);
278 		rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]);
279 	}
280 
281 	if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp))
282 	{
283 		free(hostdata);
284 		return NULL;
285 	}
286 	if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp))
287 	{
288 		free(hostdata);
289 		return NULL;
290 	}
291 	if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp))
292 	{
293 		free(hostdata);
294 		return NULL;
295 	}
296 
297 	if (readentry(&rl->rl_runetype_ext, fp) != 0) {
298 		free(hostdata);
299 		return NULL;
300 	}
301 
302 	if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len >
303 	    (u_int8_t *)lastp) {
304 		_freeentry(&rl->rl_runetype_ext);
305 		free(hostdata);
306 		return NULL;
307 	}
308 	if (rl->rl_variable_len == 0)
309 		rl->rl_variable = NULL;
310 	if (rl->rl_variable == NULL ||
311 	    fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) {
312 		_freeentry(&rl->rl_runetype_ext);
313 		free(hostdata);
314 		return NULL;
315 	}
316 	find_codeset(rl);
317 	_wctype_init(rl);
318 
319 	/* error if we have junk at the tail */
320 	if (ftell(fp) != sb.st_size) {
321 		_freeentry(&rl->rl_runetype_ext);
322 		free(hostdata);
323 		return NULL;
324 	}
325 
326 	return(rl);
327 }
328 
329 void
330 _NukeRune(rl)
331 	_RuneLocale *rl;
332 {
333 
334 	_DIAGASSERT(rl != NULL);
335 
336 	if (rl != &_DefaultRuneLocale) {
337 		_freeentry(&rl->rl_runetype_ext);
338 		if (rl->rl_codeset)
339 			free(__UNCONST(rl->rl_codeset));
340 		if (rl->rl_citrus_ctype)
341 			_citrus_ctype_close(rl->rl_citrus_ctype);
342 		free(rl);
343 	}
344 }
345 
346 /*
347  * read in old LC_CTYPE declaration file, convert into runelocale info
348  */
349 #define _CTYPE_PRIVATE
350 #include <limits.h>
351 #include <ctype.h>
352 
353 _RuneLocale *
354 _Read_CTypeAsRune(fp)
355 	FILE *fp;
356 {
357 	char id[sizeof(_CTYPE_ID) - 1];
358 	u_int32_t i, len;
359 	u_int8_t *new_ctype = NULL;
360 	int16_t *new_toupper = NULL, *new_tolower = NULL;
361 	/* host data */
362 	char *hostdata = NULL;
363 	size_t hostdatalen;
364 	_RuneLocale *rl;
365 	struct stat sb;
366 	int x;
367 
368 	_DIAGASSERT(fp != NULL);
369 
370 	if (fstat(fileno(fp), &sb) < 0)
371 		return NULL;
372 
373 	if (sb.st_size < sizeof(id))
374 		return NULL;
375 	/* XXX more validation? */
376 
377 	/* Someone might have read the magic number once already */
378 	rewind(fp);
379 
380 	if (fread(id, sizeof(id), 1, fp) != 1)
381 		goto bad;
382 	if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
383 		goto bad;
384 
385 	if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
386 		goto bad;
387 	if ((i = ntohl(i)) != _CTYPE_REV)
388 		goto bad;
389 
390 	if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
391 		goto bad;
392 	if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
393 		goto bad;
394 
395 	if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL ||
396 	    (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL ||
397 	    (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
398 		goto bad;
399 	new_ctype[0] = 0;
400 	if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
401 		goto bad;
402 	new_toupper[0] = EOF;
403 	if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
404 		goto bad;
405 	new_tolower[0] = EOF;
406 	if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
407 		goto bad;
408 
409 	hostdatalen = sizeof(*rl);
410 
411 	if ((hostdata = malloc(hostdatalen)) == NULL)
412 		goto bad;
413 	memset(hostdata, 0, hostdatalen);
414 	rl = (_RuneLocale *)(void *)hostdata;
415 	rl->rl_variable = NULL;
416 
417 	memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic));
418 	memcpy(rl->rl_encoding, "NONE", 4);
419 
420 	rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune;	/*XXX*/
421 	rl->rl_variable_len = 0;
422 
423 	for (x = 0; x < _CACHED_RUNES; ++x) {
424 		if ((uint32_t) x > len)
425 			continue;
426 
427 		/*
428 		 * TWEAKS!
429 		 * - old locale file declarations do not have proper _B
430 		 *   in many cases.
431 		 * - isprint() declaration in ctype.h incorrectly uses _B.
432 		 *   _B means "isprint but !isgraph", not "isblank" with the
433 		 *   declaration.
434 		 * - _X and _CTYPE_X have negligible difference in meaning.
435 		 * - we don't set digit value, fearing that it would be
436 		 *   too much of hardcoding.  we may need to revisit it.
437 		 */
438 
439 		if (new_ctype[1 + x] & _U)
440 			rl->rl_runetype[x] |= _CTYPE_U;
441 		if (new_ctype[1 + x] & _L)
442 			rl->rl_runetype[x] |= _CTYPE_L;
443 		if (new_ctype[1 + x] & _N)
444 			rl->rl_runetype[x] |= _CTYPE_D;
445 		if (new_ctype[1 + x] & _S)
446 			rl->rl_runetype[x] |= _CTYPE_S;
447 		if (new_ctype[1 + x] & _P)
448 			rl->rl_runetype[x] |= _CTYPE_P;
449 		if (new_ctype[1 + x] & _C)
450 			rl->rl_runetype[x] |= _CTYPE_C;
451 		/* derived flag bits, duplicate of ctype.h */
452 		if (new_ctype[1 + x] & (_U | _L))
453 			rl->rl_runetype[x] |= _CTYPE_A;
454 		if (new_ctype[1 + x] & (_N | _X))
455 			rl->rl_runetype[x] |= _CTYPE_X;
456 		if (new_ctype[1 + x] & (_P|_U|_L|_N))
457 			rl->rl_runetype[x] |= _CTYPE_G;
458 		/* we don't really trust _B in the file.  see above. */
459 		if (new_ctype[1 + x] & _B)
460 			rl->rl_runetype[x] |= _CTYPE_B;
461 		if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ')
462 			rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1);
463 		if (x == ' ' || x == '\t')
464 			rl->rl_runetype[x] |= _CTYPE_B;
465 
466 		/* XXX may fail on non-8bit encoding only */
467 		rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]);
468 		rl->rl_maplower[x] = ntohs(new_tolower[1 + x]);
469 	}
470 
471 	_wctype_init(rl);
472 
473 	/*
474 	 * __runetable_to_netbsd_ctype() will be called from
475 	 * setlocale.c:loadlocale(), and fill old ctype table.
476 	 */
477 
478 	free(new_ctype);
479 	free(new_toupper);
480 	free(new_tolower);
481 	return(rl);
482 
483 bad:
484 	if (new_ctype)
485 		free(new_ctype);
486 	if (new_toupper)
487 		free(new_toupper);
488 	if (new_tolower)
489 		free(new_tolower);
490 	if (hostdata)
491 		free(hostdata);
492 	return NULL;
493 }
494