xref: /netbsd-src/lib/libc/locale/rune.c (revision 9fbd88883c38d0c0fbfcbe66d76fe6b0fab3f9de)
1 /*	$NetBSD: rune.c,v 1.13 2001/05/26 00:35:20 kristerw Exp $	*/
2 
3 /*-
4  * Copyright (c)1999 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*-
30  * Copyright (c) 1993
31  *	The Regents of the University of California.  All rights reserved.
32  *
33  * This code is derived from software contributed to Berkeley by
34  * Paul Borman at Krystal Technologies.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgement:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  * 4. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 #include <sys/cdefs.h>
66 #if defined(LIBC_SCCS) && !defined(lint)
67 #if 0
68 static char sccsid[] = "@(#)rune.c	8.1 (Berkeley) 6/4/93";
69 #else
70 __RCSID("$NetBSD: rune.c,v 1.13 2001/05/26 00:35:20 kristerw Exp $");
71 #endif
72 #endif /* LIBC_SCCS and not lint */
73 
74 #include "rune.h"
75 #include <assert.h>
76 #include <stdio.h>
77 #include <string.h>
78 #include <stdlib.h>
79 #include <errno.h>
80 #include <sys/types.h>
81 #include <sys/stat.h>
82 #include "rune_local.h"
83 
84 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *));
85 static void _freeentry __P((_RuneRange *));
86 
87 static int
88 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
89 	FILE *fp)
90 {
91 	int i;
92 	_RuneEntry *re;
93 	_FileRuneEntry fre;
94 
95 	_DIAGASSERT(rl != NULL);
96 	_DIAGASSERT(rr != NULL);
97 	_DIAGASSERT(frr != NULL);
98 	_DIAGASSERT(lastp != NULL);
99 	_DIAGASSERT(fp != NULL);
100 
101 	re = (_RuneEntry *)rl->__rune_variable;
102 
103 	rr->__nranges = ntohl(frr->__nranges);
104 	if (rr->__nranges == 0) {
105 		rr->__rune_ranges = NULL;
106 		return 0;
107 	}
108 
109 	rr->__rune_ranges = re;
110 	for (i = 0; i < rr->__nranges; i++) {
111 		if (fread(&fre, sizeof(fre), 1, fp) != 1)
112 			return -1;
113 
114 		re->__min = ntohl((u_int32_t)fre.__min);
115 		re->__max = ntohl((u_int32_t)fre.__max);
116 		re->__map = ntohl((u_int32_t)fre.__map);
117 		re++;
118 
119 		if ((void *)re > lastp)
120 			return -1;
121 	}
122 	rl->__rune_variable = re;
123 	return 0;
124 }
125 
126 static int
127 readentry(_RuneRange *rr, FILE *fp)
128 {
129 	_RuneEntry *re;
130 	size_t l, i, j;
131 	int error;
132 
133 	_DIAGASSERT(rr != NULL);
134 	_DIAGASSERT(fp != NULL);
135 
136 	re = rr->__rune_ranges;
137 	for (i = 0; i < rr->__nranges; i++) {
138 		if (re[i].__map != 0) {
139 			re[i].__rune_types = NULL;
140 			continue;
141 		}
142 
143 		l = re[i].__max - re[i].__min + 1;
144 		re[i].__rune_types = malloc(l * sizeof(_RuneType));
145 		if (!re[i].__rune_types) {
146 			error = ENOBUFS;
147 			goto fail;
148 		}
149 		memset(re[i].__rune_types, 0, l * sizeof(_RuneType));
150 
151 		if (fread(re[i].__rune_types, sizeof(_RuneType), l, fp) != l)
152 			goto fail2;
153 
154 		for (j = 0; j < l; j++)
155 			re[i].__rune_types[j] = ntohl(re[i].__rune_types[j]);
156 	}
157 	return 0;
158 
159 fail:
160 	for (j = 0; j < i; j++) {
161 		free(re[j].__rune_types);
162 		re[j].__rune_types = NULL;
163 	}
164 	return error;
165 fail2:
166 	for (j = 0; j <= i; j++) {
167 		free(re[j].__rune_types);
168 		re[j].__rune_types = NULL;
169 	}
170 	return errno;
171 }
172 
173 /* XXX: temporary implementation */
174 static void
175 find_codeset(_RuneLocale *rl)
176 {
177 	char *top, *codeset, *tail;
178 
179 	rl->__rune_codeset = NULL;
180 	if (!(top=strstr(rl->__rune_variable, _RUNE_CODESET)))
181 		return;
182 	tail = strpbrk(top, " \t");
183 	codeset = top + sizeof(_RUNE_CODESET)-1;
184 	if (tail) {
185 		*top = *tail;
186 		*tail = '\0';
187 		rl->__rune_codeset = strdup(codeset);
188 		strcpy(top+1, tail+1);
189 
190 	} else {
191 		*top='\0';
192 		rl->__rune_codeset = strdup(codeset);
193 	}
194 }
195 
196 void
197 _freeentry(_RuneRange *rr)
198 {
199 	_RuneEntry *re;
200 	int i;
201 
202 	_DIAGASSERT(rr != NULL);
203 
204 	re = rr->__rune_ranges;
205 	for (i = 0; i < rr->__nranges; i++) {
206 		if (re[i].__rune_types)
207 			free(re[i].__rune_types);
208 		re[i].__rune_types = NULL;
209 	}
210 }
211 
212 _RuneLocale *
213 _Read_RuneMagi(fp)
214 	FILE *fp;
215 {
216 	/* file */
217 	_FileRuneLocale frl;
218 	/* host data */
219 	char *hostdata;
220 	size_t hostdatalen;
221 	void *lastp;
222 	_RuneLocale *rl;
223 	struct stat sb;
224 	int x;
225 
226 	_DIAGASSERT(fp != NULL);
227 
228 	if (fstat(fileno(fp), &sb) < 0)
229 		return NULL;
230 
231 	if (sb.st_size < sizeof(_RuneLocale))
232 		return NULL;
233 	/* XXX more validation? */
234 
235 	/* Someone might have read the magic number once already */
236 	rewind(fp);
237 
238 	if (fread(&frl, sizeof(frl), 1, fp) != 1)
239 		return NULL;
240 	if (memcmp(frl.__magic, _RUNE_MAGIC_1, sizeof(frl.__magic)))
241 		return NULL;
242 
243 	hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.__variable_len) +
244 	    ntohl(frl.__runetype_ext.__nranges) * sizeof(_RuneEntry) +
245 	    ntohl(frl.__maplower_ext.__nranges) * sizeof(_RuneEntry) +
246 	    ntohl(frl.__mapupper_ext.__nranges) * sizeof(_RuneEntry);
247 
248 	if ((hostdata = malloc(hostdatalen)) == NULL)
249 		return NULL;
250 	memset(hostdata, 0, hostdatalen);
251 	lastp = hostdata + hostdatalen;
252 
253 	rl = (_RuneLocale *)(void *)hostdata;
254 	rl->__rune_variable = rl + 1;
255 
256 	memcpy(rl->__magic, frl.__magic, sizeof(rl->__magic));
257 	memcpy(rl->__encoding, frl.__encoding, sizeof(rl->__encoding));
258 
259 	rl->__invalid_rune = ntohl((u_int32_t)frl.__invalid_rune);
260 	rl->__variable_len = ntohl((u_int32_t)frl.__variable_len);
261 
262 	for (x = 0; x < _CACHED_RUNES; ++x) {
263 		rl->__runetype[x] = ntohl(frl.__runetype[x]);
264 
265 		/* XXX assumes rune_t = u_int32_t */
266 		rl->__maplower[x] = ntohl((u_int32_t)frl.__maplower[x]);
267 		rl->__mapupper[x] = ntohl((u_int32_t)frl.__mapupper[x]);
268 	}
269 
270 	if (readrange(rl, &rl->__runetype_ext, &frl.__runetype_ext, lastp, fp))
271 	{
272 		free(hostdata);
273 		return NULL;
274 	}
275 	if (readrange(rl, &rl->__maplower_ext, &frl.__maplower_ext, lastp, fp))
276 	{
277 		free(hostdata);
278 		return NULL;
279 	}
280 	if (readrange(rl, &rl->__mapupper_ext, &frl.__mapupper_ext, lastp, fp))
281 	{
282 		free(hostdata);
283 		return NULL;
284 	}
285 
286 	if (readentry(&rl->__runetype_ext, fp) < 0) {
287 		free(hostdata);
288 		return NULL;
289 	}
290 
291 	if ((u_int8_t *)rl->__rune_variable + rl->__variable_len >
292 	    (u_int8_t *)lastp) {
293 		_freeentry(&rl->__runetype_ext);
294 		free(hostdata);
295 		return NULL;
296 	}
297 	if (rl->__variable_len == 0)
298 		rl->__rune_variable = NULL;
299 	else if (fread(rl->__rune_variable, rl->__variable_len, 1, fp) != 1) {
300 		_freeentry(&rl->__runetype_ext);
301 		free(hostdata);
302 		return NULL;
303 	}
304 	find_codeset(rl);
305 
306 	/* error if we have junk at the tail */
307 	if (ftell(fp) != sb.st_size) {
308 		_freeentry(&rl->__runetype_ext);
309 		free(hostdata);
310 		return NULL;
311 	}
312 
313 	return(rl);
314 }
315 
316 void
317 _NukeRune(rl)
318 	_RuneLocale *rl;
319 {
320 
321 	_DIAGASSERT(rl != NULL);
322 
323 	_freeentry(&rl->__runetype_ext);
324 	if (rl->__rune_codeset)
325 		free(rl->__rune_codeset);
326 	free(rl);
327 }
328 
329 /*
330  * read in old LC_CTYPE declaration file, convert into runelocale info
331  */
332 #define _CTYPE_PRIVATE
333 #include <limits.h>
334 #include <ctype.h>
335 
336 _RuneLocale *
337 _Read_CTypeAsRune(fp)
338 	FILE *fp;
339 {
340 	char id[sizeof(_CTYPE_ID) - 1];
341 	u_int32_t i, len;
342 	u_int8_t *new_ctype = NULL;
343 	int16_t *new_toupper = NULL, *new_tolower = NULL;
344 	/* host data */
345 	char *hostdata = NULL;
346 	size_t hostdatalen;
347 	_RuneLocale *rl;
348 	struct stat sb;
349 	int x;
350 
351 	_DIAGASSERT(fp != NULL);
352 
353 	if (fstat(fileno(fp), &sb) < 0)
354 		return NULL;
355 
356 	if (sb.st_size < sizeof(id))
357 		return NULL;
358 	/* XXX more validation? */
359 
360 	/* Someone might have read the magic number once already */
361 	rewind(fp);
362 
363 	if (fread(id, sizeof(id), 1, fp) != 1)
364 		goto bad;
365 	if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
366 		goto bad;
367 
368 	if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
369 		goto bad;
370 	if ((i = ntohl(i)) != _CTYPE_REV)
371 		goto bad;
372 
373 	if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
374 		goto bad;
375 	if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
376 		goto bad;
377 
378 	if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL ||
379 	    (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL ||
380 	    (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
381 		goto bad;
382 	new_ctype[0] = 0;
383 	if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
384 		goto bad;
385 	new_toupper[0] = EOF;
386 	if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
387 		goto bad;
388 	new_tolower[0] = EOF;
389 	if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
390 		goto bad;
391 
392 	hostdatalen = sizeof(*rl);
393 
394 	if ((hostdata = malloc(hostdatalen)) == NULL)
395 		goto bad;
396 	memset(hostdata, 0, hostdatalen);
397 	rl = (_RuneLocale *)(void *)hostdata;
398 	rl->__rune_variable = NULL;
399 
400 	memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic));
401 	memcpy(rl->__encoding, "NONE", 4);
402 
403 	rl->__invalid_rune = _DefaultRuneLocale.__invalid_rune;	/*XXX*/
404 	rl->__variable_len = 0;
405 
406 	for (x = 0; x < _CACHED_RUNES; ++x) {
407 		if (x > len)
408 			continue;
409 
410 		/*
411 		 * TWEAKS!
412 		 * - old locale file declarations do not have proper _B
413 		 *   in many cases.
414 		 * - isprint() declaration in ctype.h incorrectly uses _B.
415 		 *   _B means "isprint but !isgraph", not "isblank" with the
416 		 *   declaration.
417 		 * - _X and _CTYPE_X have negligible difference in meaning.
418 		 * - we don't set digit value, fearing that it would be
419 		 *   too much of hardcoding.  we may need to revisit it.
420 		 */
421 
422 		if (new_ctype[1 + x] & _U)
423 			rl->__runetype[x] |= _CTYPE_U;
424 		if (new_ctype[1 + x] & _L)
425 			rl->__runetype[x] |= _CTYPE_L;
426 		if (new_ctype[1 + x] & _N)
427 			rl->__runetype[x] |= _CTYPE_D;
428 		if (new_ctype[1 + x] & _S)
429 			rl->__runetype[x] |= _CTYPE_S;
430 		if (new_ctype[1 + x] & _P)
431 			rl->__runetype[x] |= _CTYPE_P;
432 		if (new_ctype[1 + x] & _C)
433 			rl->__runetype[x] |= _CTYPE_C;
434 		/* derived flag bits, duplicate of ctype.h */
435 		if (new_ctype[1 + x] & (_U | _L))
436 			rl->__runetype[x] |= _CTYPE_A;
437 		if (new_ctype[1 + x] & (_N | _X))
438 			rl->__runetype[x] |= _CTYPE_X;
439 		if (new_ctype[1 + x] & (_P|_U|_L|_N))
440 			rl->__runetype[x] |= _CTYPE_G;
441 		/* we don't really trust _B in the file.  see above. */
442 		if (new_ctype[1 + x] & _B)
443 			rl->__runetype[x] |= _CTYPE_B;
444 		if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ')
445 			rl->__runetype[x] |= (_CTYPE_R | _CTYPE_SW1);
446 		if (x == ' ' || x == '\t')
447 			rl->__runetype[x] |= _CTYPE_B;
448 
449 		/* XXX may fail on non-8bit encoding only */
450 		rl->__mapupper[x] = ntohs(new_toupper[1 + x]);
451 		rl->__maplower[x] = ntohs(new_tolower[1 + x]);
452 	}
453 
454 	/*
455 	 * __runetable_to_netbsd_ctype() will be called from
456 	 * setlocale.c:loadlocale(), and fill old ctype table.
457 	 */
458 
459 	free(new_ctype);
460 	free(new_toupper);
461 	free(new_tolower);
462 	return(rl);
463 
464 bad:
465 	if (new_ctype)
466 		free(new_ctype);
467 	if (new_toupper)
468 		free(new_toupper);
469 	if (new_tolower)
470 		free(new_tolower);
471 	if (hostdata)
472 		free(hostdata);
473 	return NULL;
474 }
475