xref: /openbsd-src/lib/libc/locale/rune.c (revision 909e7910c733713f536254da01d820c315d6be25)
1 /*	$OpenBSD: rune.c,v 1.11 2024/08/18 02:22:29 guenther Exp $ */
2 /*	$NetBSD: rune.c,v 1.26 2004/05/09 11:26:33 kleink Exp $	*/
3 
4 /*-
5  * Copyright (c)1999 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*-
31  * Copyright (c) 1993
32  *	The Regents of the University of California.  All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * Paul Borman at Krystal Technologies.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  */
61 
62 #include <sys/types.h>
63 #include <sys/stat.h>
64 #include <assert.h>
65 #include <errno.h>
66 #include <locale.h>
67 #include <stdint.h>
68 #include <stdio.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <wchar.h>
72 #include "runetype.h"
73 #include "rune_local.h"
74 
/*
 * Add y to the size_t accumulator x, making the enclosing function
 * return NULL instead of letting the sum wrap past SIZE_MAX.
 *
 * Both arguments are expanded more than once, so they must be free of
 * side effects.  The expansion deliberately carries no trailing
 * semicolon: the caller supplies it, which keeps the macro usable as the
 * body of an unbraced if/else.
 */
#define SAFE_ADD(x, y)			\
do {					\
	if ((x) > SIZE_MAX - (y))	\
		return NULL;		\
	(x) += (y);			\
} while (0)
81 
82 static int readrange(_RuneLocale *, _RuneRange *, uint32_t, void *, FILE *);
83 static void _freeentry(_RuneRange *);
84 
85 static int
86 readrange(_RuneLocale *rl, _RuneRange *rr, uint32_t nranges, void *lastp,
87 	FILE *fp)
88 {
89 	uint32_t i;
90 	_RuneEntry *re;
91 	_FileRuneEntry fre;
92 
93 	re = (_RuneEntry *)rl->rl_variable;
94 
95 	rr->rr_nranges = nranges;
96 	if (rr->rr_nranges == 0) {
97 		rr->rr_rune_ranges = NULL;
98 		return 0;
99 	}
100 
101 	rr->rr_rune_ranges = re;
102 	for (i = 0; i < rr->rr_nranges; i++) {
103 		if ((void *)re >= lastp)
104 			return -1;
105 
106 		if (fread(&fre, sizeof(fre), 1, fp) != 1)
107 			return -1;
108 
109 		re->re_min = ntohl((uint32_t)fre.fre_min);
110 		re->re_max = ntohl((uint32_t)fre.fre_max);
111 		re->re_map = ntohl((uint32_t)fre.fre_map);
112 		re++;
113 	}
114 	rl->rl_variable = re;
115 	return 0;
116 }
117 
118 static int
119 readentry(_RuneRange *rr, FILE *fp)
120 {
121 	_RuneEntry *re;
122 	size_t l, i, j;
123 	int error;
124 
125 	re = rr->rr_rune_ranges;
126 	for (i = 0; i < rr->rr_nranges; i++) {
127 		if (re[i].re_map != 0) {
128 			re[i].re_rune_types = NULL;
129 			continue;
130 		}
131 
132 		if (re[i].re_max < re[i].re_min) {
133 			error = EINVAL;
134 			goto fail;
135 		}
136 
137 		l = re[i].re_max - re[i].re_min + 1;
138 		re[i].re_rune_types = calloc(l, sizeof(_RuneType));
139 		if (!re[i].re_rune_types) {
140 			error = ENOMEM;
141 			goto fail;
142 		}
143 
144 		if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l)
145 			goto fail2;
146 
147 		for (j = 0; j < l; j++)
148 			re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]);
149 	}
150 	return 0;
151 
152 fail:
153 	for (j = 0; j < i; j++) {
154 		free(re[j].re_rune_types);
155 		re[j].re_rune_types = NULL;
156 	}
157 	return error;
158 fail2:
159 	for (j = 0; j <= i; j++) {
160 		free(re[j].re_rune_types);
161 		re[j].re_rune_types = NULL;
162 	}
163 	return errno;
164 }
165 
166 /* XXX: temporary implementation */
167 static int
168 find_codeset(_RuneLocale *rl)
169 {
170 	char *top, *codeset, *tail, *ep;
171 
172 	if (rl->rl_variable == NULL)
173 		return 0;
174 
175 	/* end of rl_variable region */
176 	ep = (char *)rl->rl_variable;
177 	ep += rl->rl_variable_len;
178 	rl->rl_codeset = NULL;
179 	if (!(top = strstr(rl->rl_variable, _RUNE_CODESET)))
180 		return 0;
181 	tail = strpbrk(top, " \t");
182 	codeset = top + sizeof(_RUNE_CODESET) - 1;
183 	if (tail) {
184 		*top = *tail;
185 		*tail = '\0';
186 		rl->rl_codeset = strdup(codeset);
187 		strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1)));
188 	} else {
189 		*top = '\0';
190 		rl->rl_codeset = strdup(codeset);
191 	}
192 	return (rl->rl_codeset == NULL);
193 }
194 
195 void
196 _freeentry(_RuneRange *rr)
197 {
198 	_RuneEntry *re;
199 	uint32_t i;
200 
201 	re = rr->rr_rune_ranges;
202 	for (i = 0; i < rr->rr_nranges; i++) {
203 		free(re[i].re_rune_types);
204 		re[i].re_rune_types = NULL;
205 	}
206 }
207 
208 
209 _RuneLocale *
210 _Read_RuneMagi(FILE *fp)
211 {
212 	/* file */
213 	_FileRuneLocale frl;
214 	/* host data */
215 	char *hostdata;
216 	size_t hostdatalen;
217 	void *lastp;
218 	_RuneLocale *rl;
219 	struct stat sb;
220 	int x;
221 	uint32_t runetype_nranges, maplower_nranges, mapupper_nranges, var_len;
222 
223 	if (fstat(fileno(fp), &sb) == -1)
224 		return NULL;
225 
226 	if (sb.st_size < sizeof(_FileRuneLocale))
227 		return NULL;
228 	/* XXX more validation? */
229 
230 	/* Someone might have read the magic number once already */
231 	rewind(fp);
232 
233 	if (fread(&frl, sizeof(frl), 1, fp) != 1)
234 		return NULL;
235 	if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
236 		return NULL;
237 
238 	runetype_nranges = ntohl(frl.frl_runetype_ext.frr_nranges);
239 	maplower_nranges = ntohl(frl.frl_maplower_ext.frr_nranges);
240 	mapupper_nranges = ntohl(frl.frl_mapupper_ext.frr_nranges);
241 	var_len = ntohl((uint32_t)frl.frl_variable_len);
242 
243 #if SIZE_MAX <= UINT32_MAX
244 	if (runetype_nranges > SIZE_MAX / sizeof(_RuneEntry) ||
245 	    maplower_nranges > SIZE_MAX / sizeof(_RuneEntry) ||
246 	    mapupper_nranges > SIZE_MAX / sizeof(_RuneEntry))
247 		return NULL;
248 #endif
249 
250 	if (var_len > INT32_MAX)
251 		return NULL;
252 
253 	hostdatalen = sizeof(*rl);
254 	SAFE_ADD(hostdatalen, var_len);
255 	SAFE_ADD(hostdatalen, runetype_nranges * sizeof(_RuneEntry));
256 	SAFE_ADD(hostdatalen, maplower_nranges * sizeof(_RuneEntry));
257 	SAFE_ADD(hostdatalen, mapupper_nranges * sizeof(_RuneEntry));
258 
259 	if ((hostdata = calloc(hostdatalen, 1)) == NULL)
260 		return NULL;
261 	lastp = hostdata + hostdatalen;
262 
263 	rl = (_RuneLocale *)hostdata;
264 	rl->rl_variable = rl + 1;
265 
266 	rl->rl_variable_len = ntohl((uint32_t)frl.frl_variable_len);
267 
268 	for (x = 0; x < _CACHED_RUNES; ++x) {
269 		rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]);
270 
271 		/* XXX assumes rune_t = uint32_t */
272 		rl->rl_maplower[x] = ntohl((uint32_t)frl.frl_maplower[x]);
273 		rl->rl_mapupper[x] = ntohl((uint32_t)frl.frl_mapupper[x]);
274 	}
275 
276 	if (readrange(rl, &rl->rl_runetype_ext, runetype_nranges, lastp, fp) ||
277 	    readrange(rl, &rl->rl_maplower_ext, maplower_nranges, lastp, fp) ||
278 	    readrange(rl, &rl->rl_mapupper_ext, mapupper_nranges, lastp, fp))
279 		goto err;
280 
281 	if (readentry(&rl->rl_runetype_ext, fp) != 0)
282 		goto err;
283 
284 	if ((uint8_t *)rl->rl_variable + rl->rl_variable_len >
285 	    (uint8_t *)lastp)
286 		goto rune_err;
287 
288 	if (rl->rl_variable_len == 0)
289 		rl->rl_variable = NULL;
290 	else if (fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1)
291 		goto rune_err;
292 	if (find_codeset(rl))
293 		goto rune_err;
294 
295 	/*
296 	 * error if we have junk at the tail
297 	 */
298 	if (ftello(fp) != sb.st_size)
299 		goto rune_err;
300 
301 	return(rl);
302 rune_err:
303 	_freeentry(&rl->rl_runetype_ext);
304 err:
305 	free(hostdata);
306 	return NULL;
307 }
308