xref: /netbsd-src/lib/libc/locale/rune.c (revision 6c7179c5fb6d12332e7e47fff3fd0952e232a532)
1 /* $NetBSD: rune.c,v 1.42 2011/03/25 00:45:24 joerg Exp $ */
2 
3 /*-
4  * Copyright (c)2010 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/endian.h>
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <assert.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #define __SETLOCALE_SOURCE__
36 #include <locale.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <wchar.h>
43 
44 #include "setlocale_local.h"
45 
46 #include "citrus_module.h"
47 #include "citrus_ctype.h"
48 
49 #include "runetype_local.h"
50 #include "bsdctype_local.h"
51 
52 #include "multibyte.h"
53 
54 #include "_wctype_local.h"
55 #include "_wctrans_local.h"
56 
57 typedef struct {
58 	_RuneLocale rl;
59 	unsigned char	rlp_ctype_tab  [_CTYPE_NUM_CHARS + 1];
60 	short		rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
61 	short		rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
62 	char		rlp_codeset[33]; /* XXX */
63 } _RuneLocalePriv;
64 
65 static __inline void
66 _rune_wctype_init(_RuneLocale *rl)
67 {
68 	memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
69 	    sizeof(rl->rl_wctype));
70 }
71 
72 static __inline void
73 _rune_wctrans_init(_RuneLocale *rl)
74 {
75 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name   = "tolower";
76 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
77 	rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
78 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name   = "toupper";
79 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
80 	rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
81 }
82 
83 static __inline void
84 _rune_init_priv(_RuneLocalePriv *rlp)
85 {
86 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
87 	int i;
88 
89 	for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
90 		rlp->rlp_ctype_tab  [i + 1] = 0;
91 		rlp->rlp_tolower_tab[i + 1] = i;
92 		rlp->rlp_toupper_tab[i + 1] = i;
93 	}
94 #endif
95 	rlp->rlp_ctype_tab  [0] = 0;
96 	rlp->rlp_tolower_tab[0] = EOF;
97 	rlp->rlp_toupper_tab[0] = EOF;
98 
99 	rlp->rl.rl_ctype_tab   = (const unsigned char *)&rlp->rlp_ctype_tab[0];
100 	rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
101 	rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
102 	rlp->rl.rl_codeset     = (const char *)&rlp->rlp_codeset[0];
103 
104 	_rune_wctype_init(&rlp->rl);
105 	_rune_wctrans_init(&rlp->rl);
106 }
107 
108 static __inline void
109 _rune_find_codeset(char *s, size_t n,
110     char *var, size_t *plenvar)
111 {
112 	size_t lenvar;
113 	const char *endvar;
114 
115 #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)
116 
117 	lenvar = *plenvar;
118 	for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) {
119 		if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) {
120 			*var = '\0';
121 			*plenvar -= lenvar;
122 			endvar = &var[_RUNE_CODESET_LEN];
123 			while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) {
124 				if (*endvar == ' ' || *endvar == '\t')
125 					break;
126 				*s++ = *endvar++;
127 			}
128 			break;
129 		}
130 	}
131 	*s = '\0';
132 }
133 
134 static __inline int
135 _rune_read_file(const char * __restrict var, size_t lenvar,
136     _RuneLocale ** __restrict prl)
137 {
138 	int ret, i;
139 	const _FileRuneLocale *frl;
140 	const _FileRuneEntry *fre;
141 	const uint32_t *frune;
142 	_RuneLocalePriv *rlp;
143 	_RuneLocale *rl;
144 	_RuneEntry *re;
145 	uint32_t *rune;
146 	uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
147 	size_t len, n;
148 
149 	if (lenvar < sizeof(*frl))
150 		return EFTYPE;
151 	lenvar -= sizeof(*frl);
152 	frl = (const _FileRuneLocale *)(const void *)var;
153 	if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
154 		return EFTYPE;
155 
156 	runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
157 	maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
158 	mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
159 	len = runetype_len + maplower_len + mapupper_len;
160 
161 	fre = (const _FileRuneEntry *)(const void *)(frl + 1);
162 	frune = (const uint32_t *)(const void *)(fre + len);
163 
164 	variable_len = be32toh((uint32_t)frl->frl_variable_len);
165 
166 	n = (len * sizeof(*fre)) + variable_len;
167 	if (lenvar < n)
168 		return EFTYPE;
169 	lenvar -= n;
170 
171 	n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
172 	rlp = (_RuneLocalePriv *)malloc(n);
173 	if (rlp == NULL)
174 		return ENOMEM;
175 	_rune_init_priv(rlp);
176 
177 	rl = &rlp->rl;
178 	re = (_RuneEntry *)(void *)(rlp + 1);
179 	rune = (uint32_t *)(void *)(re + len);
180 
181 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
182 		rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
183 		rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
184 		rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
185 	}
186 
187 #define READ_RANGE(name)						\
188 do {									\
189 	const _FileRuneEntry *end_fre;					\
190 	const uint32_t *end_frune;					\
191 									\
192 	rl->rl_##name##_ext.rr_nranges = name##_len;			\
193 	rl->rl_##name##_ext.rr_rune_ranges = re;			\
194 									\
195 	end_fre = fre + name##_len;					\
196 	while (fre < end_fre) {						\
197 		re->re_min = be32toh((uint32_t)fre->fre_min);		\
198 		re->re_max = be32toh((uint32_t)fre->fre_max);		\
199 		re->re_map = be32toh((uint32_t)fre->fre_map);		\
200 		if (re->re_map != 0) {					\
201 			re->re_rune_types = NULL;			\
202 		} else {						\
203 			re->re_rune_types = rune;			\
204 			len = re->re_max - re->re_min + 1;		\
205 			n = len * sizeof(*frune);			\
206 			if (lenvar < n) {				\
207 				ret = EFTYPE;				\
208 				goto err;				\
209 			}						\
210 			lenvar -= n;					\
211 			end_frune = frune + len;			\
212 			while (frune < end_frune)			\
213 				*rune++ = be32toh(*frune++);		\
214 		}							\
215 		++fre, ++re;						\
216 	}								\
217 } while (/*CONSTCOND*/0)
218 
219 	READ_RANGE(runetype);
220 	READ_RANGE(maplower);
221 	READ_RANGE(mapupper);
222 
223 	memcpy((void *)rune, (void const *)frune, variable_len);
224 	rl->rl_variable_len = variable_len;
225 	rl->rl_variable = (void *)rune;
226 
227 	if (lenvar > 0) {
228 		ret = EFTYPE;
229 		goto err;
230 	}
231 
232 	_rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
233 	    (char *)rl->rl_variable, &rl->rl_variable_len);
234 
235 	ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
236 	    rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
237 	if (ret)
238 		goto err;
239 	if (__mb_len_max_runtime <
240 	    _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
241 		ret = EINVAL;
242 		goto err;
243 	}
244 
245 	for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
246 		wint_t wc;
247 
248 		ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
249 		if (ret)
250 			goto err;
251 		if (wc == WEOF) {
252 			rlp->rlp_ctype_tab[i + 1] = 0;
253 			rlp->rlp_tolower_tab[i + 1] = i;
254 			rlp->rlp_toupper_tab[i + 1] = i;
255 		} else {
256 			rlp->rlp_ctype_tab[i + 1] = (unsigned char)
257 			    _runetype_to_ctype(_runetype_priv(rl, wc));
258 
259 #define CONVERT_MAP(name)						\
260 do {									\
261 	wint_t map;							\
262 	int c;								\
263 									\
264 	map = _towctrans_priv(wc, _wctrans_##name(rl));			\
265 	if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype,	\
266 	    map, &c)  || c == EOF))					\
267 		c = i;							\
268 	rlp->rlp_to##name##_tab[i + 1] = (short)c;			\
269 } while (/*CONSTCOND*/0)
270 
271 			CONVERT_MAP(lower);
272 			CONVERT_MAP(upper);
273 		}
274 	}
275 	*prl = rl;
276 	return 0;
277 
278 err:
279 	free(rlp);
280 	return ret;
281 }
282 
283 int
284 _rune_load(const char * __restrict var, size_t lenvar,
285     _RuneLocale ** __restrict prl)
286 {
287 	int ret;
288 
289 	_DIAGASSERT(var != NULL || lenvar < 1);
290 	_DIAGASSERT(prl != NULL);
291 
292 	if (lenvar < 1)
293 		return EFTYPE;
294 	switch (*var) {
295 	case 'R':
296 		ret = _rune_read_file(var, lenvar, prl);
297 		break;
298 	default:
299 		ret = EFTYPE;
300 	}
301 	return ret;
302 }
303