/*	$NetBSD: rune.c,v 1.47 2022/04/19 20:32:15 rillig Exp $	*/
/*-
 * Copyright (c)2010 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27408f4697Sitojun
#include <sys/endian.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#define __SETLOCALE_SOURCE__
#include <locale.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>

#include "setlocale_local.h"

#include "citrus_module.h"
#include "citrus_ctype.h"

#include "runetype_local.h"

#include "multibyte.h"

#include "_wctype_local.h"
#include "_wctrans_local.h"
54408f4697Sitojun
553cc41207Stnozaki typedef struct {
563cc41207Stnozaki _RuneLocale rl;
57e8fa8f4dSjoerg unsigned short rlp_ctype_tab [_CTYPE_NUM_CHARS + 1];
583cc41207Stnozaki short rlp_tolower_tab[_CTYPE_NUM_CHARS + 1];
593cc41207Stnozaki short rlp_toupper_tab[_CTYPE_NUM_CHARS + 1];
603cc41207Stnozaki char rlp_codeset[33]; /* XXX */
61e8fa8f4dSjoerg
62e8fa8f4dSjoerg #ifdef __BUILD_LEGACY
63e8fa8f4dSjoerg unsigned char rlp_compat_bsdctype[_CTYPE_NUM_CHARS + 1];
64e8fa8f4dSjoerg #endif
653cc41207Stnozaki } _RuneLocalePriv;
662e9d6ac8Slukem
673cc41207Stnozaki static __inline void
_rune_wctype_init(_RuneLocale * rl)683cc41207Stnozaki _rune_wctype_init(_RuneLocale *rl)
6931e2cbf0Stshiozak {
7031e2cbf0Stshiozak memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
7131e2cbf0Stshiozak sizeof(rl->rl_wctype));
7231e2cbf0Stshiozak }
7331e2cbf0Stshiozak
743cc41207Stnozaki static __inline void
_rune_wctrans_init(_RuneLocale * rl)753cc41207Stnozaki _rune_wctrans_init(_RuneLocale *rl)
76408f4697Sitojun {
773cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name = "tolower";
783cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0];
793cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext;
803cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name = "toupper";
813cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0];
823cc41207Stnozaki rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext;
833cc41207Stnozaki }
843cc41207Stnozaki
853cc41207Stnozaki static __inline void
_rune_init_priv(_RuneLocalePriv * rlp)863cc41207Stnozaki _rune_init_priv(_RuneLocalePriv *rlp)
873cc41207Stnozaki {
883cc41207Stnozaki #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS
893cc41207Stnozaki int i;
903cc41207Stnozaki
913cc41207Stnozaki for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) {
923cc41207Stnozaki rlp->rlp_ctype_tab [i + 1] = 0;
933cc41207Stnozaki rlp->rlp_tolower_tab[i + 1] = i;
943cc41207Stnozaki rlp->rlp_toupper_tab[i + 1] = i;
95e8fa8f4dSjoerg
96e8fa8f4dSjoerg #ifdef __BUILD_LEGACY
97e8fa8f4dSjoerg rlp->rlp_compat_bsdctype[i + 1] = 0;
98e8fa8f4dSjoerg #endif
993cc41207Stnozaki }
1003cc41207Stnozaki #endif
1013cc41207Stnozaki rlp->rlp_ctype_tab [0] = 0;
1023cc41207Stnozaki rlp->rlp_tolower_tab[0] = EOF;
1033cc41207Stnozaki rlp->rlp_toupper_tab[0] = EOF;
1043cc41207Stnozaki
105e8fa8f4dSjoerg rlp->rl.rl_ctype_tab = (const unsigned short *)&rlp->rlp_ctype_tab[0];
1063cc41207Stnozaki rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0];
1073cc41207Stnozaki rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0];
1083cc41207Stnozaki rlp->rl.rl_codeset = (const char *)&rlp->rlp_codeset[0];
1093cc41207Stnozaki
1103cc41207Stnozaki _rune_wctype_init(&rlp->rl);
1113cc41207Stnozaki _rune_wctrans_init(&rlp->rl);
112e8fa8f4dSjoerg
113e8fa8f4dSjoerg #ifdef __BUILD_LEGACY
114e8fa8f4dSjoerg rlp->rlp_compat_bsdctype[0] = 0;
115e8fa8f4dSjoerg rlp->rl.rl_compat_bsdctype = (const unsigned char *)
116e8fa8f4dSjoerg &rlp->rlp_compat_bsdctype[0];
117e8fa8f4dSjoerg #endif
1183cc41207Stnozaki }
1193cc41207Stnozaki
1203cc41207Stnozaki static __inline void
_rune_find_codeset(char * s,size_t n,char * var,size_t * plenvar)1213cc41207Stnozaki _rune_find_codeset(char *s, size_t n,
12296bcb898Stnozaki char *var, size_t *plenvar)
1233cc41207Stnozaki {
12496bcb898Stnozaki size_t lenvar;
1253cc41207Stnozaki const char *endvar;
1263cc41207Stnozaki
1273cc41207Stnozaki #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1)
1283cc41207Stnozaki
12996bcb898Stnozaki lenvar = *plenvar;
1303cc41207Stnozaki for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) {
1313cc41207Stnozaki if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) {
13296bcb898Stnozaki *var = '\0';
13396bcb898Stnozaki *plenvar -= lenvar;
1343cc41207Stnozaki endvar = &var[_RUNE_CODESET_LEN];
1353cc41207Stnozaki while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) {
1363cc41207Stnozaki if (*endvar == ' ' || *endvar == '\t')
1373cc41207Stnozaki break;
1383cc41207Stnozaki *s++ = *endvar++;
1393cc41207Stnozaki }
1403cc41207Stnozaki break;
1413cc41207Stnozaki }
1423cc41207Stnozaki }
1433cc41207Stnozaki *s = '\0';
1443cc41207Stnozaki }
1453cc41207Stnozaki
#ifdef __BUILD_LEGACY
/*
 * _runetype_to_bsdctype --
 *	Translate a _RuneType bit set into the corresponding combination
 *	of _COMPAT_* flags.
 *
 *	U, L, S, P and C map one to one and D maps to _COMPAT_N.
 *	_COMPAT_X is reported only for X without D, and _COMPAT_B only
 *	for R without G.
 */
static __inline int
_runetype_to_bsdctype(_RuneType bits)
{
	int compat = 0;

	if (bits != (_RuneType)0) {
		compat |= (bits & _RUNETYPE_U) ? _COMPAT_U : 0;
		compat |= (bits & _RUNETYPE_L) ? _COMPAT_L : 0;
		compat |= (bits & _RUNETYPE_D) ? _COMPAT_N : 0;
		compat |= (bits & _RUNETYPE_S) ? _COMPAT_S : 0;
		compat |= (bits & _RUNETYPE_P) ? _COMPAT_P : 0;
		compat |= (bits & _RUNETYPE_C) ? _COMPAT_C : 0;

		/* X set while D is clear. */
		if ((bits & (_RUNETYPE_X | _RUNETYPE_D)) == _RUNETYPE_X)
			compat |= _COMPAT_X;

		/* R set while G is clear. */
		if ((bits & (_RUNETYPE_R | _RUNETYPE_G)) == _RUNETYPE_R)
			compat |= _COMPAT_B;
	}
	return compat;
}
#endif /* __BUILD_LEGACY */
174e8fa8f4dSjoerg
1753cc41207Stnozaki static __inline int
_rune_read_file(const char * __restrict var,size_t lenvar,_RuneLocale ** __restrict prl)1763cc41207Stnozaki _rune_read_file(const char * __restrict var, size_t lenvar,
1773cc41207Stnozaki _RuneLocale ** __restrict prl)
1783cc41207Stnozaki {
1793cc41207Stnozaki int ret, i;
1803cc41207Stnozaki const _FileRuneLocale *frl;
1813cc41207Stnozaki const _FileRuneEntry *fre;
1823cc41207Stnozaki const uint32_t *frune;
1833cc41207Stnozaki _RuneLocalePriv *rlp;
184408f4697Sitojun _RuneLocale *rl;
1853cc41207Stnozaki _RuneEntry *re;
1863cc41207Stnozaki uint32_t *rune;
1873cc41207Stnozaki uint32_t runetype_len, maplower_len, mapupper_len, variable_len;
1883cc41207Stnozaki size_t len, n;
189408f4697Sitojun
1903cc41207Stnozaki if (lenvar < sizeof(*frl))
1913cc41207Stnozaki return EFTYPE;
1923cc41207Stnozaki lenvar -= sizeof(*frl);
1933cc41207Stnozaki frl = (const _FileRuneLocale *)(const void *)var;
1943cc41207Stnozaki if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic)))
1953cc41207Stnozaki return EFTYPE;
1962e9d6ac8Slukem
1973cc41207Stnozaki runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges);
1983cc41207Stnozaki maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges);
1993cc41207Stnozaki mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges);
2003cc41207Stnozaki len = runetype_len + maplower_len + mapupper_len;
201408f4697Sitojun
2023cc41207Stnozaki fre = (const _FileRuneEntry *)(const void *)(frl + 1);
2033cc41207Stnozaki frune = (const uint32_t *)(const void *)(fre + len);
204408f4697Sitojun
2053cc41207Stnozaki variable_len = be32toh((uint32_t)frl->frl_variable_len);
206408f4697Sitojun
207277ab1b5Stnozaki n = len * sizeof(*fre);
2083cc41207Stnozaki if (lenvar < n)
2093cc41207Stnozaki return EFTYPE;
2103cc41207Stnozaki lenvar -= n;
211408f4697Sitojun
2123cc41207Stnozaki n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar;
2133cc41207Stnozaki rlp = (_RuneLocalePriv *)malloc(n);
2143cc41207Stnozaki if (rlp == NULL)
2153cc41207Stnozaki return ENOMEM;
2163cc41207Stnozaki _rune_init_priv(rlp);
217408f4697Sitojun
2183cc41207Stnozaki rl = &rlp->rl;
2193cc41207Stnozaki re = (_RuneEntry *)(void *)(rlp + 1);
2203cc41207Stnozaki rune = (uint32_t *)(void *)(re + len);
221408f4697Sitojun
2223cc41207Stnozaki for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
2233cc41207Stnozaki rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]);
2243cc41207Stnozaki rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]);
2253cc41207Stnozaki rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]);
226408f4697Sitojun }
227408f4697Sitojun
2283cc41207Stnozaki #define READ_RANGE(name) \
2293cc41207Stnozaki do { \
2303cc41207Stnozaki const _FileRuneEntry *end_fre; \
2313cc41207Stnozaki const uint32_t *end_frune; \
2323cc41207Stnozaki \
2333cc41207Stnozaki rl->rl_##name##_ext.rr_nranges = name##_len; \
2343cc41207Stnozaki rl->rl_##name##_ext.rr_rune_ranges = re; \
2353cc41207Stnozaki \
2363cc41207Stnozaki end_fre = fre + name##_len; \
2373cc41207Stnozaki while (fre < end_fre) { \
2383cc41207Stnozaki re->re_min = be32toh((uint32_t)fre->fre_min); \
2393cc41207Stnozaki re->re_max = be32toh((uint32_t)fre->fre_max); \
2403cc41207Stnozaki re->re_map = be32toh((uint32_t)fre->fre_map); \
2413cc41207Stnozaki if (re->re_map != 0) { \
2423cc41207Stnozaki re->re_rune_types = NULL; \
2433cc41207Stnozaki } else { \
2443cc41207Stnozaki re->re_rune_types = rune; \
2453cc41207Stnozaki len = re->re_max - re->re_min + 1; \
2463cc41207Stnozaki n = len * sizeof(*frune); \
2473cc41207Stnozaki if (lenvar < n) { \
2483cc41207Stnozaki ret = EFTYPE; \
2493cc41207Stnozaki goto err; \
2503cc41207Stnozaki } \
2513cc41207Stnozaki lenvar -= n; \
2523cc41207Stnozaki end_frune = frune + len; \
2533cc41207Stnozaki while (frune < end_frune) \
2543cc41207Stnozaki *rune++ = be32toh(*frune++); \
2553cc41207Stnozaki } \
2563cc41207Stnozaki ++fre, ++re; \
2573cc41207Stnozaki } \
258*388550b0Srillig } while (0)
2593cc41207Stnozaki
2603cc41207Stnozaki READ_RANGE(runetype);
2613cc41207Stnozaki READ_RANGE(maplower);
2623cc41207Stnozaki READ_RANGE(mapupper);
2633cc41207Stnozaki
264277ab1b5Stnozaki if (lenvar < variable_len) {
2653cc41207Stnozaki ret = EFTYPE;
2663cc41207Stnozaki goto err;
2673cc41207Stnozaki }
2683cc41207Stnozaki
269277ab1b5Stnozaki memcpy((void *)rune, (void const *)frune, variable_len);
270277ab1b5Stnozaki rl->rl_variable_len = variable_len;
271277ab1b5Stnozaki rl->rl_variable = (void *)rune;
272277ab1b5Stnozaki
2733cc41207Stnozaki _rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset),
27496bcb898Stnozaki (char *)rl->rl_variable, &rl->rl_variable_len);
2753cc41207Stnozaki
2763cc41207Stnozaki ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding,
2773cc41207Stnozaki rl->rl_variable, rl->rl_variable_len, _PRIVSIZE);
2783cc41207Stnozaki if (ret)
2793cc41207Stnozaki goto err;
2803cc41207Stnozaki if (__mb_len_max_runtime <
2813cc41207Stnozaki _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) {
2823cc41207Stnozaki ret = EINVAL;
2833cc41207Stnozaki goto err;
2843cc41207Stnozaki }
2853cc41207Stnozaki
2863cc41207Stnozaki for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) {
2873cc41207Stnozaki wint_t wc;
288e8fa8f4dSjoerg _RuneType rc;
2893cc41207Stnozaki
2903cc41207Stnozaki ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc);
2913cc41207Stnozaki if (ret)
2923cc41207Stnozaki goto err;
2933cc41207Stnozaki if (wc == WEOF) {
2943cc41207Stnozaki rlp->rlp_ctype_tab[i + 1] = 0;
2953cc41207Stnozaki rlp->rlp_tolower_tab[i + 1] = i;
2963cc41207Stnozaki rlp->rlp_toupper_tab[i + 1] = i;
2973cc41207Stnozaki } else {
298e8fa8f4dSjoerg rc = _runetype_priv(rl, wc);
299e8fa8f4dSjoerg rlp->rlp_ctype_tab[i + 1] = (unsigned short)
300e8fa8f4dSjoerg ((rc & ~_RUNETYPE_SWM) >> 8);
301e8fa8f4dSjoerg
302e8fa8f4dSjoerg #ifdef __BUILD_LEGACY
303e8fa8f4dSjoerg rlp->rlp_compat_bsdctype[i + 1]
304e8fa8f4dSjoerg = _runetype_to_bsdctype(rc);
305e8fa8f4dSjoerg #endif
3063cc41207Stnozaki
3073cc41207Stnozaki #define CONVERT_MAP(name) \
3083cc41207Stnozaki do { \
3093cc41207Stnozaki wint_t map; \
3103cc41207Stnozaki int c; \
3113cc41207Stnozaki \
3123cc41207Stnozaki map = _towctrans_priv(wc, _wctrans_##name(rl)); \
3133cc41207Stnozaki if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype, \
3143cc41207Stnozaki map, &c) || c == EOF)) \
3153cc41207Stnozaki c = i; \
3163cc41207Stnozaki rlp->rlp_to##name##_tab[i + 1] = (short)c; \
317*388550b0Srillig } while (0)
3183cc41207Stnozaki
3193cc41207Stnozaki CONVERT_MAP(lower);
3203cc41207Stnozaki CONVERT_MAP(upper);
3213cc41207Stnozaki }
3223cc41207Stnozaki }
3233cc41207Stnozaki *prl = rl;
3243cc41207Stnozaki return 0;
3253cc41207Stnozaki
3263cc41207Stnozaki err:
3273cc41207Stnozaki free(rlp);
3283cc41207Stnozaki return ret;
3293cc41207Stnozaki }
3303cc41207Stnozaki
3313cc41207Stnozaki int
_rune_load(const char * __restrict var,size_t lenvar,_RuneLocale ** __restrict prl)3323cc41207Stnozaki _rune_load(const char * __restrict var, size_t lenvar,
3333cc41207Stnozaki _RuneLocale ** __restrict prl)
334408f4697Sitojun {
3353cc41207Stnozaki int ret;
3362e9d6ac8Slukem
3373cc41207Stnozaki _DIAGASSERT(var != NULL || lenvar < 1);
3383cc41207Stnozaki _DIAGASSERT(prl != NULL);
3392e9d6ac8Slukem
3403cc41207Stnozaki if (lenvar < 1)
3413cc41207Stnozaki return EFTYPE;
3423cc41207Stnozaki switch (*var) {
3433cc41207Stnozaki case 'R':
3443cc41207Stnozaki ret = _rune_read_file(var, lenvar, prl);
3453cc41207Stnozaki break;
3463cc41207Stnozaki default:
3473cc41207Stnozaki ret = EFTYPE;
348408f4697Sitojun }
3493cc41207Stnozaki return ret;
350408f4697Sitojun }
351