1 /* $NetBSD: rune.c,v 1.42 2011/03/25 00:45:24 joerg Exp $ */ 2 3 /*- 4 * Copyright (c)2010 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/endian.h> 30 #include <sys/mman.h> 31 #include <sys/stat.h> 32 #include <assert.h> 33 #include <errno.h> 34 #include <fcntl.h> 35 #define __SETLOCALE_SOURCE__ 36 #include <locale.h> 37 #include <stddef.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 #include <wchar.h> 43 44 #include "setlocale_local.h" 45 46 #include "citrus_module.h" 47 #include "citrus_ctype.h" 48 49 #include "runetype_local.h" 50 #include "bsdctype_local.h" 51 52 #include "multibyte.h" 53 54 #include "_wctype_local.h" 55 #include "_wctrans_local.h" 56 57 typedef struct { 58 _RuneLocale rl; 59 unsigned char rlp_ctype_tab [_CTYPE_NUM_CHARS + 1]; 60 short rlp_tolower_tab[_CTYPE_NUM_CHARS + 1]; 61 short rlp_toupper_tab[_CTYPE_NUM_CHARS + 1]; 62 char rlp_codeset[33]; /* XXX */ 63 } _RuneLocalePriv; 64 65 static __inline void 66 _rune_wctype_init(_RuneLocale *rl) 67 { 68 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 69 sizeof(rl->rl_wctype)); 70 } 71 72 static __inline void 73 _rune_wctrans_init(_RuneLocale *rl) 74 { 75 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name = "tolower"; 76 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0]; 77 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext; 78 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name = "toupper"; 79 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0]; 80 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext; 81 } 82 83 static __inline void 84 _rune_init_priv(_RuneLocalePriv *rlp) 85 { 86 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS 87 int i; 88 89 for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) { 90 rlp->rlp_ctype_tab [i + 1] = 0; 91 rlp->rlp_tolower_tab[i + 1] = i; 92 rlp->rlp_toupper_tab[i + 1] = i; 93 } 94 #endif 95 rlp->rlp_ctype_tab [0] = 0; 96 rlp->rlp_tolower_tab[0] = EOF; 97 rlp->rlp_toupper_tab[0] = EOF; 98 99 rlp->rl.rl_ctype_tab = (const unsigned char *)&rlp->rlp_ctype_tab[0]; 100 rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0]; 101 rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0]; 102 rlp->rl.rl_codeset = (const char *)&rlp->rlp_codeset[0]; 103 104 _rune_wctype_init(&rlp->rl); 105 _rune_wctrans_init(&rlp->rl); 106 } 107 108 static __inline void 109 _rune_find_codeset(char *s, size_t n, 110 char *var, size_t *plenvar) 111 { 112 size_t lenvar; 113 const char *endvar; 114 115 #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1) 116 117 lenvar = *plenvar; 118 for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) { 119 if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) { 120 *var = '\0'; 121 *plenvar -= lenvar; 122 endvar = &var[_RUNE_CODESET_LEN]; 123 while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) { 124 if (*endvar == ' ' || *endvar == '\t') 125 break; 126 *s++ = *endvar++; 127 } 128 break; 129 } 130 } 131 *s = '\0'; 132 } 133 134 static __inline int 135 _rune_read_file(const char * __restrict var, size_t lenvar, 136 _RuneLocale ** __restrict prl) 137 { 138 int ret, i; 139 const _FileRuneLocale *frl; 140 const _FileRuneEntry *fre; 141 const uint32_t *frune; 142 _RuneLocalePriv *rlp; 143 _RuneLocale *rl; 144 _RuneEntry *re; 145 uint32_t *rune; 146 uint32_t runetype_len, maplower_len, mapupper_len, variable_len; 147 size_t len, n; 148 149 if (lenvar < sizeof(*frl)) 150 return EFTYPE; 151 lenvar -= sizeof(*frl); 152 frl = (const _FileRuneLocale *)(const void *)var; 153 if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic))) 154 return EFTYPE; 155 156 runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges); 157 maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges); 158 mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges); 159 len = runetype_len + maplower_len + mapupper_len; 160 161 fre = (const _FileRuneEntry *)(const void *)(frl + 1); 162 frune = (const uint32_t *)(const void *)(fre + len); 163 164 variable_len = be32toh((uint32_t)frl->frl_variable_len); 165 166 n = (len * sizeof(*fre)) + variable_len; 167 if (lenvar < n) 168 return EFTYPE; 169 lenvar -= n; 170 171 n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar; 172 rlp = (_RuneLocalePriv *)malloc(n); 173 if (rlp == NULL) 174 return ENOMEM; 175 _rune_init_priv(rlp); 176 177 rl = &rlp->rl; 178 re = (_RuneEntry *)(void *)(rlp + 1); 179 rune = (uint32_t *)(void *)(re + len); 180 181 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 182 rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]); 183 rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]); 184 rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]); 185 } 186 187 #define READ_RANGE(name) \ 188 do { \ 189 const _FileRuneEntry *end_fre; \ 190 const uint32_t *end_frune; \ 191 \ 192 rl->rl_##name##_ext.rr_nranges = name##_len; \ 193 rl->rl_##name##_ext.rr_rune_ranges = re; \ 194 \ 195 end_fre = fre + name##_len; \ 196 while (fre < end_fre) { \ 197 re->re_min = be32toh((uint32_t)fre->fre_min); \ 198 re->re_max = be32toh((uint32_t)fre->fre_max); \ 199 re->re_map = be32toh((uint32_t)fre->fre_map); \ 200 if (re->re_map != 0) { \ 201 re->re_rune_types = NULL; \ 202 } else { \ 203 re->re_rune_types = rune; \ 204 len = re->re_max - re->re_min + 1; \ 205 n = len * sizeof(*frune); \ 206 if (lenvar < n) { \ 207 ret = EFTYPE; \ 208 goto err; \ 209 } \ 210 lenvar -= n; \ 211 end_frune = frune + len; \ 212 while (frune < end_frune) \ 213 *rune++ = be32toh(*frune++); \ 214 } \ 215 ++fre, ++re; \ 216 } \ 217 } while (/*CONSTCOND*/0) 218 219 READ_RANGE(runetype); 220 READ_RANGE(maplower); 221 READ_RANGE(mapupper); 222 223 memcpy((void *)rune, (void const *)frune, variable_len); 224 rl->rl_variable_len = variable_len; 225 rl->rl_variable = (void *)rune; 226 227 if (lenvar > 0) { 228 ret = EFTYPE; 229 goto err; 230 } 231 232 _rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset), 233 (char *)rl->rl_variable, &rl->rl_variable_len); 234 235 ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding, 236 rl->rl_variable, rl->rl_variable_len, _PRIVSIZE); 237 if (ret) 238 goto err; 239 if (__mb_len_max_runtime < 240 _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) { 241 ret = EINVAL; 242 goto err; 243 } 244 245 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 246 wint_t wc; 247 248 ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc); 249 if (ret) 250 goto err; 251 if (wc == WEOF) { 252 rlp->rlp_ctype_tab[i + 1] = 0; 253 rlp->rlp_tolower_tab[i + 1] = i; 254 rlp->rlp_toupper_tab[i + 1] = i; 255 } else { 256 rlp->rlp_ctype_tab[i + 1] = (unsigned char) 257 _runetype_to_ctype(_runetype_priv(rl, wc)); 258 259 #define CONVERT_MAP(name) \ 260 do { \ 261 wint_t map; \ 262 int c; \ 263 \ 264 map = _towctrans_priv(wc, _wctrans_##name(rl)); \ 265 if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype, \ 266 map, &c) || c == EOF)) \ 267 c = i; \ 268 rlp->rlp_to##name##_tab[i + 1] = (short)c; \ 269 } while (/*CONSTCOND*/0) 270 271 CONVERT_MAP(lower); 272 CONVERT_MAP(upper); 273 } 274 } 275 *prl = rl; 276 return 0; 277 278 err: 279 free(rlp); 280 return ret; 281 } 282 283 int 284 _rune_load(const char * __restrict var, size_t lenvar, 285 _RuneLocale ** __restrict prl) 286 { 287 int ret; 288 289 _DIAGASSERT(var != NULL || lenvar < 1); 290 _DIAGASSERT(prl != NULL); 291 292 if (lenvar < 1) 293 return EFTYPE; 294 switch (*var) { 295 case 'R': 296 ret = _rune_read_file(var, lenvar, prl); 297 break; 298 default: 299 ret = EFTYPE; 300 } 301 return ret; 302 } 303