1 /* $NetBSD: rune.c,v 1.46 2013/04/13 10:21:20 joerg Exp $ */ 2 /*- 3 * Copyright (c)2010 Citrus Project, 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/endian.h> 29 #include <sys/mman.h> 30 #include <sys/stat.h> 31 #include <assert.h> 32 #include <errno.h> 33 #include <fcntl.h> 34 #define __SETLOCALE_SOURCE__ 35 #include <locale.h> 36 #include <stddef.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <wchar.h> 42 43 #include "setlocale_local.h" 44 45 #include "citrus_module.h" 46 #include "citrus_ctype.h" 47 48 #include "runetype_local.h" 49 50 #include "multibyte.h" 51 52 #include "_wctype_local.h" 53 #include "_wctrans_local.h" 54 55 typedef struct { 56 _RuneLocale rl; 57 unsigned short rlp_ctype_tab [_CTYPE_NUM_CHARS + 1]; 58 short rlp_tolower_tab[_CTYPE_NUM_CHARS + 1]; 59 short rlp_toupper_tab[_CTYPE_NUM_CHARS + 1]; 60 char rlp_codeset[33]; /* XXX */ 61 62 #ifdef __BUILD_LEGACY 63 unsigned char rlp_compat_bsdctype[_CTYPE_NUM_CHARS + 1]; 64 #endif 65 } _RuneLocalePriv; 66 67 static __inline void 68 _rune_wctype_init(_RuneLocale *rl) 69 { 70 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 71 sizeof(rl->rl_wctype)); 72 } 73 74 static __inline void 75 _rune_wctrans_init(_RuneLocale *rl) 76 { 77 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_name = "tolower"; 78 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_cached = &rl->rl_maplower[0]; 79 rl->rl_wctrans[_WCTRANS_INDEX_LOWER].te_extmap = &rl->rl_maplower_ext; 80 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_name = "toupper"; 81 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_cached = &rl->rl_mapupper[0]; 82 rl->rl_wctrans[_WCTRANS_INDEX_UPPER].te_extmap = &rl->rl_mapupper_ext; 83 } 84 85 static __inline void 86 _rune_init_priv(_RuneLocalePriv *rlp) 87 { 88 #if _CTYPE_CACHE_SIZE != _CTYPE_NUM_CHARS 89 int i; 90 91 for (i = _CTYPE_CACHE_SIZE; i < _CTYPE_NUM_CHARS; ++i) { 92 rlp->rlp_ctype_tab [i + 1] = 0; 93 rlp->rlp_tolower_tab[i + 1] = i; 94 rlp->rlp_toupper_tab[i + 1] = i; 95 96 #ifdef __BUILD_LEGACY 97 rlp->rlp_compat_bsdctype[i + 1] = 0; 98 #endif 99 } 100 #endif 101 rlp->rlp_ctype_tab [0] = 0; 102 rlp->rlp_tolower_tab[0] = EOF; 103 rlp->rlp_toupper_tab[0] = EOF; 104 105 rlp->rl.rl_ctype_tab = (const unsigned short *)&rlp->rlp_ctype_tab[0]; 106 rlp->rl.rl_tolower_tab = (const short *)&rlp->rlp_tolower_tab[0]; 107 rlp->rl.rl_toupper_tab = (const short *)&rlp->rlp_toupper_tab[0]; 108 rlp->rl.rl_codeset = (const char *)&rlp->rlp_codeset[0]; 109 110 _rune_wctype_init(&rlp->rl); 111 _rune_wctrans_init(&rlp->rl); 112 113 #ifdef __BUILD_LEGACY 114 rlp->rlp_compat_bsdctype[0] = 0; 115 rlp->rl.rl_compat_bsdctype = (const unsigned char *) 116 &rlp->rlp_compat_bsdctype[0]; 117 #endif 118 } 119 120 static __inline void 121 _rune_find_codeset(char *s, size_t n, 122 char *var, size_t *plenvar) 123 { 124 size_t lenvar; 125 const char *endvar; 126 127 #define _RUNE_CODESET_LEN (sizeof(_RUNE_CODESET)-1) 128 129 lenvar = *plenvar; 130 for (/**/; lenvar > _RUNE_CODESET_LEN; ++var, --lenvar) { 131 if (!memcmp(var, _RUNE_CODESET, _RUNE_CODESET_LEN)) { 132 *var = '\0'; 133 *plenvar -= lenvar; 134 endvar = &var[_RUNE_CODESET_LEN]; 135 while (n-- > 1 && lenvar-- > _RUNE_CODESET_LEN) { 136 if (*endvar == ' ' || *endvar == '\t') 137 break; 138 *s++ = *endvar++; 139 } 140 break; 141 } 142 } 143 *s = '\0'; 144 } 145 146 #ifdef __BUILD_LEGACY 147 static __inline int 148 _runetype_to_bsdctype(_RuneType bits) 149 { 150 int ret; 151 152 if (bits == (_RuneType)0) 153 return 0; 154 ret = 0; 155 if (bits & _RUNETYPE_U) 156 ret |= _COMPAT_U; 157 if (bits & _RUNETYPE_L) 158 ret |= _COMPAT_L; 159 if (bits & _RUNETYPE_D) 160 ret |= _COMPAT_N; 161 if (bits & _RUNETYPE_S) 162 ret |= _COMPAT_S; 163 if (bits & _RUNETYPE_P) 164 ret |= _COMPAT_P; 165 if (bits & _RUNETYPE_C) 166 ret |= _COMPAT_C; 167 if ((bits & (_RUNETYPE_X | _RUNETYPE_D)) == _RUNETYPE_X) 168 ret |= _COMPAT_X; 169 if ((bits & (_RUNETYPE_R | _RUNETYPE_G)) == _RUNETYPE_R) 170 ret |= _COMPAT_B; 171 return ret; 172 } 173 #endif /* __BUILD_LEGACY */ 174 175 static __inline int 176 _rune_read_file(const char * __restrict var, size_t lenvar, 177 _RuneLocale ** __restrict prl) 178 { 179 int ret, i; 180 const _FileRuneLocale *frl; 181 const _FileRuneEntry *fre; 182 const uint32_t *frune; 183 _RuneLocalePriv *rlp; 184 _RuneLocale *rl; 185 _RuneEntry *re; 186 uint32_t *rune; 187 uint32_t runetype_len, maplower_len, mapupper_len, variable_len; 188 size_t len, n; 189 190 if (lenvar < sizeof(*frl)) 191 return EFTYPE; 192 lenvar -= sizeof(*frl); 193 frl = (const _FileRuneLocale *)(const void *)var; 194 if (memcmp(_RUNECT10_MAGIC, &frl->frl_magic[0], sizeof(frl->frl_magic))) 195 return EFTYPE; 196 197 runetype_len = be32toh(frl->frl_runetype_ext.frr_nranges); 198 maplower_len = be32toh(frl->frl_maplower_ext.frr_nranges); 199 mapupper_len = be32toh(frl->frl_mapupper_ext.frr_nranges); 200 len = runetype_len + maplower_len + mapupper_len; 201 202 fre = (const _FileRuneEntry *)(const void *)(frl + 1); 203 frune = (const uint32_t *)(const void *)(fre + len); 204 205 variable_len = be32toh((uint32_t)frl->frl_variable_len); 206 207 n = len * sizeof(*fre); 208 if (lenvar < n) 209 return EFTYPE; 210 lenvar -= n; 211 212 n = sizeof(*rlp) + (len * sizeof(*re)) + lenvar; 213 rlp = (_RuneLocalePriv *)malloc(n); 214 if (rlp == NULL) 215 return ENOMEM; 216 _rune_init_priv(rlp); 217 218 rl = &rlp->rl; 219 re = (_RuneEntry *)(void *)(rlp + 1); 220 rune = (uint32_t *)(void *)(re + len); 221 222 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 223 rl->rl_runetype[i] = be32toh(frl->frl_runetype[i]); 224 rl->rl_maplower[i] = be32toh((uint32_t)frl->frl_maplower[i]); 225 rl->rl_mapupper[i] = be32toh((uint32_t)frl->frl_mapupper[i]); 226 } 227 228 #define READ_RANGE(name) \ 229 do { \ 230 const _FileRuneEntry *end_fre; \ 231 const uint32_t *end_frune; \ 232 \ 233 rl->rl_##name##_ext.rr_nranges = name##_len; \ 234 rl->rl_##name##_ext.rr_rune_ranges = re; \ 235 \ 236 end_fre = fre + name##_len; \ 237 while (fre < end_fre) { \ 238 re->re_min = be32toh((uint32_t)fre->fre_min); \ 239 re->re_max = be32toh((uint32_t)fre->fre_max); \ 240 re->re_map = be32toh((uint32_t)fre->fre_map); \ 241 if (re->re_map != 0) { \ 242 re->re_rune_types = NULL; \ 243 } else { \ 244 re->re_rune_types = rune; \ 245 len = re->re_max - re->re_min + 1; \ 246 n = len * sizeof(*frune); \ 247 if (lenvar < n) { \ 248 ret = EFTYPE; \ 249 goto err; \ 250 } \ 251 lenvar -= n; \ 252 end_frune = frune + len; \ 253 while (frune < end_frune) \ 254 *rune++ = be32toh(*frune++); \ 255 } \ 256 ++fre, ++re; \ 257 } \ 258 } while (/*CONSTCOND*/0) 259 260 READ_RANGE(runetype); 261 READ_RANGE(maplower); 262 READ_RANGE(mapupper); 263 264 if (lenvar < variable_len) { 265 ret = EFTYPE; 266 goto err; 267 } 268 269 memcpy((void *)rune, (void const *)frune, variable_len); 270 rl->rl_variable_len = variable_len; 271 rl->rl_variable = (void *)rune; 272 273 _rune_find_codeset(rlp->rlp_codeset, sizeof(rlp->rlp_codeset), 274 (char *)rl->rl_variable, &rl->rl_variable_len); 275 276 ret = _citrus_ctype_open(&rl->rl_citrus_ctype, frl->frl_encoding, 277 rl->rl_variable, rl->rl_variable_len, _PRIVSIZE); 278 if (ret) 279 goto err; 280 if (__mb_len_max_runtime < 281 _citrus_ctype_get_mb_cur_max(rl->rl_citrus_ctype)) { 282 ret = EINVAL; 283 goto err; 284 } 285 286 for (i = 0; i < _CTYPE_CACHE_SIZE; ++i) { 287 wint_t wc; 288 _RuneType rc; 289 290 ret = _citrus_ctype_btowc(rl->rl_citrus_ctype, i, &wc); 291 if (ret) 292 goto err; 293 if (wc == WEOF) { 294 rlp->rlp_ctype_tab[i + 1] = 0; 295 rlp->rlp_tolower_tab[i + 1] = i; 296 rlp->rlp_toupper_tab[i + 1] = i; 297 } else { 298 rc = _runetype_priv(rl, wc); 299 rlp->rlp_ctype_tab[i + 1] = (unsigned short) 300 ((rc & ~_RUNETYPE_SWM) >> 8); 301 302 #ifdef __BUILD_LEGACY 303 rlp->rlp_compat_bsdctype[i + 1] 304 = _runetype_to_bsdctype(rc); 305 #endif 306 307 #define CONVERT_MAP(name) \ 308 do { \ 309 wint_t map; \ 310 int c; \ 311 \ 312 map = _towctrans_priv(wc, _wctrans_##name(rl)); \ 313 if (map == wc || (_citrus_ctype_wctob(rl->rl_citrus_ctype, \ 314 map, &c) || c == EOF)) \ 315 c = i; \ 316 rlp->rlp_to##name##_tab[i + 1] = (short)c; \ 317 } while (/*CONSTCOND*/0) 318 319 CONVERT_MAP(lower); 320 CONVERT_MAP(upper); 321 } 322 } 323 *prl = rl; 324 return 0; 325 326 err: 327 free(rlp); 328 return ret; 329 } 330 331 int 332 _rune_load(const char * __restrict var, size_t lenvar, 333 _RuneLocale ** __restrict prl) 334 { 335 int ret; 336 337 _DIAGASSERT(var != NULL || lenvar < 1); 338 _DIAGASSERT(prl != NULL); 339 340 if (lenvar < 1) 341 return EFTYPE; 342 switch (*var) { 343 case 'R': 344 ret = _rune_read_file(var, lenvar, prl); 345 break; 346 default: 347 ret = EFTYPE; 348 } 349 return ret; 350 } 351