/* $NetBSD: rune.c,v 1.30 2007/09/29 07:55:45 tnozaki Exp $ */

/*-
 * Copyright (c)1999 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 #if 0 64 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; 65 #else 66 __RCSID("$NetBSD: rune.c,v 1.30 2007/09/29 07:55:45 tnozaki Exp $"); 67 #endif 68 #endif /* LIBC_SCCS and not lint */ 69 70 #include "namespace.h" 71 #include <assert.h> 72 #include <stdio.h> 73 #include <string.h> 74 #include <stdlib.h> 75 #include <errno.h> 76 #include <wchar.h> 77 #include <sys/types.h> 78 #include <sys/stat.h> 79 #include <citrus/citrus_module.h> 80 #include <citrus/citrus_ctype.h> 81 #include "rune.h" 82 #include "rune_local.h" 83 84 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *)); 85 static void _freeentry __P((_RuneRange *)); 86 static void _wctype_init __P((_RuneLocale *rl)); 87 88 static int 89 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, 90 FILE *fp) 91 { 92 uint32_t i; 93 _RuneEntry *re; 94 _FileRuneEntry fre; 95 96 _DIAGASSERT(rl != NULL); 97 _DIAGASSERT(rr != NULL); 98 _DIAGASSERT(frr != NULL); 99 _DIAGASSERT(lastp != NULL); 100 _DIAGASSERT(fp != NULL); 101 102 re = (_RuneEntry *)rl->rl_variable; 103 104 rr->rr_nranges = ntohl(frr->frr_nranges); 105 if (rr->rr_nranges == 0) { 106 rr->rr_rune_ranges = NULL; 107 return 0; 108 } 109 110 rr->rr_rune_ranges = re; 111 for (i = 0; i < rr->rr_nranges; i++) { 112 if (fread(&fre, sizeof(fre), 1, fp) != 1) 113 return -1; 114 115 re->re_min = ntohl((u_int32_t)fre.fre_min); 116 re->re_max = ntohl((u_int32_t)fre.fre_max); 117 re->re_map = ntohl((u_int32_t)fre.fre_map); 118 re++; 119 120 if ((void *)re > lastp) 121 return -1; 122 } 123 rl->rl_variable = re; 124 return 0; 125 } 126 127 static int 128 readentry(_RuneRange *rr, FILE *fp) 129 { 130 _RuneEntry *re; 131 size_t l, i, j; 132 int error; 133 134 _DIAGASSERT(rr != NULL); 135 _DIAGASSERT(fp != NULL); 136 137 re = rr->rr_rune_ranges; 138 for (i = 0; i < rr->rr_nranges; i++) { 139 if (re[i].re_map != 0) { 140 
re[i].re_rune_types = NULL; 141 continue; 142 } 143 144 l = re[i].re_max - re[i].re_min + 1; 145 re[i].re_rune_types = malloc(l * sizeof(_RuneType)); 146 if (!re[i].re_rune_types) { 147 error = ENOMEM; 148 goto fail; 149 } 150 memset(re[i].re_rune_types, 0, l * sizeof(_RuneType)); 151 152 if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l) 153 goto fail2; 154 155 for (j = 0; j < l; j++) 156 re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]); 157 } 158 return 0; 159 160 fail: 161 for (j = 0; j < i; j++) { 162 free(re[j].re_rune_types); 163 re[j].re_rune_types = NULL; 164 } 165 return error; 166 fail2: 167 for (j = 0; j <= i; j++) { 168 free(re[j].re_rune_types); 169 re[j].re_rune_types = NULL; 170 } 171 return errno; 172 } 173 174 /* XXX: temporary implementation */ 175 static void 176 find_codeset(_RuneLocale *rl) 177 { 178 char *top, *codeset, *tail, *ep; 179 180 /* end of rl_variable region */ 181 ep = (char *)rl->rl_variable; 182 ep += rl->rl_variable_len; 183 rl->rl_codeset = NULL; 184 if (!(top = strstr(rl->rl_variable, _RUNE_CODESET))) 185 return; 186 tail = strpbrk(top, " \t"); 187 codeset = top + sizeof(_RUNE_CODESET) - 1; 188 if (tail) { 189 *top = *tail; 190 *tail = '\0'; 191 rl->rl_codeset = strdup(codeset); 192 strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1))); 193 } else { 194 *top = '\0'; 195 rl->rl_codeset = strdup(codeset); 196 } 197 } 198 199 void 200 _freeentry(_RuneRange *rr) 201 { 202 _RuneEntry *re; 203 uint32_t i; 204 205 _DIAGASSERT(rr != NULL); 206 207 re = rr->rr_rune_ranges; 208 for (i = 0; i < rr->rr_nranges; i++) { 209 if (re[i].re_rune_types) 210 free(re[i].re_rune_types); 211 re[i].re_rune_types = NULL; 212 } 213 } 214 215 void 216 _wctype_init(_RuneLocale *rl) 217 { 218 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 219 sizeof(rl->rl_wctype)); 220 } 221 222 223 _RuneLocale * 224 _Read_RuneMagi(fp) 225 FILE *fp; 226 { 227 /* file */ 228 _FileRuneLocale frl; 229 /* host data */ 230 char *hostdata; 231 size_t 
hostdatalen; 232 void *lastp; 233 _RuneLocale *rl; 234 struct stat sb; 235 int x; 236 237 _DIAGASSERT(fp != NULL); 238 239 if (fstat(fileno(fp), &sb) < 0) 240 return NULL; 241 242 if (sb.st_size < sizeof(_FileRuneLocale)) 243 return NULL; 244 /* XXX more validation? */ 245 246 /* Someone might have read the magic number once already */ 247 rewind(fp); 248 249 if (fread(&frl, sizeof(frl), 1, fp) != 1) 250 return NULL; 251 if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic))) 252 return NULL; 253 254 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) + 255 ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) + 256 ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) + 257 ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry); 258 259 if ((hostdata = malloc(hostdatalen)) == NULL) 260 return NULL; 261 memset(hostdata, 0, hostdatalen); 262 lastp = hostdata + hostdatalen; 263 264 rl = (_RuneLocale *)(void *)hostdata; 265 rl->rl_variable = rl + 1; 266 267 memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic)); 268 memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding)); 269 270 rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune); 271 rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len); 272 273 for (x = 0; x < _CACHED_RUNES; ++x) { 274 rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]); 275 276 /* XXX assumes rune_t = u_int32_t */ 277 rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]); 278 rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]); 279 } 280 281 if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp)) 282 { 283 free(hostdata); 284 return NULL; 285 } 286 if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp)) 287 { 288 free(hostdata); 289 return NULL; 290 } 291 if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp)) 292 { 293 free(hostdata); 294 return NULL; 295 } 296 297 if (readentry(&rl->rl_runetype_ext, fp) != 0) 
{ 298 free(hostdata); 299 return NULL; 300 } 301 302 if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len > 303 (u_int8_t *)lastp) { 304 _freeentry(&rl->rl_runetype_ext); 305 free(hostdata); 306 return NULL; 307 } 308 if (rl->rl_variable_len == 0) 309 rl->rl_variable = NULL; 310 if (rl->rl_variable == NULL || 311 fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) { 312 _freeentry(&rl->rl_runetype_ext); 313 free(hostdata); 314 return NULL; 315 } 316 find_codeset(rl); 317 _wctype_init(rl); 318 319 /* error if we have junk at the tail */ 320 if (ftell(fp) != sb.st_size) { 321 _freeentry(&rl->rl_runetype_ext); 322 free(hostdata); 323 return NULL; 324 } 325 326 return(rl); 327 } 328 329 void 330 _NukeRune(rl) 331 _RuneLocale *rl; 332 { 333 334 _DIAGASSERT(rl != NULL); 335 336 if (rl != &_DefaultRuneLocale) { 337 _freeentry(&rl->rl_runetype_ext); 338 if (rl->rl_codeset) 339 free(__UNCONST(rl->rl_codeset)); 340 if (rl->rl_citrus_ctype) 341 _citrus_ctype_close(rl->rl_citrus_ctype); 342 free(__UNCONST(rl->rl_ctype_tab)); 343 free(__UNCONST(rl->rl_tolower_tab)); 344 free(__UNCONST(rl->rl_toupper_tab)); 345 free(rl); 346 } 347 } 348 349 /* 350 * read in old LC_CTYPE declaration file, convert into runelocale info 351 */ 352 #define _CTYPE_PRIVATE 353 #include <limits.h> 354 #include <ctype.h> 355 356 _RuneLocale * 357 _Read_CTypeAsRune(fp) 358 FILE *fp; 359 { 360 char id[sizeof(_CTYPE_ID) - 1]; 361 u_int32_t i, len; 362 u_int8_t *new_ctype = NULL; 363 int16_t *new_toupper = NULL, *new_tolower = NULL; 364 /* host data */ 365 char *hostdata = NULL; 366 size_t hostdatalen; 367 _RuneLocale *rl; 368 struct stat sb; 369 int x; 370 371 _DIAGASSERT(fp != NULL); 372 373 if (fstat(fileno(fp), &sb) < 0) 374 return NULL; 375 376 if (sb.st_size < sizeof(id)) 377 return NULL; 378 /* XXX more validation? 
*/ 379 380 /* Someone might have read the magic number once already */ 381 rewind(fp); 382 383 if (fread(id, sizeof(id), 1, fp) != 1) 384 goto bad; 385 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0) 386 goto bad; 387 388 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1) 389 goto bad; 390 if ((i = ntohl(i)) != _CTYPE_REV) 391 goto bad; 392 393 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1) 394 goto bad; 395 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS) 396 goto bad; 397 398 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL || 399 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL || 400 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL) 401 goto bad; 402 new_ctype[0] = 0; 403 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len) 404 goto bad; 405 new_toupper[0] = EOF; 406 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len) 407 goto bad; 408 new_tolower[0] = EOF; 409 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len) 410 goto bad; 411 412 hostdatalen = sizeof(*rl); 413 414 if ((hostdata = malloc(hostdatalen)) == NULL) 415 goto bad; 416 memset(hostdata, 0, hostdatalen); 417 rl = (_RuneLocale *)(void *)hostdata; 418 rl->rl_variable = NULL; 419 420 memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic)); 421 memcpy(rl->rl_encoding, "NONE", 4); 422 423 rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune; /*XXX*/ 424 rl->rl_variable_len = 0; 425 426 for (x = 0; x < _CACHED_RUNES; ++x) { 427 if ((uint32_t) x > len) 428 continue; 429 430 /* 431 * TWEAKS! 432 * - old locale file declarations do not have proper _B 433 * in many cases. 434 * - isprint() declaration in ctype.h incorrectly uses _B. 435 * _B means "isprint but !isgraph", not "isblank" with the 436 * declaration. 437 * - _X and _CTYPE_X have negligible difference in meaning. 438 * - we don't set digit value, fearing that it would be 439 * too much of hardcoding. we may need to revisit it. 
440 */ 441 442 if (new_ctype[1 + x] & _U) 443 rl->rl_runetype[x] |= _CTYPE_U; 444 if (new_ctype[1 + x] & _L) 445 rl->rl_runetype[x] |= _CTYPE_L; 446 if (new_ctype[1 + x] & _N) 447 rl->rl_runetype[x] |= _CTYPE_D; 448 if (new_ctype[1 + x] & _S) 449 rl->rl_runetype[x] |= _CTYPE_S; 450 if (new_ctype[1 + x] & _P) 451 rl->rl_runetype[x] |= _CTYPE_P; 452 if (new_ctype[1 + x] & _C) 453 rl->rl_runetype[x] |= _CTYPE_C; 454 /* derived flag bits, duplicate of ctype.h */ 455 if (new_ctype[1 + x] & (_U | _L)) 456 rl->rl_runetype[x] |= _CTYPE_A; 457 if (new_ctype[1 + x] & (_N | _X)) 458 rl->rl_runetype[x] |= _CTYPE_X; 459 if (new_ctype[1 + x] & (_P|_U|_L|_N)) 460 rl->rl_runetype[x] |= _CTYPE_G; 461 /* we don't really trust _B in the file. see above. */ 462 if (new_ctype[1 + x] & _B) 463 rl->rl_runetype[x] |= _CTYPE_B; 464 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ') 465 rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1); 466 if (x == ' ' || x == '\t') 467 rl->rl_runetype[x] |= _CTYPE_B; 468 469 /* XXX may fail on non-8bit encoding only */ 470 rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]); 471 rl->rl_maplower[x] = ntohs(new_tolower[1 + x]); 472 } 473 474 _wctype_init(rl); 475 476 /* 477 * __runetable_to_netbsd_ctype() will be called from 478 * setrunelocale.c:_newrunelocale(), and fill old ctype table. 479 */ 480 481 free(new_ctype); 482 free(new_toupper); 483 free(new_tolower); 484 return(rl); 485 486 bad: 487 if (new_ctype) 488 free(new_ctype); 489 if (new_toupper) 490 free(new_toupper); 491 if (new_tolower) 492 free(new_tolower); 493 return NULL; 494 } 495