/*	$NetBSD: rune.c,v 1.31 2009/01/02 00:20:20 tnozaki Exp $	*/

/*-
 * Copyright (c)1999 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 #if 0 64 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; 65 #else 66 __RCSID("$NetBSD: rune.c,v 1.31 2009/01/02 00:20:20 tnozaki Exp $"); 67 #endif 68 #endif /* LIBC_SCCS and not lint */ 69 70 #include "namespace.h" 71 #include <assert.h> 72 #include <stdio.h> 73 #include <string.h> 74 #include <stdlib.h> 75 #include <errno.h> 76 #include <wchar.h> 77 #include <sys/types.h> 78 #include <sys/stat.h> 79 80 #include "citrus_module.h" 81 #include "citrus_ctype.h" 82 83 #include "bsdctype.h" 84 #include "rune.h" 85 #include "rune_local.h" 86 87 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *)); 88 static void _freeentry __P((_RuneRange *)); 89 static void _wctype_init __P((_RuneLocale *rl)); 90 91 static int 92 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, 93 FILE *fp) 94 { 95 uint32_t i; 96 _RuneEntry *re; 97 _FileRuneEntry fre; 98 99 _DIAGASSERT(rl != NULL); 100 _DIAGASSERT(rr != NULL); 101 _DIAGASSERT(frr != NULL); 102 _DIAGASSERT(lastp != NULL); 103 _DIAGASSERT(fp != NULL); 104 105 re = (_RuneEntry *)rl->rl_variable; 106 107 rr->rr_nranges = ntohl(frr->frr_nranges); 108 if (rr->rr_nranges == 0) { 109 rr->rr_rune_ranges = NULL; 110 return 0; 111 } 112 113 rr->rr_rune_ranges = re; 114 for (i = 0; i < rr->rr_nranges; i++) { 115 if (fread(&fre, sizeof(fre), 1, fp) != 1) 116 return -1; 117 118 re->re_min = ntohl((u_int32_t)fre.fre_min); 119 re->re_max = ntohl((u_int32_t)fre.fre_max); 120 re->re_map = ntohl((u_int32_t)fre.fre_map); 121 re++; 122 123 if ((void *)re > lastp) 124 return -1; 125 } 126 rl->rl_variable = re; 127 return 0; 128 } 129 130 static int 131 readentry(_RuneRange *rr, FILE *fp) 132 { 133 _RuneEntry *re; 134 size_t l, i, j; 135 int error; 136 137 _DIAGASSERT(rr != NULL); 138 _DIAGASSERT(fp != NULL); 139 140 re = rr->rr_rune_ranges; 141 for (i = 0; i < rr->rr_nranges; i++) { 142 if 
(re[i].re_map != 0) { 143 re[i].re_rune_types = NULL; 144 continue; 145 } 146 147 l = re[i].re_max - re[i].re_min + 1; 148 re[i].re_rune_types = malloc(l * sizeof(_RuneType)); 149 if (!re[i].re_rune_types) { 150 error = ENOMEM; 151 goto fail; 152 } 153 memset(re[i].re_rune_types, 0, l * sizeof(_RuneType)); 154 155 if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l) 156 goto fail2; 157 158 for (j = 0; j < l; j++) 159 re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]); 160 } 161 return 0; 162 163 fail: 164 for (j = 0; j < i; j++) { 165 free(re[j].re_rune_types); 166 re[j].re_rune_types = NULL; 167 } 168 return error; 169 fail2: 170 for (j = 0; j <= i; j++) { 171 free(re[j].re_rune_types); 172 re[j].re_rune_types = NULL; 173 } 174 return errno; 175 } 176 177 /* XXX: temporary implementation */ 178 static void 179 find_codeset(_RuneLocale *rl) 180 { 181 char *top, *codeset, *tail, *ep; 182 183 /* end of rl_variable region */ 184 ep = (char *)rl->rl_variable; 185 ep += rl->rl_variable_len; 186 rl->rl_codeset = NULL; 187 if (!(top = strstr(rl->rl_variable, _RUNE_CODESET))) 188 return; 189 tail = strpbrk(top, " \t"); 190 codeset = top + sizeof(_RUNE_CODESET) - 1; 191 if (tail) { 192 *top = *tail; 193 *tail = '\0'; 194 rl->rl_codeset = strdup(codeset); 195 strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1))); 196 } else { 197 *top = '\0'; 198 rl->rl_codeset = strdup(codeset); 199 } 200 } 201 202 void 203 _freeentry(_RuneRange *rr) 204 { 205 _RuneEntry *re; 206 uint32_t i; 207 208 _DIAGASSERT(rr != NULL); 209 210 re = rr->rr_rune_ranges; 211 for (i = 0; i < rr->rr_nranges; i++) { 212 if (re[i].re_rune_types) 213 free(re[i].re_rune_types); 214 re[i].re_rune_types = NULL; 215 } 216 } 217 218 void 219 _wctype_init(_RuneLocale *rl) 220 { 221 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 222 sizeof(rl->rl_wctype)); 223 } 224 225 226 _RuneLocale * 227 _Read_RuneMagi(fp) 228 FILE *fp; 229 { 230 /* file */ 231 _FileRuneLocale frl; 232 /* host data */ 233 
char *hostdata; 234 size_t hostdatalen; 235 void *lastp; 236 _RuneLocale *rl; 237 struct stat sb; 238 int x; 239 240 _DIAGASSERT(fp != NULL); 241 242 if (fstat(fileno(fp), &sb) < 0) 243 return NULL; 244 245 if (sb.st_size < sizeof(_FileRuneLocale)) 246 return NULL; 247 /* XXX more validation? */ 248 249 /* Someone might have read the magic number once already */ 250 rewind(fp); 251 252 if (fread(&frl, sizeof(frl), 1, fp) != 1) 253 return NULL; 254 if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic))) 255 return NULL; 256 257 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) + 258 ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) + 259 ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) + 260 ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry); 261 262 if ((hostdata = malloc(hostdatalen)) == NULL) 263 return NULL; 264 memset(hostdata, 0, hostdatalen); 265 lastp = hostdata + hostdatalen; 266 267 rl = (_RuneLocale *)(void *)hostdata; 268 rl->rl_variable = rl + 1; 269 270 memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic)); 271 memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding)); 272 273 rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune); 274 rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len); 275 276 for (x = 0; x < _CACHED_RUNES; ++x) { 277 rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]); 278 279 /* XXX assumes rune_t = u_int32_t */ 280 rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]); 281 rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]); 282 } 283 284 if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp)) 285 { 286 free(hostdata); 287 return NULL; 288 } 289 if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp)) 290 { 291 free(hostdata); 292 return NULL; 293 } 294 if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp)) 295 { 296 free(hostdata); 297 return NULL; 298 } 299 300 if 
(readentry(&rl->rl_runetype_ext, fp) != 0) { 301 free(hostdata); 302 return NULL; 303 } 304 305 if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len > 306 (u_int8_t *)lastp) { 307 _freeentry(&rl->rl_runetype_ext); 308 free(hostdata); 309 return NULL; 310 } 311 if (rl->rl_variable_len == 0) 312 rl->rl_variable = NULL; 313 if (rl->rl_variable == NULL || 314 fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) { 315 _freeentry(&rl->rl_runetype_ext); 316 free(hostdata); 317 return NULL; 318 } 319 find_codeset(rl); 320 _wctype_init(rl); 321 322 /* error if we have junk at the tail */ 323 if (ftell(fp) != sb.st_size) { 324 _freeentry(&rl->rl_runetype_ext); 325 free(hostdata); 326 return NULL; 327 } 328 329 return(rl); 330 } 331 332 void 333 _NukeRune(rl) 334 _RuneLocale *rl; 335 { 336 337 _DIAGASSERT(rl != NULL); 338 339 if (rl != &_DefaultRuneLocale) { 340 _freeentry(&rl->rl_runetype_ext); 341 if (rl->rl_codeset) 342 free(__UNCONST(rl->rl_codeset)); 343 if (rl->rl_citrus_ctype) 344 _citrus_ctype_close(rl->rl_citrus_ctype); 345 free(__UNCONST(rl->rl_ctype_tab)); 346 free(__UNCONST(rl->rl_tolower_tab)); 347 free(__UNCONST(rl->rl_toupper_tab)); 348 free(rl); 349 } 350 } 351 352 /* 353 * read in old LC_CTYPE declaration file, convert into runelocale info 354 */ 355 #define _CTYPE_PRIVATE 356 #include <limits.h> 357 #include <ctype.h> 358 359 _RuneLocale * 360 _Read_CTypeAsRune(fp) 361 FILE *fp; 362 { 363 char id[sizeof(_CTYPE_ID) - 1]; 364 u_int32_t i, len; 365 u_int8_t *new_ctype = NULL; 366 int16_t *new_toupper = NULL, *new_tolower = NULL; 367 /* host data */ 368 char *hostdata = NULL; 369 size_t hostdatalen; 370 _RuneLocale *rl; 371 struct stat sb; 372 int x; 373 374 _DIAGASSERT(fp != NULL); 375 376 if (fstat(fileno(fp), &sb) < 0) 377 return NULL; 378 379 if (sb.st_size < sizeof(id)) 380 return NULL; 381 /* XXX more validation? 
*/ 382 383 /* Someone might have read the magic number once already */ 384 rewind(fp); 385 386 if (fread(id, sizeof(id), 1, fp) != 1) 387 goto bad; 388 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0) 389 goto bad; 390 391 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1) 392 goto bad; 393 if ((i = ntohl(i)) != _CTYPE_REV) 394 goto bad; 395 396 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1) 397 goto bad; 398 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS) 399 goto bad; 400 401 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL || 402 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL || 403 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL) 404 goto bad; 405 new_ctype[0] = 0; 406 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len) 407 goto bad; 408 new_toupper[0] = EOF; 409 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len) 410 goto bad; 411 new_tolower[0] = EOF; 412 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len) 413 goto bad; 414 415 hostdatalen = sizeof(*rl); 416 417 if ((hostdata = malloc(hostdatalen)) == NULL) 418 goto bad; 419 memset(hostdata, 0, hostdatalen); 420 rl = (_RuneLocale *)(void *)hostdata; 421 rl->rl_variable = NULL; 422 423 memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic)); 424 memcpy(rl->rl_encoding, "NONE", 4); 425 426 rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune; /*XXX*/ 427 rl->rl_variable_len = 0; 428 429 for (x = 0; x < _CACHED_RUNES; ++x) { 430 if ((uint32_t) x > len) 431 continue; 432 433 /* 434 * TWEAKS! 435 * - old locale file declarations do not have proper _B 436 * in many cases. 437 * - isprint() declaration in ctype.h incorrectly uses _B. 438 * _B means "isprint but !isgraph", not "isblank" with the 439 * declaration. 440 * - _X and _CTYPE_X have negligible difference in meaning. 441 * - we don't set digit value, fearing that it would be 442 * too much of hardcoding. we may need to revisit it. 
443 */ 444 445 if (new_ctype[1 + x] & _U) 446 rl->rl_runetype[x] |= _CTYPE_U; 447 if (new_ctype[1 + x] & _L) 448 rl->rl_runetype[x] |= _CTYPE_L; 449 if (new_ctype[1 + x] & _N) 450 rl->rl_runetype[x] |= _CTYPE_D; 451 if (new_ctype[1 + x] & _S) 452 rl->rl_runetype[x] |= _CTYPE_S; 453 if (new_ctype[1 + x] & _P) 454 rl->rl_runetype[x] |= _CTYPE_P; 455 if (new_ctype[1 + x] & _C) 456 rl->rl_runetype[x] |= _CTYPE_C; 457 /* derived flag bits, duplicate of ctype.h */ 458 if (new_ctype[1 + x] & (_U | _L)) 459 rl->rl_runetype[x] |= _CTYPE_A; 460 if (new_ctype[1 + x] & (_N | _X)) 461 rl->rl_runetype[x] |= _CTYPE_X; 462 if (new_ctype[1 + x] & (_P|_U|_L|_N)) 463 rl->rl_runetype[x] |= _CTYPE_G; 464 /* we don't really trust _B in the file. see above. */ 465 if (new_ctype[1 + x] & _B) 466 rl->rl_runetype[x] |= _CTYPE_B; 467 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ') 468 rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1); 469 if (x == ' ' || x == '\t') 470 rl->rl_runetype[x] |= _CTYPE_B; 471 472 /* XXX may fail on non-8bit encoding only */ 473 rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]); 474 rl->rl_maplower[x] = ntohs(new_tolower[1 + x]); 475 } 476 477 _wctype_init(rl); 478 479 /* 480 * __runetable_to_netbsd_ctype() will be called from 481 * setrunelocale.c:_newrunelocale(), and fill old ctype table. 482 */ 483 484 free(new_ctype); 485 free(new_toupper); 486 free(new_tolower); 487 return(rl); 488 489 bad: 490 if (new_ctype) 491 free(new_ctype); 492 if (new_toupper) 493 free(new_toupper); 494 if (new_tolower) 495 free(new_tolower); 496 return NULL; 497 } 498