1 /* $NetBSD: rune.c,v 1.28 2006/03/19 02:44:27 christos Exp $ */ 2 3 /*- 4 * Copyright (c)1999 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /*- 30 * Copyright (c) 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * Paul Borman at Krystal Technologies. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 */ 60 61 #include <sys/cdefs.h> 62 #if defined(LIBC_SCCS) && !defined(lint) 63 #if 0 64 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; 65 #else 66 __RCSID("$NetBSD: rune.c,v 1.28 2006/03/19 02:44:27 christos Exp $"); 67 #endif 68 #endif /* LIBC_SCCS and not lint */ 69 70 #include "namespace.h" 71 #include <assert.h> 72 #include <stdio.h> 73 #include <string.h> 74 #include <stdlib.h> 75 #include <errno.h> 76 #include <wchar.h> 77 #include <sys/types.h> 78 #include <sys/stat.h> 79 #include <citrus/citrus_module.h> 80 #include <citrus/citrus_ctype.h> 81 #include "rune.h" 82 #include "rune_local.h" 83 84 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *)); 85 static void _freeentry __P((_RuneRange *)); 86 static void _wctype_init __P((_RuneLocale *rl)); 87 88 static int 89 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, 90 FILE *fp) 91 { 92 uint32_t i; 93 _RuneEntry *re; 94 _FileRuneEntry fre; 95 96 _DIAGASSERT(rl != NULL); 97 _DIAGASSERT(rr != NULL); 98 _DIAGASSERT(frr != NULL); 99 _DIAGASSERT(lastp != NULL); 100 _DIAGASSERT(fp != NULL); 101 102 re = (_RuneEntry *)rl->rl_variable; 103 104 rr->rr_nranges = ntohl(frr->frr_nranges); 105 if (rr->rr_nranges == 0) { 106 rr->rr_rune_ranges = NULL; 107 return 0; 108 } 109 110 rr->rr_rune_ranges = re; 111 for (i = 0; i < rr->rr_nranges; i++) { 112 if (fread(&fre, sizeof(fre), 1, fp) != 1) 113 return -1; 114 115 re->re_min = ntohl((u_int32_t)fre.fre_min); 116 re->re_max = ntohl((u_int32_t)fre.fre_max); 117 re->re_map = ntohl((u_int32_t)fre.fre_map); 118 re++; 119 120 if ((void *)re > lastp) 121 return -1; 122 } 123 rl->rl_variable = re; 124 return 0; 125 } 126 127 static int 128 readentry(_RuneRange *rr, FILE *fp) 129 { 130 _RuneEntry *re; 131 size_t l, i, j; 132 int error; 133 134 _DIAGASSERT(rr != NULL); 135 _DIAGASSERT(fp != NULL); 136 137 re = rr->rr_rune_ranges; 138 for (i = 0; i < rr->rr_nranges; i++) { 139 if (re[i].re_map != 0) { 140 re[i].re_rune_types = NULL; 141 continue; 142 } 143 144 l = re[i].re_max - re[i].re_min + 1; 145 re[i].re_rune_types = malloc(l * sizeof(_RuneType)); 146 if (!re[i].re_rune_types) { 147 error = ENOMEM; 148 goto fail; 149 } 150 memset(re[i].re_rune_types, 0, l * sizeof(_RuneType)); 151 152 if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l) 153 goto fail2; 154 155 for (j = 0; j < l; j++) 156 re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]); 157 } 158 return 0; 159 160 fail: 161 for (j = 0; j < i; j++) { 162 free(re[j].re_rune_types); 163 re[j].re_rune_types = NULL; 164 } 165 return error; 166 fail2: 167 for (j = 0; j <= i; j++) { 168 free(re[j].re_rune_types); 169 re[j].re_rune_types = NULL; 170 } 171 return errno; 172 } 173 174 /* XXX: temporary implementation */ 175 static void 176 find_codeset(_RuneLocale *rl) 177 { 178 char *top, *codeset, *tail, *ep; 179 180 /* end of rl_variable region */ 181 ep = (char *)rl->rl_variable; 182 ep += rl->rl_variable_len; 183 rl->rl_codeset = NULL; 184 if (!(top = strstr(rl->rl_variable, _RUNE_CODESET))) 185 return; 186 tail = strpbrk(top, " \t"); 187 codeset = top + sizeof(_RUNE_CODESET) - 1; 188 if (tail) { 189 *top = *tail; 190 *tail = '\0'; 191 rl->rl_codeset = strdup(codeset); 192 strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1))); 193 } else { 194 *top = '\0'; 195 rl->rl_codeset = strdup(codeset); 196 } 197 } 198 199 void 200 _freeentry(_RuneRange *rr) 201 { 202 _RuneEntry *re; 203 uint32_t i; 204 205 _DIAGASSERT(rr != NULL); 206 207 re = rr->rr_rune_ranges; 208 for (i = 0; i < rr->rr_nranges; i++) { 209 if (re[i].re_rune_types) 210 free(re[i].re_rune_types); 211 re[i].re_rune_types = NULL; 212 } 213 } 214 215 void 216 _wctype_init(_RuneLocale *rl) 217 { 218 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype, 219 sizeof(rl->rl_wctype)); 220 } 221 222 223 _RuneLocale * 224 _Read_RuneMagi(fp) 225 FILE *fp; 226 { 227 /* file */ 228 _FileRuneLocale frl; 229 /* host data */ 230 char *hostdata; 231 size_t hostdatalen; 232 void *lastp; 233 _RuneLocale *rl; 234 struct stat sb; 235 int x; 236 237 _DIAGASSERT(fp != NULL); 238 239 if (fstat(fileno(fp), &sb) < 0) 240 return NULL; 241 242 if (sb.st_size < sizeof(_FileRuneLocale)) 243 return NULL; 244 /* XXX more validation? */ 245 246 /* Someone might have read the magic number once already */ 247 rewind(fp); 248 249 if (fread(&frl, sizeof(frl), 1, fp) != 1) 250 return NULL; 251 if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic))) 252 return NULL; 253 254 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) + 255 ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) + 256 ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) + 257 ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry); 258 259 if ((hostdata = malloc(hostdatalen)) == NULL) 260 return NULL; 261 memset(hostdata, 0, hostdatalen); 262 lastp = hostdata + hostdatalen; 263 264 rl = (_RuneLocale *)(void *)hostdata; 265 rl->rl_variable = rl + 1; 266 267 memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic)); 268 memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding)); 269 270 rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune); 271 rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len); 272 273 for (x = 0; x < _CACHED_RUNES; ++x) { 274 rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]); 275 276 /* XXX assumes rune_t = u_int32_t */ 277 rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]); 278 rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]); 279 } 280 281 if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp)) 282 { 283 free(hostdata); 284 return NULL; 285 } 286 if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp)) 287 { 288 free(hostdata); 289 return NULL; 290 } 291 if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp)) 292 { 293 free(hostdata); 294 return NULL; 295 } 296 297 if (readentry(&rl->rl_runetype_ext, fp) != 0) { 298 free(hostdata); 299 return NULL; 300 } 301 302 if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len > 303 (u_int8_t *)lastp) { 304 _freeentry(&rl->rl_runetype_ext); 305 free(hostdata); 306 return NULL; 307 } 308 if (rl->rl_variable_len == 0) 309 rl->rl_variable = NULL; 310 if (rl->rl_variable == NULL || 311 fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) { 312 _freeentry(&rl->rl_runetype_ext); 313 free(hostdata); 314 return NULL; 315 } 316 find_codeset(rl); 317 _wctype_init(rl); 318 319 /* error if we have junk at the tail */ 320 if (ftell(fp) != sb.st_size) { 321 _freeentry(&rl->rl_runetype_ext); 322 free(hostdata); 323 return NULL; 324 } 325 326 return(rl); 327 } 328 329 void 330 _NukeRune(rl) 331 _RuneLocale *rl; 332 { 333 334 _DIAGASSERT(rl != NULL); 335 336 if (rl != &_DefaultRuneLocale) { 337 _freeentry(&rl->rl_runetype_ext); 338 if (rl->rl_codeset) 339 free(__UNCONST(rl->rl_codeset)); 340 if (rl->rl_citrus_ctype) 341 _citrus_ctype_close(rl->rl_citrus_ctype); 342 free(rl); 343 } 344 } 345 346 /* 347 * read in old LC_CTYPE declaration file, convert into runelocale info 348 */ 349 #define _CTYPE_PRIVATE 350 #include <limits.h> 351 #include <ctype.h> 352 353 _RuneLocale * 354 _Read_CTypeAsRune(fp) 355 FILE *fp; 356 { 357 char id[sizeof(_CTYPE_ID) - 1]; 358 u_int32_t i, len; 359 u_int8_t *new_ctype = NULL; 360 int16_t *new_toupper = NULL, *new_tolower = NULL; 361 /* host data */ 362 char *hostdata = NULL; 363 size_t hostdatalen; 364 _RuneLocale *rl; 365 struct stat sb; 366 int x; 367 368 _DIAGASSERT(fp != NULL); 369 370 if (fstat(fileno(fp), &sb) < 0) 371 return NULL; 372 373 if (sb.st_size < sizeof(id)) 374 return NULL; 375 /* XXX more validation? */ 376 377 /* Someone might have read the magic number once already */ 378 rewind(fp); 379 380 if (fread(id, sizeof(id), 1, fp) != 1) 381 goto bad; 382 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0) 383 goto bad; 384 385 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1) 386 goto bad; 387 if ((i = ntohl(i)) != _CTYPE_REV) 388 goto bad; 389 390 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1) 391 goto bad; 392 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS) 393 goto bad; 394 395 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL || 396 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL || 397 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL) 398 goto bad; 399 new_ctype[0] = 0; 400 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len) 401 goto bad; 402 new_toupper[0] = EOF; 403 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len) 404 goto bad; 405 new_tolower[0] = EOF; 406 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len) 407 goto bad; 408 409 hostdatalen = sizeof(*rl); 410 411 if ((hostdata = malloc(hostdatalen)) == NULL) 412 goto bad; 413 memset(hostdata, 0, hostdatalen); 414 rl = (_RuneLocale *)(void *)hostdata; 415 rl->rl_variable = NULL; 416 417 memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic)); 418 memcpy(rl->rl_encoding, "NONE", 4); 419 420 rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune; /*XXX*/ 421 rl->rl_variable_len = 0; 422 423 for (x = 0; x < _CACHED_RUNES; ++x) { 424 if ((uint32_t) x > len) 425 continue; 426 427 /* 428 * TWEAKS! 429 * - old locale file declarations do not have proper _B 430 * in many cases. 431 * - isprint() declaration in ctype.h incorrectly uses _B. 432 * _B means "isprint but !isgraph", not "isblank" with the 433 * declaration. 434 * - _X and _CTYPE_X have negligible difference in meaning. 435 * - we don't set digit value, fearing that it would be 436 * too much of hardcoding. we may need to revisit it. 437 */ 438 439 if (new_ctype[1 + x] & _U) 440 rl->rl_runetype[x] |= _CTYPE_U; 441 if (new_ctype[1 + x] & _L) 442 rl->rl_runetype[x] |= _CTYPE_L; 443 if (new_ctype[1 + x] & _N) 444 rl->rl_runetype[x] |= _CTYPE_D; 445 if (new_ctype[1 + x] & _S) 446 rl->rl_runetype[x] |= _CTYPE_S; 447 if (new_ctype[1 + x] & _P) 448 rl->rl_runetype[x] |= _CTYPE_P; 449 if (new_ctype[1 + x] & _C) 450 rl->rl_runetype[x] |= _CTYPE_C; 451 /* derived flag bits, duplicate of ctype.h */ 452 if (new_ctype[1 + x] & (_U | _L)) 453 rl->rl_runetype[x] |= _CTYPE_A; 454 if (new_ctype[1 + x] & (_N | _X)) 455 rl->rl_runetype[x] |= _CTYPE_X; 456 if (new_ctype[1 + x] & (_P|_U|_L|_N)) 457 rl->rl_runetype[x] |= _CTYPE_G; 458 /* we don't really trust _B in the file. see above. */ 459 if (new_ctype[1 + x] & _B) 460 rl->rl_runetype[x] |= _CTYPE_B; 461 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ') 462 rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1); 463 if (x == ' ' || x == '\t') 464 rl->rl_runetype[x] |= _CTYPE_B; 465 466 /* XXX may fail on non-8bit encoding only */ 467 rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]); 468 rl->rl_maplower[x] = ntohs(new_tolower[1 + x]); 469 } 470 471 _wctype_init(rl); 472 473 /* 474 * __runetable_to_netbsd_ctype() will be called from 475 * setlocale.c:loadlocale(), and fill old ctype table. 476 */ 477 478 free(new_ctype); 479 free(new_toupper); 480 free(new_tolower); 481 return(rl); 482 483 bad: 484 if (new_ctype) 485 free(new_ctype); 486 if (new_toupper) 487 free(new_toupper); 488 if (new_tolower) 489 free(new_tolower); 490 if (hostdata) 491 free(hostdata); 492 return NULL; 493 } 494