/* $NetBSD: rune.c,v 1.13 2001/05/26 00:35:20 kristerw Exp $ */

/*-
 * Copyright (c)1999 Citrus Project,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
63 */ 64 65 #include <sys/cdefs.h> 66 #if defined(LIBC_SCCS) && !defined(lint) 67 #if 0 68 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93"; 69 #else 70 __RCSID("$NetBSD: rune.c,v 1.13 2001/05/26 00:35:20 kristerw Exp $"); 71 #endif 72 #endif /* LIBC_SCCS and not lint */ 73 74 #include "rune.h" 75 #include <assert.h> 76 #include <stdio.h> 77 #include <string.h> 78 #include <stdlib.h> 79 #include <errno.h> 80 #include <sys/types.h> 81 #include <sys/stat.h> 82 #include "rune_local.h" 83 84 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *)); 85 static void _freeentry __P((_RuneRange *)); 86 87 static int 88 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp, 89 FILE *fp) 90 { 91 int i; 92 _RuneEntry *re; 93 _FileRuneEntry fre; 94 95 _DIAGASSERT(rl != NULL); 96 _DIAGASSERT(rr != NULL); 97 _DIAGASSERT(frr != NULL); 98 _DIAGASSERT(lastp != NULL); 99 _DIAGASSERT(fp != NULL); 100 101 re = (_RuneEntry *)rl->__rune_variable; 102 103 rr->__nranges = ntohl(frr->__nranges); 104 if (rr->__nranges == 0) { 105 rr->__rune_ranges = NULL; 106 return 0; 107 } 108 109 rr->__rune_ranges = re; 110 for (i = 0; i < rr->__nranges; i++) { 111 if (fread(&fre, sizeof(fre), 1, fp) != 1) 112 return -1; 113 114 re->__min = ntohl((u_int32_t)fre.__min); 115 re->__max = ntohl((u_int32_t)fre.__max); 116 re->__map = ntohl((u_int32_t)fre.__map); 117 re++; 118 119 if ((void *)re > lastp) 120 return -1; 121 } 122 rl->__rune_variable = re; 123 return 0; 124 } 125 126 static int 127 readentry(_RuneRange *rr, FILE *fp) 128 { 129 _RuneEntry *re; 130 size_t l, i, j; 131 int error; 132 133 _DIAGASSERT(rr != NULL); 134 _DIAGASSERT(fp != NULL); 135 136 re = rr->__rune_ranges; 137 for (i = 0; i < rr->__nranges; i++) { 138 if (re[i].__map != 0) { 139 re[i].__rune_types = NULL; 140 continue; 141 } 142 143 l = re[i].__max - re[i].__min + 1; 144 re[i].__rune_types = malloc(l * sizeof(_RuneType)); 145 if (!re[i].__rune_types) { 146 error = 
ENOBUFS; 147 goto fail; 148 } 149 memset(re[i].__rune_types, 0, l * sizeof(_RuneType)); 150 151 if (fread(re[i].__rune_types, sizeof(_RuneType), l, fp) != l) 152 goto fail2; 153 154 for (j = 0; j < l; j++) 155 re[i].__rune_types[j] = ntohl(re[i].__rune_types[j]); 156 } 157 return 0; 158 159 fail: 160 for (j = 0; j < i; j++) { 161 free(re[j].__rune_types); 162 re[j].__rune_types = NULL; 163 } 164 return error; 165 fail2: 166 for (j = 0; j <= i; j++) { 167 free(re[j].__rune_types); 168 re[j].__rune_types = NULL; 169 } 170 return errno; 171 } 172 173 /* XXX: temporary implementation */ 174 static void 175 find_codeset(_RuneLocale *rl) 176 { 177 char *top, *codeset, *tail; 178 179 rl->__rune_codeset = NULL; 180 if (!(top=strstr(rl->__rune_variable, _RUNE_CODESET))) 181 return; 182 tail = strpbrk(top, " \t"); 183 codeset = top + sizeof(_RUNE_CODESET)-1; 184 if (tail) { 185 *top = *tail; 186 *tail = '\0'; 187 rl->__rune_codeset = strdup(codeset); 188 strcpy(top+1, tail+1); 189 190 } else { 191 *top='\0'; 192 rl->__rune_codeset = strdup(codeset); 193 } 194 } 195 196 void 197 _freeentry(_RuneRange *rr) 198 { 199 _RuneEntry *re; 200 int i; 201 202 _DIAGASSERT(rr != NULL); 203 204 re = rr->__rune_ranges; 205 for (i = 0; i < rr->__nranges; i++) { 206 if (re[i].__rune_types) 207 free(re[i].__rune_types); 208 re[i].__rune_types = NULL; 209 } 210 } 211 212 _RuneLocale * 213 _Read_RuneMagi(fp) 214 FILE *fp; 215 { 216 /* file */ 217 _FileRuneLocale frl; 218 /* host data */ 219 char *hostdata; 220 size_t hostdatalen; 221 void *lastp; 222 _RuneLocale *rl; 223 struct stat sb; 224 int x; 225 226 _DIAGASSERT(fp != NULL); 227 228 if (fstat(fileno(fp), &sb) < 0) 229 return NULL; 230 231 if (sb.st_size < sizeof(_RuneLocale)) 232 return NULL; 233 /* XXX more validation? 
*/ 234 235 /* Someone might have read the magic number once already */ 236 rewind(fp); 237 238 if (fread(&frl, sizeof(frl), 1, fp) != 1) 239 return NULL; 240 if (memcmp(frl.__magic, _RUNE_MAGIC_1, sizeof(frl.__magic))) 241 return NULL; 242 243 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.__variable_len) + 244 ntohl(frl.__runetype_ext.__nranges) * sizeof(_RuneEntry) + 245 ntohl(frl.__maplower_ext.__nranges) * sizeof(_RuneEntry) + 246 ntohl(frl.__mapupper_ext.__nranges) * sizeof(_RuneEntry); 247 248 if ((hostdata = malloc(hostdatalen)) == NULL) 249 return NULL; 250 memset(hostdata, 0, hostdatalen); 251 lastp = hostdata + hostdatalen; 252 253 rl = (_RuneLocale *)(void *)hostdata; 254 rl->__rune_variable = rl + 1; 255 256 memcpy(rl->__magic, frl.__magic, sizeof(rl->__magic)); 257 memcpy(rl->__encoding, frl.__encoding, sizeof(rl->__encoding)); 258 259 rl->__invalid_rune = ntohl((u_int32_t)frl.__invalid_rune); 260 rl->__variable_len = ntohl((u_int32_t)frl.__variable_len); 261 262 for (x = 0; x < _CACHED_RUNES; ++x) { 263 rl->__runetype[x] = ntohl(frl.__runetype[x]); 264 265 /* XXX assumes rune_t = u_int32_t */ 266 rl->__maplower[x] = ntohl((u_int32_t)frl.__maplower[x]); 267 rl->__mapupper[x] = ntohl((u_int32_t)frl.__mapupper[x]); 268 } 269 270 if (readrange(rl, &rl->__runetype_ext, &frl.__runetype_ext, lastp, fp)) 271 { 272 free(hostdata); 273 return NULL; 274 } 275 if (readrange(rl, &rl->__maplower_ext, &frl.__maplower_ext, lastp, fp)) 276 { 277 free(hostdata); 278 return NULL; 279 } 280 if (readrange(rl, &rl->__mapupper_ext, &frl.__mapupper_ext, lastp, fp)) 281 { 282 free(hostdata); 283 return NULL; 284 } 285 286 if (readentry(&rl->__runetype_ext, fp) < 0) { 287 free(hostdata); 288 return NULL; 289 } 290 291 if ((u_int8_t *)rl->__rune_variable + rl->__variable_len > 292 (u_int8_t *)lastp) { 293 _freeentry(&rl->__runetype_ext); 294 free(hostdata); 295 return NULL; 296 } 297 if (rl->__variable_len == 0) 298 rl->__rune_variable = NULL; 299 else if 
(fread(rl->__rune_variable, rl->__variable_len, 1, fp) != 1) { 300 _freeentry(&rl->__runetype_ext); 301 free(hostdata); 302 return NULL; 303 } 304 find_codeset(rl); 305 306 /* error if we have junk at the tail */ 307 if (ftell(fp) != sb.st_size) { 308 _freeentry(&rl->__runetype_ext); 309 free(hostdata); 310 return NULL; 311 } 312 313 return(rl); 314 } 315 316 void 317 _NukeRune(rl) 318 _RuneLocale *rl; 319 { 320 321 _DIAGASSERT(rl != NULL); 322 323 _freeentry(&rl->__runetype_ext); 324 if (rl->__rune_codeset) 325 free(rl->__rune_codeset); 326 free(rl); 327 } 328 329 /* 330 * read in old LC_CTYPE declaration file, convert into runelocale info 331 */ 332 #define _CTYPE_PRIVATE 333 #include <limits.h> 334 #include <ctype.h> 335 336 _RuneLocale * 337 _Read_CTypeAsRune(fp) 338 FILE *fp; 339 { 340 char id[sizeof(_CTYPE_ID) - 1]; 341 u_int32_t i, len; 342 u_int8_t *new_ctype = NULL; 343 int16_t *new_toupper = NULL, *new_tolower = NULL; 344 /* host data */ 345 char *hostdata = NULL; 346 size_t hostdatalen; 347 _RuneLocale *rl; 348 struct stat sb; 349 int x; 350 351 _DIAGASSERT(fp != NULL); 352 353 if (fstat(fileno(fp), &sb) < 0) 354 return NULL; 355 356 if (sb.st_size < sizeof(id)) 357 return NULL; 358 /* XXX more validation? 
*/ 359 360 /* Someone might have read the magic number once already */ 361 rewind(fp); 362 363 if (fread(id, sizeof(id), 1, fp) != 1) 364 goto bad; 365 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0) 366 goto bad; 367 368 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1) 369 goto bad; 370 if ((i = ntohl(i)) != _CTYPE_REV) 371 goto bad; 372 373 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1) 374 goto bad; 375 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS) 376 goto bad; 377 378 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL || 379 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL || 380 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL) 381 goto bad; 382 new_ctype[0] = 0; 383 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len) 384 goto bad; 385 new_toupper[0] = EOF; 386 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len) 387 goto bad; 388 new_tolower[0] = EOF; 389 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len) 390 goto bad; 391 392 hostdatalen = sizeof(*rl); 393 394 if ((hostdata = malloc(hostdatalen)) == NULL) 395 goto bad; 396 memset(hostdata, 0, hostdatalen); 397 rl = (_RuneLocale *)(void *)hostdata; 398 rl->__rune_variable = NULL; 399 400 memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic)); 401 memcpy(rl->__encoding, "NONE", 4); 402 403 rl->__invalid_rune = _DefaultRuneLocale.__invalid_rune; /*XXX*/ 404 rl->__variable_len = 0; 405 406 for (x = 0; x < _CACHED_RUNES; ++x) { 407 if (x > len) 408 continue; 409 410 /* 411 * TWEAKS! 412 * - old locale file declarations do not have proper _B 413 * in many cases. 414 * - isprint() declaration in ctype.h incorrectly uses _B. 415 * _B means "isprint but !isgraph", not "isblank" with the 416 * declaration. 417 * - _X and _CTYPE_X have negligible difference in meaning. 418 * - we don't set digit value, fearing that it would be 419 * too much of hardcoding. we may need to revisit it. 
420 */ 421 422 if (new_ctype[1 + x] & _U) 423 rl->__runetype[x] |= _CTYPE_U; 424 if (new_ctype[1 + x] & _L) 425 rl->__runetype[x] |= _CTYPE_L; 426 if (new_ctype[1 + x] & _N) 427 rl->__runetype[x] |= _CTYPE_D; 428 if (new_ctype[1 + x] & _S) 429 rl->__runetype[x] |= _CTYPE_S; 430 if (new_ctype[1 + x] & _P) 431 rl->__runetype[x] |= _CTYPE_P; 432 if (new_ctype[1 + x] & _C) 433 rl->__runetype[x] |= _CTYPE_C; 434 /* derived flag bits, duplicate of ctype.h */ 435 if (new_ctype[1 + x] & (_U | _L)) 436 rl->__runetype[x] |= _CTYPE_A; 437 if (new_ctype[1 + x] & (_N | _X)) 438 rl->__runetype[x] |= _CTYPE_X; 439 if (new_ctype[1 + x] & (_P|_U|_L|_N)) 440 rl->__runetype[x] |= _CTYPE_G; 441 /* we don't really trust _B in the file. see above. */ 442 if (new_ctype[1 + x] & _B) 443 rl->__runetype[x] |= _CTYPE_B; 444 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ') 445 rl->__runetype[x] |= (_CTYPE_R | _CTYPE_SW1); 446 if (x == ' ' || x == '\t') 447 rl->__runetype[x] |= _CTYPE_B; 448 449 /* XXX may fail on non-8bit encoding only */ 450 rl->__mapupper[x] = ntohs(new_toupper[1 + x]); 451 rl->__maplower[x] = ntohs(new_tolower[1 + x]); 452 } 453 454 /* 455 * __runetable_to_netbsd_ctype() will be called from 456 * setlocale.c:loadlocale(), and fill old ctype table. 457 */ 458 459 free(new_ctype); 460 free(new_toupper); 461 free(new_tolower); 462 return(rl); 463 464 bad: 465 if (new_ctype) 466 free(new_ctype); 467 if (new_toupper) 468 free(new_toupper); 469 if (new_tolower) 470 free(new_tolower); 471 if (hostdata) 472 free(hostdata); 473 return NULL; 474 } 475