1 /* $NetBSD: udf_osta.c,v 1.7 2008/05/19 20:12:36 reinoud Exp $ */ 2 3 #include <sys/cdefs.h> 4 #ifndef lint 5 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.7 2008/05/19 20:12:36 reinoud Exp $"); 6 #endif /* not lint */ 7 8 /* 9 * Various routines from the OSTA 2.01 specs. Copyrights are included with 10 * each code segment. Slight whitespace modifications have been made for 11 * formatting purposes. Typos/bugs have been fixed. 12 * 13 */ 14 15 #include "udf_osta.h" 16 17 #ifndef _KERNEL 18 #include <ctype.h> 19 #endif 20 21 /*****************************************************************************/ 22 /*********************************************************************** 23 * OSTA compliant Unicode compression, uncompression routines. 24 * Copyright 1995 Micro Design International, Inc. 25 * Written by Jason M. Rinn. 26 * Micro Design International gives permission for the free use of the 27 * following source code. 28 */ 29 30 /*********************************************************************** 31 * Takes an OSTA CS0 compressed unicode name, and converts 32 * it to Unicode. 33 * The Unicode output will be in the byte order 34 * that the local compiler uses for 16-bit values. 35 * NOTE: This routine only performs error checking on the compID. 36 * It is up to the user to ensure that the unicode buffer is large 37 * enough, and that the compressed unicode name is correct. 38 * 39 * RETURN VALUE 40 * 41 * The number of unicode characters which were uncompressed. 42 * A -1 is returned if the compression ID is invalid. 43 */ 44 int 45 udf_UncompressUnicode( 46 int numberOfBytes, /* (Input) number of bytes read from media. */ 47 byte *UDFCompressed, /* (Input) bytes read from media. */ 48 unicode_t *unicode) /* (Output) uncompressed unicode characters. */ 49 { 50 unsigned int compID; 51 int returnValue, unicodeIndex, byteIndex; 52 53 /* Use UDFCompressed to store current byte being read. */ 54 compID = UDFCompressed[0]; 55 56 /* First check for valid compID. */ 57 if (compID != 8 && compID != 16) { 58 returnValue = -1; 59 } else { 60 unicodeIndex = 0; 61 byteIndex = 1; 62 63 /* Loop through all the bytes. */ 64 while (byteIndex < numberOfBytes) { 65 if (compID == 16) { 66 /* Move the first byte to the high bits of the 67 * unicode char. 68 */ 69 unicode[unicodeIndex] = 70 UDFCompressed[byteIndex++] << 8; 71 } else { 72 unicode[unicodeIndex] = 0; 73 } 74 if (byteIndex < numberOfBytes) { 75 /*Then the next byte to the low bits. */ 76 unicode[unicodeIndex] |= 77 UDFCompressed[byteIndex++]; 78 } 79 unicodeIndex++; 80 } 81 returnValue = unicodeIndex; 82 } 83 return(returnValue); 84 } 85 86 /*********************************************************************** 87 * DESCRIPTION: 88 * Takes a string of unicode wide characters and returns an OSTA CS0 89 * compressed unicode string. The unicode MUST be in the byte order of 90 * the compiler in order to obtain correct results. Returns an error 91 * if the compression ID is invalid. 92 * 93 * NOTE: This routine assumes the implementation already knows, by 94 * the local environment, how many bits are appropriate and 95 * therefore does no checking to test if the input characters fit 96 * into that number of bits or not. 97 * 98 * RETURN VALUE 99 * 100 * The total number of bytes in the compressed OSTA CS0 string, 101 * including the compression ID. 102 * A -1 is returned if the compression ID is invalid. 103 */ 104 int 105 udf_CompressUnicode( 106 int numberOfChars, /* (Input) number of unicode characters. */ 107 int compID, /* (Input) compression ID to be used. */ 108 unicode_t *unicode, /* (Input) unicode characters to compress. */ 109 byte *UDFCompressed) /* (Output) compressed string, as bytes. */ 110 { 111 int byteIndex, unicodeIndex; 112 113 if (compID != 8 && compID != 16) { 114 byteIndex = -1; /* Unsupported compression ID ! */ 115 } else { 116 /* Place compression code in first byte. */ 117 UDFCompressed[0] = compID; 118 119 byteIndex = 1; 120 unicodeIndex = 0; 121 while (unicodeIndex < numberOfChars) { 122 if (compID == 16) { 123 /* First, place the high bits of the char 124 * into the byte stream. 125 */ 126 UDFCompressed[byteIndex++] = 127 (unicode[unicodeIndex] & 0xFF00) >> 8; 128 } 129 /*Then place the low bits into the stream. */ 130 UDFCompressed[byteIndex++] = 131 unicode[unicodeIndex] & 0x00FF; 132 unicodeIndex++; 133 } 134 } 135 return(byteIndex); 136 } 137 138 /*****************************************************************************/ 139 /* 140 * CRC 010041 141 */ 142 static unsigned short crc_table[256] = { 143 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, 144 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 145 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, 146 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, 147 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, 148 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, 149 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, 150 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 151 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, 152 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, 153 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, 154 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, 155 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, 156 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 157 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, 158 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, 159 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, 160 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, 161 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, 162 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, 163 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, 164 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 165 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, 166 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, 167 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, 168 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, 169 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, 170 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, 171 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 172 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, 173 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, 174 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 175 }; 176 177 unsigned short 178 udf_cksum(s, n) 179 unsigned char *s; 180 int n; 181 { 182 unsigned short crc=0; 183 184 while (n-- > 0) 185 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8); 186 return crc; 187 } 188 189 /* UNICODE Checksum */ 190 unsigned short 191 udf_unicode_cksum(s, n) 192 unsigned short *s; 193 int n; 194 { 195 unsigned short crc=0; 196 197 while (n-- > 0) { 198 /* Take high order byte first--corresponds to a big endian 199 * byte stream. 200 */ 201 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8); 202 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8); 203 } 204 return crc; 205 } 206 207 208 /* 209 * Calculates a 16-bit checksum of the Implementation Use 210 * Extended Attribute header or Application Use Extended Attribute 211 * header. The fields AttributeType through ImplementationIdentifier 212 * (or ApplicationIdentifier) inclusively represent the 213 * data covered by the checksum (48 bytes). 214 * 215 */ 216 uint16_t udf_ea_cksum(uint8_t *data) { 217 uint16_t checksum = 0; 218 int count; 219 220 for (count = 0; count < 48; count++) { 221 checksum += *data++; 222 } 223 224 return checksum; 225 } 226 227 228 #ifdef MAIN 229 unsigned char bytes[] = { 0x70, 0x6A, 0x77 }; 230 231 main() 232 { 233 unsigned short x; 234 x = cksum(bytes, sizeof bytes); 235 printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299); 236 exit(0); 237 } 238 #endif 239 240 /*****************************************************************************/ 241 /* #ifdef NEEDS_ISPRINT */ 242 /*********************************************************************** 243 * OSTA UDF compliant file name translation routine for OS/2, 244 * Windows 95, Windows NT, Macintosh and UNIX. 245 * Copyright 1995 Micro Design International, Inc. 246 * Written by Jason M. Rinn. 247 * Micro Design International gives permission for the free use of the 248 * following source code. 249 */ 250 251 /*********************************************************************** 252 * To use these routines with different operating systems. 253 * 254 * OS/2 255 * Define OS2 256 * Define MAXLEN = 254 257 * 258 * Windows 95 259 * Define WIN_95 260 * Define MAXLEN = 255 261 * 262 * Windows NT 263 * Define WIN_NT 264 * Define MAXLEN = 255 265 * 266 * Macintosh: 267 * Define MAC. 268 * Define MAXLEN = 31. 269 * 270 * UNIX 271 * Define UNIX. 272 * Define MAXLEN as specified by unix version. 273 */ 274 275 #define ILLEGAL_CHAR_MARK 0x005F 276 #define CRC_MARK 0x0023 277 #define EXT_SIZE 5 278 #define PERIOD 0x002E 279 #define SPACE 0x0020 280 281 /*** PROTOTYPES ***/ 282 int IsIllegal(unicode_t ch); 283 284 /* Define a function or macro which determines if a Unicode character is 285 * printable under your implementation. 286 */ 287 288 289 /* #include <stdio.h> */ 290 static int UnicodeIsPrint(unicode_t ch) { 291 return (ch >=' ') && (ch != 127); 292 } 293 294 295 int UnicodeLength(unicode_t *string) { 296 int length; 297 length = 0; 298 while (*string++) length++; 299 300 return length; 301 } 302 303 304 #ifdef _KERNEL 305 static int isprint(int c) { 306 return (c >= ' ') && (c != 127); 307 } 308 #endif 309 310 311 /*********************************************************************** 312 * Translates a long file name to one using a MAXLEN and an illegal 313 * char set in accord with the OSTA requirements. Assumes the name has 314 * already been translated to Unicode. 315 * 316 * RETURN VALUE 317 * 318 * Number of unicode characters in translated name. 319 */ 320 int UDFTransName( 321 unicode_t *newName, /* (Output)Translated name. Must be of length 322 * MAXLEN */ 323 unicode_t *udfName, /* (Input) Name from UDF volume.*/ 324 int udfLen) /* (Input) Length of UDF Name. */ 325 { 326 int Index, newIndex = 0, needsCRC = false; /* index is shadowed */ 327 int extIndex = 0, newExtIndex = 0, hasExt = false; 328 #if defined OS2 || defined WIN_95 || defined WIN_NT 329 int trailIndex = 0; 330 #endif 331 unsigned short valueCRC; 332 unicode_t current; 333 const char hexChar[] = "0123456789ABCDEF"; 334 335 for (Index = 0; Index < udfLen; Index++) { 336 current = udfName[Index]; 337 338 if (IsIllegal(current) || !UnicodeIsPrint(current)) { 339 needsCRC = true; 340 /* Replace Illegal and non-displayable chars with 341 * underscore. 342 */ 343 current = ILLEGAL_CHAR_MARK; 344 /* Skip any other illegal or non-displayable 345 * characters. 346 */ 347 while(Index+1 < udfLen && (IsIllegal(udfName[Index+1]) 348 || !UnicodeIsPrint(udfName[Index+1]))) { 349 Index++; 350 } 351 } 352 353 /* Record position of extension, if one is found. */ 354 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) { 355 if (udfLen == Index + 1) { 356 /* A trailing period is NOT an extension. */ 357 hasExt = false; 358 } else { 359 hasExt = true; 360 extIndex = Index; 361 newExtIndex = newIndex; 362 } 363 } 364 365 #if defined OS2 || defined WIN_95 || defined WIN_NT 366 /* Record position of last char which is NOT period or space. */ 367 else if (current != PERIOD && current != SPACE) { 368 trailIndex = newIndex; 369 } 370 #endif 371 372 if (newIndex < MAXLEN) { 373 newName[newIndex++] = current; 374 } else { 375 needsCRC = true; 376 } 377 } 378 379 #if defined OS2 || defined WIN_95 || defined WIN_NT 380 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */ 381 if (trailIndex != newIndex - 1) { 382 newIndex = trailIndex + 1; 383 needsCRC = true; 384 hasExt = false; /* Trailing period does not make an 385 * extension. */ 386 } 387 #endif 388 389 if (needsCRC) { 390 unicode_t ext[EXT_SIZE]; 391 int localExtIndex = 0; 392 if (hasExt) { 393 int maxFilenameLen; 394 /* Translate extension, and store it in ext. */ 395 for(Index = 0; Index<EXT_SIZE && 396 extIndex + Index +1 < udfLen; Index++ ) { 397 current = udfName[extIndex + Index + 1]; 398 if (IsIllegal(current) || 399 !UnicodeIsPrint(current)) { 400 needsCRC = 1; 401 /* Replace Illegal and non-displayable 402 * chars with underscore. 403 */ 404 current = ILLEGAL_CHAR_MARK; 405 /* Skip any other illegal or 406 * non-displayable characters. 407 */ 408 while(Index + 1 < EXT_SIZE 409 && (IsIllegal(udfName[extIndex + 410 Index + 2]) || 411 !isprint(udfName[extIndex + 412 Index + 2]))) { 413 Index++; 414 } 415 } 416 ext[localExtIndex++] = current; 417 } 418 419 /* Truncate filename to leave room for extension and 420 * CRC. 421 */ 422 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1); 423 if (newIndex > maxFilenameLen) { 424 newIndex = maxFilenameLen; 425 } else { 426 newIndex = newExtIndex; 427 } 428 } else if (newIndex > MAXLEN - 5) { 429 /*If no extension, make sure to leave room for CRC. */ 430 newIndex = MAXLEN - 5; 431 } 432 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */ 433 434 /*Calculate CRC from original filename from FileIdentifier. */ 435 valueCRC = udf_unicode_cksum(udfName, udfLen); 436 /* Convert 16-bits of CRC to hex characters. */ 437 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; 438 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; 439 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; 440 newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; 441 442 /* Place a translated extension at end, if found. */ 443 if (hasExt) { 444 newName[newIndex++] = PERIOD; 445 for (Index = 0;Index < localExtIndex ;Index++ ) { 446 newName[newIndex++] = ext[Index]; 447 } 448 } 449 } 450 return(newIndex); 451 } 452 453 #if defined OS2 || defined WIN_95 || defined WIN_NT 454 /*********************************************************************** 455 * Decides if a Unicode character matches one of a list 456 * of ASCII characters. 457 * Used by OS2 version of IsIllegal for readability, since all of the 458 * illegal characters above 0x0020 are in the ASCII subset of Unicode. 459 * Works very similarly to the standard C function strchr(). 460 * 461 * RETURN VALUE 462 * 463 * Non-zero if the Unicode character is in the given ASCII string. 464 */ 465 int UnicodeInString( 466 unsigned char *string, /* (Input) String to search through. */ 467 unicode_t ch) /* (Input) Unicode char to search for. */ 468 { 469 int found = false; 470 while (*string != '\0' && found == false) { 471 /* These types should compare, since both are unsigned 472 * numbers. */ 473 if (*string == ch) { 474 found = true; 475 } 476 string++; 477 } 478 return(found); 479 } 480 #endif /* OS2 */ 481 482 /*********************************************************************** 483 * Decides whether the given character is illegal for a given OS. 484 * 485 * RETURN VALUE 486 * 487 * Non-zero if char is illegal. 488 */ 489 int IsIllegal(unicode_t ch) 490 { 491 #ifdef MAC 492 /* Only illegal character on the MAC is the colon. */ 493 if (ch == 0x003A) { 494 return(1); 495 } else { 496 return(0); 497 } 498 499 #elif defined UNIX 500 /* Illegal UNIX characters are NULL and slash. */ 501 if (ch == 0x0000 || ch == 0x002F) { 502 return(1); 503 } else { 504 return(0); 505 } 506 507 #elif defined OS2 || defined WIN_95 || defined WIN_NT 508 /* Illegal char's for OS/2 according to WARP toolkit. */ 509 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) { 510 return(1); 511 } else { 512 return(0); 513 } 514 #endif 515 } 516 /* #endif*/ /* NEEDS_ISPRINT */ 517 518