1 /* $NetBSD: udf_osta.c,v 1.5 2007/12/11 12:05:27 lukem Exp $ */ 2 3 #include <sys/cdefs.h> 4 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.5 2007/12/11 12:05:27 lukem Exp $"); 5 6 /* 7 * Various routines from the OSTA 2.01 specs. Copyrights are included with 8 * each code segment. Slight whitespace modifications have been made for 9 * formatting purposes. Typos/bugs have been fixed. 10 * 11 */ 12 13 #include "udf_osta.h" 14 15 16 /*****************************************************************************/ 17 /*********************************************************************** 18 * OSTA compliant Unicode compression, uncompression routines. 19 * Copyright 1995 Micro Design International, Inc. 20 * Written by Jason M. Rinn. 21 * Micro Design International gives permission for the free use of the 22 * following source code. 23 */ 24 25 /*********************************************************************** 26 * Takes an OSTA CS0 compressed unicode name, and converts 27 * it to Unicode. 28 * The Unicode output will be in the byte order 29 * that the local compiler uses for 16-bit values. 30 * NOTE: This routine only performs error checking on the compID. 31 * It is up to the user to ensure that the unicode buffer is large 32 * enough, and that the compressed unicode name is correct. 33 * 34 * RETURN VALUE 35 * 36 * The number of unicode characters which were uncompressed. 37 * A -1 is returned if the compression ID is invalid. 38 */ 39 int 40 udf_UncompressUnicode( 41 int numberOfBytes, /* (Input) number of bytes read from media. */ 42 byte *UDFCompressed, /* (Input) bytes read from media. */ 43 unicode_t *unicode) /* (Output) uncompressed unicode characters. */ 44 { 45 unsigned int compID; 46 int returnValue, unicodeIndex, byteIndex; 47 48 /* Use UDFCompressed to store current byte being read. */ 49 compID = UDFCompressed[0]; 50 51 /* First check for valid compID. */ 52 if (compID != 8 && compID != 16) { 53 returnValue = -1; 54 } else { 55 unicodeIndex = 0; 56 byteIndex = 1; 57 58 /* Loop through all the bytes. */ 59 while (byteIndex < numberOfBytes) { 60 if (compID == 16) { 61 /* Move the first byte to the high bits of the 62 * unicode char. 63 */ 64 unicode[unicodeIndex] = 65 UDFCompressed[byteIndex++] << 8; 66 } else { 67 unicode[unicodeIndex] = 0; 68 } 69 if (byteIndex < numberOfBytes) { 70 /*Then the next byte to the low bits. */ 71 unicode[unicodeIndex] |= 72 UDFCompressed[byteIndex++]; 73 } 74 unicodeIndex++; 75 } 76 returnValue = unicodeIndex; 77 } 78 return(returnValue); 79 } 80 81 /*********************************************************************** 82 * DESCRIPTION: 83 * Takes a string of unicode wide characters and returns an OSTA CS0 84 * compressed unicode string. The unicode MUST be in the byte order of 85 * the compiler in order to obtain correct results. Returns an error 86 * if the compression ID is invalid. 87 * 88 * NOTE: This routine assumes the implementation already knows, by 89 * the local environment, how many bits are appropriate and 90 * therefore does no checking to test if the input characters fit 91 * into that number of bits or not. 92 * 93 * RETURN VALUE 94 * 95 * The total number of bytes in the compressed OSTA CS0 string, 96 * including the compression ID. 97 * A -1 is returned if the compression ID is invalid. 98 */ 99 int 100 udf_CompressUnicode( 101 int numberOfChars, /* (Input) number of unicode characters. */ 102 int compID, /* (Input) compression ID to be used. */ 103 unicode_t *unicode, /* (Input) unicode characters to compress. */ 104 byte *UDFCompressed) /* (Output) compressed string, as bytes. */ 105 { 106 int byteIndex, unicodeIndex; 107 108 if (compID != 8 && compID != 16) { 109 byteIndex = -1; /* Unsupported compression ID ! */ 110 } else { 111 /* Place compression code in first byte. */ 112 UDFCompressed[0] = compID; 113 114 byteIndex = 1; 115 unicodeIndex = 0; 116 while (unicodeIndex < numberOfChars) { 117 if (compID == 16) { 118 /* First, place the high bits of the char 119 * into the byte stream. 120 */ 121 UDFCompressed[byteIndex++] = 122 (unicode[unicodeIndex] & 0xFF00) >> 8; 123 } 124 /*Then place the low bits into the stream. */ 125 UDFCompressed[byteIndex++] = 126 unicode[unicodeIndex] & 0x00FF; 127 unicodeIndex++; 128 } 129 } 130 return(byteIndex); 131 } 132 133 /*****************************************************************************/ 134 /* 135 * CRC 010041 136 */ 137 static unsigned short crc_table[256] = { 138 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, 139 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, 140 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, 141 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, 142 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, 143 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, 144 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, 145 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, 146 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, 147 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, 148 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, 149 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, 150 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, 151 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, 152 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, 153 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, 154 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, 155 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, 156 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, 157 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, 158 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, 159 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, 160 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, 161 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, 162 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, 163 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, 164 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, 165 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, 166 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, 167 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, 168 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, 169 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 170 }; 171 172 unsigned short 173 udf_cksum(s, n) 174 unsigned char *s; 175 int n; 176 { 177 unsigned short crc=0; 178 179 while (n-- > 0) 180 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8); 181 return crc; 182 } 183 184 /* UNICODE Checksum */ 185 unsigned short 186 udf_unicode_cksum(s, n) 187 unsigned short *s; 188 int n; 189 { 190 unsigned short crc=0; 191 192 while (n-- > 0) { 193 /* Take high order byte first--corresponds to a big endian 194 * byte stream. 195 */ 196 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8); 197 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8); 198 } 199 return crc; 200 } 201 202 #ifdef MAIN 203 unsigned char bytes[] = { 0x70, 0x6A, 0x77 }; 204 205 main() 206 { 207 unsigned short x; 208 x = cksum(bytes, sizeof bytes); 209 printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299); 210 exit(0); 211 } 212 #endif 213 214 /*****************************************************************************/ 215 /* #ifdef NEEDS_ISPRINT */ 216 /*********************************************************************** 217 * OSTA UDF compliant file name translation routine for OS/2, 218 * Windows 95, Windows NT, Macintosh and UNIX. 219 * Copyright 1995 Micro Design International, Inc. 220 * Written by Jason M. Rinn. 221 * Micro Design International gives permission for the free use of the 222 * following source code. 223 */ 224 225 /*********************************************************************** 226 * To use these routines with different operating systems. 227 * 228 * OS/2 229 * Define OS2 230 * Define MAXLEN = 254 231 * 232 * Windows 95 233 * Define WIN_95 234 * Define MAXLEN = 255 235 * 236 * Windows NT 237 * Define WIN_NT 238 * Define MAXLEN = 255 239 * 240 * Macintosh: 241 * Define MAC. 242 * Define MAXLEN = 31. 243 * 244 * UNIX 245 * Define UNIX. 246 * Define MAXLEN as specified by unix version. 247 */ 248 249 #define ILLEGAL_CHAR_MARK 0x005F 250 #define CRC_MARK 0x0023 251 #define EXT_SIZE 5 252 #define PERIOD 0x002E 253 #define SPACE 0x0020 254 255 /*** PROTOTYPES ***/ 256 int IsIllegal(unicode_t ch); 257 258 /* Define a function or macro which determines if a Unicode character is 259 * printable under your implementation. 260 */ 261 262 263 /* #include <stdio.h> */ 264 static int UnicodeIsPrint(unicode_t ch) { 265 return (ch >=' ') && (ch < 127); 266 } 267 268 269 int UnicodeLength(unicode_t *string) { 270 int length; 271 length = 0; 272 while (*string++) length++; 273 274 return length; 275 } 276 277 278 static int isprint(unsigned char c) { 279 return (c >= ' ') && (c != 127); 280 } 281 282 283 /*********************************************************************** 284 * Translates a long file name to one using a MAXLEN and an illegal 285 * char set in accord with the OSTA requirements. Assumes the name has 286 * already been translated to Unicode. 287 * 288 * RETURN VALUE 289 * 290 * Number of unicode characters in translated name. 291 */ 292 int UDFTransName( 293 unicode_t *newName, /* (Output)Translated name. Must be of length 294 * MAXLEN */ 295 unicode_t *udfName, /* (Input) Name from UDF volume.*/ 296 int udfLen) /* (Input) Length of UDF Name. */ 297 { 298 int Index, newIndex = 0, needsCRC = false; /* index is shadowed */ 299 int extIndex = 0, newExtIndex = 0, hasExt = false; 300 #if defined OS2 || defined WIN_95 || defined WIN_NT 301 int trailIndex = 0; 302 #endif 303 unsigned short valueCRC; 304 unicode_t current; 305 const char hexChar[] = "0123456789ABCDEF"; 306 307 for (Index = 0; Index < udfLen; Index++) { 308 current = udfName[Index]; 309 310 if (IsIllegal(current) || !UnicodeIsPrint(current)) { 311 needsCRC = true; 312 /* Replace Illegal and non-displayable chars with 313 * underscore. 314 */ 315 current = ILLEGAL_CHAR_MARK; 316 /* Skip any other illegal or non-displayable 317 * characters. 318 */ 319 while(Index+1 < udfLen && (IsIllegal(udfName[Index+1]) 320 || !UnicodeIsPrint(udfName[Index+1]))) { 321 Index++; 322 } 323 } 324 325 /* Record position of extension, if one is found. */ 326 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) { 327 if (udfLen == Index + 1) { 328 /* A trailing period is NOT an extension. */ 329 hasExt = false; 330 } else { 331 hasExt = true; 332 extIndex = Index; 333 newExtIndex = newIndex; 334 } 335 } 336 337 #if defined OS2 || defined WIN_95 || defined WIN_NT 338 /* Record position of last char which is NOT period or space. */ 339 else if (current != PERIOD && current != SPACE) { 340 trailIndex = newIndex; 341 } 342 #endif 343 344 if (newIndex < MAXLEN) { 345 newName[newIndex++] = current; 346 } else { 347 needsCRC = true; 348 } 349 } 350 351 #if defined OS2 || defined WIN_95 || defined WIN_NT 352 /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */ 353 if (trailIndex != newIndex - 1) { 354 newIndex = trailIndex + 1; 355 needsCRC = true; 356 hasExt = false; /* Trailing period does not make an 357 * extension. */ 358 } 359 #endif 360 361 if (needsCRC) { 362 unicode_t ext[EXT_SIZE]; 363 int localExtIndex = 0; 364 if (hasExt) { 365 int maxFilenameLen; 366 /* Translate extension, and store it in ext. */ 367 for(Index = 0; Index<EXT_SIZE && 368 extIndex + Index +1 < udfLen; Index++ ) { 369 current = udfName[extIndex + Index + 1]; 370 if (IsIllegal(current) || 371 !UnicodeIsPrint(current)) { 372 needsCRC = 1; 373 /* Replace Illegal and non-displayable 374 * chars with underscore. 375 */ 376 current = ILLEGAL_CHAR_MARK; 377 /* Skip any other illegal or 378 * non-displayable characters. 379 */ 380 while(Index + 1 < EXT_SIZE 381 && (IsIllegal(udfName[extIndex + 382 Index + 2]) || 383 !isprint(udfName[extIndex + 384 Index + 2]))) { 385 Index++; 386 } 387 } 388 ext[localExtIndex++] = current; 389 } 390 391 /* Truncate filename to leave room for extension and 392 * CRC. 393 */ 394 maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1); 395 if (newIndex > maxFilenameLen) { 396 newIndex = maxFilenameLen; 397 } else { 398 newIndex = newExtIndex; 399 } 400 } else if (newIndex > MAXLEN - 5) { 401 /*If no extension, make sure to leave room for CRC. */ 402 newIndex = MAXLEN - 5; 403 } 404 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */ 405 406 /*Calculate CRC from original filename from FileIdentifier. */ 407 valueCRC = udf_unicode_cksum(udfName, udfLen); 408 /* Convert 16-bits of CRC to hex characters. */ 409 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12]; 410 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8]; 411 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4]; 412 newName[newIndex++] = hexChar[(valueCRC & 0x000f)]; 413 414 /* Place a translated extension at end, if found. */ 415 if (hasExt) { 416 newName[newIndex++] = PERIOD; 417 for (Index = 0;Index < localExtIndex ;Index++ ) { 418 newName[newIndex++] = ext[Index]; 419 } 420 } 421 } 422 return(newIndex); 423 } 424 425 #if defined OS2 || defined WIN_95 || defined WIN_NT 426 /*********************************************************************** 427 * Decides if a Unicode character matches one of a list 428 * of ASCII characters. 429 * Used by OS2 version of IsIllegal for readability, since all of the 430 * illegal characters above 0x0020 are in the ASCII subset of Unicode. 431 * Works very similarly to the standard C function strchr(). 432 * 433 * RETURN VALUE 434 * 435 * Non-zero if the Unicode character is in the given ASCII string. 436 */ 437 int UnicodeInString( 438 unsigned char *string, /* (Input) String to search through. */ 439 unicode_t ch) /* (Input) Unicode char to search for. */ 440 { 441 int found = false; 442 while (*string != '\0' && found == false) { 443 /* These types should compare, since both are unsigned 444 * numbers. */ 445 if (*string == ch) { 446 found = true; 447 } 448 string++; 449 } 450 return(found); 451 } 452 #endif /* OS2 */ 453 454 /*********************************************************************** 455 * Decides whether the given character is illegal for a given OS. 456 * 457 * RETURN VALUE 458 * 459 * Non-zero if char is illegal. 460 */ 461 int IsIllegal(unicode_t ch) 462 { 463 #ifdef MAC 464 /* Only illegal character on the MAC is the colon. */ 465 if (ch == 0x003A) { 466 return(1); 467 } else { 468 return(0); 469 } 470 471 #elif defined UNIX 472 /* Illegal UNIX characters are NULL and slash. */ 473 if (ch == 0x0000 || ch == 0x002F) { 474 return(1); 475 } else { 476 return(0); 477 } 478 479 #elif defined OS2 || defined WIN_95 || defined WIN_NT 480 /* Illegal char's for OS/2 according to WARP toolkit. */ 481 if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) { 482 return(1); 483 } else { 484 return(0); 485 } 486 #endif 487 } 488 /* #endif*/ /* NEEDS_ISPRINT */ 489 490