xref: /minix3/sys/fs/udf/udf_osta.c (revision 9f988b79349f9b89ecc822458c30ec8897558560)
1*9f988b79SJean-Baptiste Boric /* $NetBSD: udf_osta.c,v 1.10 2013/08/05 17:02:54 joerg Exp $ */
2*9f988b79SJean-Baptiste Boric #if HAVE_NBTOOL_CONFIG_H
3*9f988b79SJean-Baptiste Boric #include "nbtool_config.h"
4*9f988b79SJean-Baptiste Boric #endif
5*9f988b79SJean-Baptiste Boric 
6*9f988b79SJean-Baptiste Boric #include <sys/cdefs.h>
7*9f988b79SJean-Baptiste Boric __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.10 2013/08/05 17:02:54 joerg Exp $");
8*9f988b79SJean-Baptiste Boric 
/*
 * Various routines from the OSTA 2.01 specs.  Copyrights are included with
 * each code segment.  Slight whitespace modifications have been made for
 * formatting purposes.  Typos/bugs have been fixed.
 *
 */
15*9f988b79SJean-Baptiste Boric 
16*9f988b79SJean-Baptiste Boric #include "udf_osta.h"
17*9f988b79SJean-Baptiste Boric 
18*9f988b79SJean-Baptiste Boric #ifndef _KERNEL
19*9f988b79SJean-Baptiste Boric #include <ctype.h>
20*9f988b79SJean-Baptiste Boric #endif
21*9f988b79SJean-Baptiste Boric 
/*****************************************************************************/
/***********************************************************************
 * OSTA compliant Unicode compression, uncompression routines.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */
30*9f988b79SJean-Baptiste Boric 
31*9f988b79SJean-Baptiste Boric /***********************************************************************
32*9f988b79SJean-Baptiste Boric  * Takes an OSTA CS0 compressed unicode name, and converts
33*9f988b79SJean-Baptiste Boric  * it to Unicode.
34*9f988b79SJean-Baptiste Boric  * The Unicode output will be in the byte order
35*9f988b79SJean-Baptiste Boric  * that the local compiler uses for 16-bit values.
36*9f988b79SJean-Baptiste Boric  * NOTE: This routine only performs error checking on the compID.
37*9f988b79SJean-Baptiste Boric  * It is up to the user to ensure that the unicode buffer is large
38*9f988b79SJean-Baptiste Boric  * enough, and that the compressed unicode name is correct.
39*9f988b79SJean-Baptiste Boric  *
40*9f988b79SJean-Baptiste Boric  * RETURN VALUE
41*9f988b79SJean-Baptiste Boric  *
42*9f988b79SJean-Baptiste Boric  * The number of unicode characters which were uncompressed.
43*9f988b79SJean-Baptiste Boric  * A -1 is returned if the compression ID is invalid.
44*9f988b79SJean-Baptiste Boric  */
45*9f988b79SJean-Baptiste Boric int
udf_UncompressUnicode(int numberOfBytes,byte * UDFCompressed,unicode_t * unicode)46*9f988b79SJean-Baptiste Boric udf_UncompressUnicode(
47*9f988b79SJean-Baptiste Boric 	int numberOfBytes,	/* (Input) number of bytes read from media. */
48*9f988b79SJean-Baptiste Boric 	byte *UDFCompressed,	/* (Input) bytes read from media. */
49*9f988b79SJean-Baptiste Boric 	unicode_t *unicode)	/* (Output) uncompressed unicode characters. */
50*9f988b79SJean-Baptiste Boric {
51*9f988b79SJean-Baptiste Boric 	unsigned int compID;
52*9f988b79SJean-Baptiste Boric 	int returnValue, unicodeIndex, byteIndex;
53*9f988b79SJean-Baptiste Boric 
54*9f988b79SJean-Baptiste Boric 	/* Use UDFCompressed to store current byte being read. */
55*9f988b79SJean-Baptiste Boric 	compID = UDFCompressed[0];
56*9f988b79SJean-Baptiste Boric 
57*9f988b79SJean-Baptiste Boric 	/* First check for valid compID. */
58*9f988b79SJean-Baptiste Boric 	if (compID != 8 && compID != 16) {
59*9f988b79SJean-Baptiste Boric 		returnValue = -1;
60*9f988b79SJean-Baptiste Boric 	} else {
61*9f988b79SJean-Baptiste Boric 		unicodeIndex = 0;
62*9f988b79SJean-Baptiste Boric 		byteIndex = 1;
63*9f988b79SJean-Baptiste Boric 
64*9f988b79SJean-Baptiste Boric 		/* Loop through all the bytes. */
65*9f988b79SJean-Baptiste Boric 		while (byteIndex < numberOfBytes) {
66*9f988b79SJean-Baptiste Boric 			if (compID == 16) {
67*9f988b79SJean-Baptiste Boric 				/* Move the first byte to the high bits of the
68*9f988b79SJean-Baptiste Boric 				 * unicode char.
69*9f988b79SJean-Baptiste Boric 				 */
70*9f988b79SJean-Baptiste Boric 				unicode[unicodeIndex] =
71*9f988b79SJean-Baptiste Boric 				    UDFCompressed[byteIndex++] << 8;
72*9f988b79SJean-Baptiste Boric 			} else {
73*9f988b79SJean-Baptiste Boric 				unicode[unicodeIndex] = 0;
74*9f988b79SJean-Baptiste Boric 			}
75*9f988b79SJean-Baptiste Boric 			if (byteIndex < numberOfBytes) {
76*9f988b79SJean-Baptiste Boric 				/*Then the next byte to the low bits. */
77*9f988b79SJean-Baptiste Boric 				unicode[unicodeIndex] |=
78*9f988b79SJean-Baptiste Boric 				    UDFCompressed[byteIndex++];
79*9f988b79SJean-Baptiste Boric 			}
80*9f988b79SJean-Baptiste Boric 			unicodeIndex++;
81*9f988b79SJean-Baptiste Boric 		}
82*9f988b79SJean-Baptiste Boric 		returnValue = unicodeIndex;
83*9f988b79SJean-Baptiste Boric 	}
84*9f988b79SJean-Baptiste Boric 	return(returnValue);
85*9f988b79SJean-Baptiste Boric }
86*9f988b79SJean-Baptiste Boric 
87*9f988b79SJean-Baptiste Boric /***********************************************************************
88*9f988b79SJean-Baptiste Boric  * DESCRIPTION:
89*9f988b79SJean-Baptiste Boric  * Takes a string of unicode wide characters and returns an OSTA CS0
90*9f988b79SJean-Baptiste Boric  * compressed unicode string. The unicode MUST be in the byte order of
91*9f988b79SJean-Baptiste Boric  * the compiler in order to obtain correct results. Returns an error
92*9f988b79SJean-Baptiste Boric  * if the compression ID is invalid.
93*9f988b79SJean-Baptiste Boric  *
94*9f988b79SJean-Baptiste Boric  * NOTE: This routine assumes the implementation already knows, by
95*9f988b79SJean-Baptiste Boric  * the local environment, how many bits are appropriate and
96*9f988b79SJean-Baptiste Boric  * therefore does no checking to test if the input characters fit
97*9f988b79SJean-Baptiste Boric  * into that number of bits or not.
98*9f988b79SJean-Baptiste Boric  *
99*9f988b79SJean-Baptiste Boric  * RETURN VALUE
100*9f988b79SJean-Baptiste Boric  *
101*9f988b79SJean-Baptiste Boric  * The total number of bytes in the compressed OSTA CS0 string,
102*9f988b79SJean-Baptiste Boric  * including the compression ID.
103*9f988b79SJean-Baptiste Boric  * A -1 is returned if the compression ID is invalid.
104*9f988b79SJean-Baptiste Boric  */
105*9f988b79SJean-Baptiste Boric int
udf_CompressUnicode(int numberOfChars,int compID,unicode_t * unicode,byte * UDFCompressed)106*9f988b79SJean-Baptiste Boric udf_CompressUnicode(
107*9f988b79SJean-Baptiste Boric 	int numberOfChars,	/* (Input) number of unicode characters. */
108*9f988b79SJean-Baptiste Boric 	int compID,		/* (Input) compression ID to be used. */
109*9f988b79SJean-Baptiste Boric 	unicode_t *unicode,	/* (Input) unicode characters to compress. */
110*9f988b79SJean-Baptiste Boric 	byte *UDFCompressed)	/* (Output) compressed string, as bytes. */
111*9f988b79SJean-Baptiste Boric {
112*9f988b79SJean-Baptiste Boric 	int byteIndex, unicodeIndex;
113*9f988b79SJean-Baptiste Boric 
114*9f988b79SJean-Baptiste Boric 	if (compID != 8 && compID != 16) {
115*9f988b79SJean-Baptiste Boric 		byteIndex = -1; /* Unsupported compression ID ! */
116*9f988b79SJean-Baptiste Boric 	} else {
117*9f988b79SJean-Baptiste Boric 		/* Place compression code in first byte. */
118*9f988b79SJean-Baptiste Boric 		UDFCompressed[0] = compID;
119*9f988b79SJean-Baptiste Boric 
120*9f988b79SJean-Baptiste Boric 		byteIndex = 1;
121*9f988b79SJean-Baptiste Boric 		unicodeIndex = 0;
122*9f988b79SJean-Baptiste Boric 		while (unicodeIndex < numberOfChars) {
123*9f988b79SJean-Baptiste Boric 			if (compID == 16) {
124*9f988b79SJean-Baptiste Boric 				/* First, place the high bits of the char
125*9f988b79SJean-Baptiste Boric 				 * into the byte stream.
126*9f988b79SJean-Baptiste Boric 				 */
127*9f988b79SJean-Baptiste Boric 				UDFCompressed[byteIndex++] =
128*9f988b79SJean-Baptiste Boric 				    (unicode[unicodeIndex] & 0xFF00) >> 8;
129*9f988b79SJean-Baptiste Boric 			}
130*9f988b79SJean-Baptiste Boric 			/*Then place the low bits into the stream. */
131*9f988b79SJean-Baptiste Boric 			UDFCompressed[byteIndex++] =
132*9f988b79SJean-Baptiste Boric 			    unicode[unicodeIndex] & 0x00FF;
133*9f988b79SJean-Baptiste Boric 			unicodeIndex++;
134*9f988b79SJean-Baptiste Boric 		}
135*9f988b79SJean-Baptiste Boric 	}
136*9f988b79SJean-Baptiste Boric 	return(byteIndex);
137*9f988b79SJean-Baptiste Boric }
138*9f988b79SJean-Baptiste Boric 
/*****************************************************************************/
/*
 * CRC 010041
 *
 * Byte-wise lookup table for the 16-bit CRC used by udf_cksum() and
 * udf_unicode_cksum().  010041 is octal for 0x1021, which is also the
 * entry for index 1.  The table is only ever read, hence const.
 */
static const unsigned short crc_table[256] = {
	0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
	0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
	0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
	0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
	0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
	0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
	0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
	0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
	0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
	0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
	0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
	0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
	0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
	0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
	0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
	0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
	0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
	0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
	0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
	0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
	0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
	0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
	0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
	0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
	0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
	0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
	0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
	0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
	0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
	0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
	0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
	0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
177*9f988b79SJean-Baptiste Boric 
178*9f988b79SJean-Baptiste Boric unsigned short
udf_cksum(unsigned char * s,int n)179*9f988b79SJean-Baptiste Boric udf_cksum(unsigned char *s, int n)
180*9f988b79SJean-Baptiste Boric {
181*9f988b79SJean-Baptiste Boric 	unsigned short crc=0;
182*9f988b79SJean-Baptiste Boric 
183*9f988b79SJean-Baptiste Boric 	while (n-- > 0)
184*9f988b79SJean-Baptiste Boric 		crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
185*9f988b79SJean-Baptiste Boric 	return crc;
186*9f988b79SJean-Baptiste Boric }
187*9f988b79SJean-Baptiste Boric 
188*9f988b79SJean-Baptiste Boric /* UNICODE Checksum */
189*9f988b79SJean-Baptiste Boric unsigned short
udf_unicode_cksum(unsigned short * s,int n)190*9f988b79SJean-Baptiste Boric udf_unicode_cksum(unsigned short *s, int n)
191*9f988b79SJean-Baptiste Boric {
192*9f988b79SJean-Baptiste Boric 	unsigned short crc=0;
193*9f988b79SJean-Baptiste Boric 
194*9f988b79SJean-Baptiste Boric 	while (n-- > 0) {
195*9f988b79SJean-Baptiste Boric 		/* Take high order byte first--corresponds to a big endian
196*9f988b79SJean-Baptiste Boric 		 * byte stream.
197*9f988b79SJean-Baptiste Boric 		 */
198*9f988b79SJean-Baptiste Boric 		crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
199*9f988b79SJean-Baptiste Boric 		crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
200*9f988b79SJean-Baptiste Boric 	}
201*9f988b79SJean-Baptiste Boric 	return crc;
202*9f988b79SJean-Baptiste Boric }
203*9f988b79SJean-Baptiste Boric 
204*9f988b79SJean-Baptiste Boric 
/*
 * Calculates a 16-bit checksum of the Implementation Use
 * Extended Attribute header or Application Use Extended Attribute
 * header. The fields AttributeType through ImplementationIdentifier
 * (or ApplicationIdentifier) inclusively represent the
 * data covered by the checksum (48 bytes).
 *
 * data must point at (at least) 48 readable bytes.  The result is the
 * plain sum of those bytes, kept in 16 bits.
 */
uint16_t
udf_ea_cksum(uint8_t *data)
{
	enum { EA_CKSUM_SPAN = 48 };	/* bytes covered by the checksum */
	uint16_t checksum = 0;
	int count;

	for (count = 0; count < EA_CKSUM_SPAN; count++) {
		checksum += data[count];
	}

	return checksum;
}
223*9f988b79SJean-Baptiste Boric 
224*9f988b79SJean-Baptiste Boric 
#ifdef MAIN
/*
 * Stand-alone self test, built only when MAIN is defined: runs udf_cksum()
 * over three known bytes and prints the result next to the expected CRC.
 */
#include <stdio.h>

unsigned char bytes[] = { 0x70, 0x6A, 0x77 };

int
main(void)
{
	unsigned short x;

	x = udf_cksum(bytes, sizeof bytes);
	printf("checksum: calculated=%4.4x, correct=%4.4x\n",
	    (unsigned int)x, 0x3299u);
	return 0;
}
#endif
236*9f988b79SJean-Baptiste Boric 
/*****************************************************************************/
/* #ifdef NEEDS_ISPRINT */
/***********************************************************************
 * OSTA UDF compliant file name translation routine for OS/2,
 * Windows 95, Windows NT, Macintosh and UNIX.
 * Copyright 1995 Micro Design International, Inc.
 * Written by Jason M. Rinn.
 * Micro Design International gives permission for the free use of the
 * following source code.
 */
247*9f988b79SJean-Baptiste Boric 
/***********************************************************************
 * To use these routines with different operating systems.
 *
 * OS/2
 * Define OS2
 * Define MAXLEN = 254
 *
 * Windows 95
 * Define WIN_95
 * Define MAXLEN = 255
 *
 * Windows NT
 * Define WIN_NT
 * Define MAXLEN = 255
 *
 * Macintosh:
 * Define MAC.
 * Define MAXLEN = 31.
 *
 * UNIX
 * Define UNIX.
 * Define MAXLEN as specified by unix version.
 */
271*9f988b79SJean-Baptiste Boric 
#define	ILLEGAL_CHAR_MARK	0x005F	/* '_': stands in for illegal chars */
#define	CRC_MARK	0x0023	/* '#': precedes the hex CRC in a name */
#define	EXT_SIZE	5	/* max chars after '.' treated as extension */
#define	PERIOD	0x002E	/* '.' */
#define	SPACE	0x0020	/* ' ' */
277*9f988b79SJean-Baptiste Boric 
278*9f988b79SJean-Baptiste Boric /*** PROTOTYPES ***/
279*9f988b79SJean-Baptiste Boric int IsIllegal(unicode_t ch);
280*9f988b79SJean-Baptiste Boric 
281*9f988b79SJean-Baptiste Boric /* Define a function or macro which determines if a Unicode character is
282*9f988b79SJean-Baptiste Boric  * printable under your implementation.
283*9f988b79SJean-Baptiste Boric  */
284*9f988b79SJean-Baptiste Boric 
285*9f988b79SJean-Baptiste Boric 
286*9f988b79SJean-Baptiste Boric /* #include <stdio.h> */
UnicodeIsPrint(unicode_t ch)287*9f988b79SJean-Baptiste Boric static int UnicodeIsPrint(unicode_t ch) {
288*9f988b79SJean-Baptiste Boric 	return (ch >=' ') && (ch != 127);
289*9f988b79SJean-Baptiste Boric }
290*9f988b79SJean-Baptiste Boric 
291*9f988b79SJean-Baptiste Boric 
/*
 * Returns the number of unicode characters in string that come before the
 * first zero character.  string must be zero terminated.
 */
int
UnicodeLength(unicode_t *string)
{
	int length = 0;

	while (string[length] != 0) {
		length++;
	}

	return length;
}
299*9f988b79SJean-Baptiste Boric 
300*9f988b79SJean-Baptiste Boric 
301*9f988b79SJean-Baptiste Boric #ifdef _KERNEL
isprint(int c)302*9f988b79SJean-Baptiste Boric static int isprint(int c) {
303*9f988b79SJean-Baptiste Boric 	return (c >= ' ') && (c != 127);
304*9f988b79SJean-Baptiste Boric }
305*9f988b79SJean-Baptiste Boric #endif
306*9f988b79SJean-Baptiste Boric 
307*9f988b79SJean-Baptiste Boric 
308*9f988b79SJean-Baptiste Boric /***********************************************************************
309*9f988b79SJean-Baptiste Boric  * Translates a long file name to one using a MAXLEN and an illegal
310*9f988b79SJean-Baptiste Boric  * char set in accord with the OSTA requirements. Assumes the name has
311*9f988b79SJean-Baptiste Boric  * already been translated to Unicode.
312*9f988b79SJean-Baptiste Boric  *
313*9f988b79SJean-Baptiste Boric  * RETURN VALUE
314*9f988b79SJean-Baptiste Boric  *
315*9f988b79SJean-Baptiste Boric  * Number of unicode characters in translated name.
316*9f988b79SJean-Baptiste Boric  */
UDFTransName(unicode_t * newName,unicode_t * udfName,int udfLen)317*9f988b79SJean-Baptiste Boric int UDFTransName(
318*9f988b79SJean-Baptiste Boric 	unicode_t *newName,	/* (Output)Translated name. Must be of length
319*9f988b79SJean-Baptiste Boric 				 * MAXLEN */
320*9f988b79SJean-Baptiste Boric 	unicode_t *udfName,	/* (Input) Name from UDF volume.*/
321*9f988b79SJean-Baptiste Boric 	int udfLen)		/* (Input) Length of UDF Name. */
322*9f988b79SJean-Baptiste Boric {
323*9f988b79SJean-Baptiste Boric 	int Index, newIndex = 0, needsCRC = false;	/* index is shadowed */
324*9f988b79SJean-Baptiste Boric 	int extIndex = 0, newExtIndex = 0, hasExt = false;
325*9f988b79SJean-Baptiste Boric #if defined OS2 || defined WIN_95 || defined WIN_NT
326*9f988b79SJean-Baptiste Boric 	int trailIndex = 0;
327*9f988b79SJean-Baptiste Boric #endif
328*9f988b79SJean-Baptiste Boric 	unsigned short valueCRC;
329*9f988b79SJean-Baptiste Boric 	unicode_t current;
330*9f988b79SJean-Baptiste Boric 	const char hexChar[] = "0123456789ABCDEF";
331*9f988b79SJean-Baptiste Boric 
332*9f988b79SJean-Baptiste Boric 	for (Index = 0; Index < udfLen; Index++) {
333*9f988b79SJean-Baptiste Boric 		current = udfName[Index];
334*9f988b79SJean-Baptiste Boric 
335*9f988b79SJean-Baptiste Boric 		if (IsIllegal(current) || !UnicodeIsPrint(current)) {
336*9f988b79SJean-Baptiste Boric 			needsCRC = true;
337*9f988b79SJean-Baptiste Boric 			/* Replace Illegal and non-displayable chars with
338*9f988b79SJean-Baptiste Boric 			 * underscore.
339*9f988b79SJean-Baptiste Boric 			 */
340*9f988b79SJean-Baptiste Boric 			current = ILLEGAL_CHAR_MARK;
341*9f988b79SJean-Baptiste Boric 			/* Skip any other illegal or non-displayable
342*9f988b79SJean-Baptiste Boric 			 * characters.
343*9f988b79SJean-Baptiste Boric 			 */
344*9f988b79SJean-Baptiste Boric 			while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
345*9f988b79SJean-Baptiste Boric 			    || !UnicodeIsPrint(udfName[Index+1]))) {
346*9f988b79SJean-Baptiste Boric 				Index++;
347*9f988b79SJean-Baptiste Boric 			}
348*9f988b79SJean-Baptiste Boric 		}
349*9f988b79SJean-Baptiste Boric 
350*9f988b79SJean-Baptiste Boric 		/* Record position of extension, if one is found. */
351*9f988b79SJean-Baptiste Boric 		if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
352*9f988b79SJean-Baptiste Boric 			if (udfLen == Index + 1) {
353*9f988b79SJean-Baptiste Boric 				/* A trailing period is NOT an extension. */
354*9f988b79SJean-Baptiste Boric 				hasExt = false;
355*9f988b79SJean-Baptiste Boric 			} else {
356*9f988b79SJean-Baptiste Boric 				hasExt = true;
357*9f988b79SJean-Baptiste Boric 				extIndex = Index;
358*9f988b79SJean-Baptiste Boric 				newExtIndex = newIndex;
359*9f988b79SJean-Baptiste Boric 			}
360*9f988b79SJean-Baptiste Boric 		}
361*9f988b79SJean-Baptiste Boric 
362*9f988b79SJean-Baptiste Boric #if defined OS2 || defined WIN_95 || defined WIN_NT
363*9f988b79SJean-Baptiste Boric 		/* Record position of last char which is NOT period or space. */
364*9f988b79SJean-Baptiste Boric 		else if (current != PERIOD && current != SPACE) {
365*9f988b79SJean-Baptiste Boric 			trailIndex = newIndex;
366*9f988b79SJean-Baptiste Boric 		}
367*9f988b79SJean-Baptiste Boric #endif
368*9f988b79SJean-Baptiste Boric 
369*9f988b79SJean-Baptiste Boric 		if (newIndex < MAXLEN) {
370*9f988b79SJean-Baptiste Boric 			newName[newIndex++] = current;
371*9f988b79SJean-Baptiste Boric 		} else {
372*9f988b79SJean-Baptiste Boric 			needsCRC = true;
373*9f988b79SJean-Baptiste Boric 		}
374*9f988b79SJean-Baptiste Boric 	}
375*9f988b79SJean-Baptiste Boric 
376*9f988b79SJean-Baptiste Boric #if defined OS2 || defined WIN_95 || defined WIN_NT
377*9f988b79SJean-Baptiste Boric 	/* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
378*9f988b79SJean-Baptiste Boric 	if (trailIndex != newIndex - 1) {
379*9f988b79SJean-Baptiste Boric 		newIndex = trailIndex + 1;
380*9f988b79SJean-Baptiste Boric 		needsCRC = true;
381*9f988b79SJean-Baptiste Boric 		hasExt = false; /* Trailing period does not make an
382*9f988b79SJean-Baptiste Boric 				 * extension. */
383*9f988b79SJean-Baptiste Boric 	}
384*9f988b79SJean-Baptiste Boric #endif
385*9f988b79SJean-Baptiste Boric 
386*9f988b79SJean-Baptiste Boric 	if (needsCRC) {
387*9f988b79SJean-Baptiste Boric 		unicode_t ext[EXT_SIZE];
388*9f988b79SJean-Baptiste Boric 		int localExtIndex = 0;
389*9f988b79SJean-Baptiste Boric 		if (hasExt) {
390*9f988b79SJean-Baptiste Boric 			int maxFilenameLen;
391*9f988b79SJean-Baptiste Boric 			/* Translate extension, and store it in ext. */
392*9f988b79SJean-Baptiste Boric 			for(Index = 0; Index<EXT_SIZE &&
393*9f988b79SJean-Baptiste Boric 			    extIndex + Index +1 < udfLen; Index++ ) {
394*9f988b79SJean-Baptiste Boric 				current = udfName[extIndex + Index + 1];
395*9f988b79SJean-Baptiste Boric 				if (IsIllegal(current) ||
396*9f988b79SJean-Baptiste Boric 				    !UnicodeIsPrint(current)) {
397*9f988b79SJean-Baptiste Boric 					needsCRC = 1;
398*9f988b79SJean-Baptiste Boric 					/* Replace Illegal and non-displayable
399*9f988b79SJean-Baptiste Boric 					 * chars with underscore.
400*9f988b79SJean-Baptiste Boric 					 */
401*9f988b79SJean-Baptiste Boric 					current = ILLEGAL_CHAR_MARK;
402*9f988b79SJean-Baptiste Boric 					/* Skip any other illegal or
403*9f988b79SJean-Baptiste Boric 					 * non-displayable characters.
404*9f988b79SJean-Baptiste Boric 					 */
405*9f988b79SJean-Baptiste Boric 					while(Index + 1 < EXT_SIZE
406*9f988b79SJean-Baptiste Boric 					    && (IsIllegal(udfName[extIndex +
407*9f988b79SJean-Baptiste Boric 					    Index + 2]) ||
408*9f988b79SJean-Baptiste Boric 					    !isprint(udfName[extIndex +
409*9f988b79SJean-Baptiste Boric 					    Index + 2]))) {
410*9f988b79SJean-Baptiste Boric 						Index++;
411*9f988b79SJean-Baptiste Boric 					}
412*9f988b79SJean-Baptiste Boric 				}
413*9f988b79SJean-Baptiste Boric 				ext[localExtIndex++] = current;
414*9f988b79SJean-Baptiste Boric 			}
415*9f988b79SJean-Baptiste Boric 
416*9f988b79SJean-Baptiste Boric 			/* Truncate filename to leave room for extension and
417*9f988b79SJean-Baptiste Boric 			 * CRC.
418*9f988b79SJean-Baptiste Boric 			 */
419*9f988b79SJean-Baptiste Boric 			maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
420*9f988b79SJean-Baptiste Boric 			if (newIndex > maxFilenameLen) {
421*9f988b79SJean-Baptiste Boric 				newIndex = maxFilenameLen;
422*9f988b79SJean-Baptiste Boric 			} else {
423*9f988b79SJean-Baptiste Boric 				newIndex = newExtIndex;
424*9f988b79SJean-Baptiste Boric 			}
425*9f988b79SJean-Baptiste Boric 		} else if (newIndex > MAXLEN - 5) {
426*9f988b79SJean-Baptiste Boric 			/*If no extension, make sure to leave room for CRC. */
427*9f988b79SJean-Baptiste Boric 			newIndex = MAXLEN - 5;
428*9f988b79SJean-Baptiste Boric 		}
429*9f988b79SJean-Baptiste Boric 		newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
430*9f988b79SJean-Baptiste Boric 
431*9f988b79SJean-Baptiste Boric 		/*Calculate CRC from original filename from FileIdentifier. */
432*9f988b79SJean-Baptiste Boric 		valueCRC = udf_unicode_cksum(udfName, udfLen);
433*9f988b79SJean-Baptiste Boric 		/* Convert 16-bits of CRC to hex characters. */
434*9f988b79SJean-Baptiste Boric 		newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
435*9f988b79SJean-Baptiste Boric 		newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
436*9f988b79SJean-Baptiste Boric 		newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
437*9f988b79SJean-Baptiste Boric 		newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
438*9f988b79SJean-Baptiste Boric 
439*9f988b79SJean-Baptiste Boric 		/* Place a translated extension at end, if found. */
440*9f988b79SJean-Baptiste Boric 		if (hasExt) {
441*9f988b79SJean-Baptiste Boric 			newName[newIndex++] = PERIOD;
442*9f988b79SJean-Baptiste Boric 			for (Index = 0;Index < localExtIndex ;Index++ ) {
443*9f988b79SJean-Baptiste Boric 				newName[newIndex++] = ext[Index];
444*9f988b79SJean-Baptiste Boric 			}
445*9f988b79SJean-Baptiste Boric 		}
446*9f988b79SJean-Baptiste Boric 	}
447*9f988b79SJean-Baptiste Boric 	return(newIndex);
448*9f988b79SJean-Baptiste Boric }
449*9f988b79SJean-Baptiste Boric 
#if defined OS2 || defined WIN_95 || defined WIN_NT
/***********************************************************************
 * Decides if a Unicode character matches one of a list
 * of ASCII characters.
 * Used by OS2 version of IsIllegal for readability, since all of the
 * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 * Works very similarly to the standard C function strchr().
 *
 * RETURN VALUE
 *
 * Non-zero if the Unicode character is in the given ASCII string.
 */
int UnicodeInString(
	unsigned char *string,	/* (Input) String to search through. */
	unicode_t ch)		/* (Input) Unicode char to search for. */
{
	for (; *string != '\0'; string++) {
		/* These types should compare, since both are unsigned
		 * numbers. */
		if (*string == ch) {
			return(true);
		}
	}
	return(false);
}
#endif /* OS2 */
478*9f988b79SJean-Baptiste Boric 
479*9f988b79SJean-Baptiste Boric /***********************************************************************
480*9f988b79SJean-Baptiste Boric  * Decides whether the given character is illegal for a given OS.
481*9f988b79SJean-Baptiste Boric  *
482*9f988b79SJean-Baptiste Boric  * RETURN VALUE
483*9f988b79SJean-Baptiste Boric  *
484*9f988b79SJean-Baptiste Boric  * Non-zero if char is illegal.
485*9f988b79SJean-Baptiste Boric  */
IsIllegal(unicode_t ch)486*9f988b79SJean-Baptiste Boric int IsIllegal(unicode_t ch)
487*9f988b79SJean-Baptiste Boric {
488*9f988b79SJean-Baptiste Boric #ifdef MAC
489*9f988b79SJean-Baptiste Boric 	/* Only illegal character on the MAC is the colon. */
490*9f988b79SJean-Baptiste Boric 	if (ch == 0x003A) {
491*9f988b79SJean-Baptiste Boric 		return(1);
492*9f988b79SJean-Baptiste Boric 	} else {
493*9f988b79SJean-Baptiste Boric 		return(0);
494*9f988b79SJean-Baptiste Boric 	}
495*9f988b79SJean-Baptiste Boric 
496*9f988b79SJean-Baptiste Boric #elif defined UNIX
497*9f988b79SJean-Baptiste Boric 	/* Illegal UNIX characters are NULL and slash. */
498*9f988b79SJean-Baptiste Boric 	if (ch == 0x0000 || ch == 0x002F) {
499*9f988b79SJean-Baptiste Boric 		return(1);
500*9f988b79SJean-Baptiste Boric 	} else {
501*9f988b79SJean-Baptiste Boric 		return(0);
502*9f988b79SJean-Baptiste Boric 	}
503*9f988b79SJean-Baptiste Boric 
504*9f988b79SJean-Baptiste Boric #elif defined OS2 || defined WIN_95 || defined WIN_NT
505*9f988b79SJean-Baptiste Boric 	/* Illegal char's for OS/2 according to WARP toolkit. */
506*9f988b79SJean-Baptiste Boric 	if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
507*9f988b79SJean-Baptiste Boric 		return(1);
508*9f988b79SJean-Baptiste Boric 	} else {
509*9f988b79SJean-Baptiste Boric 		return(0);
510*9f988b79SJean-Baptiste Boric 	}
511*9f988b79SJean-Baptiste Boric #endif
512*9f988b79SJean-Baptiste Boric }
513*9f988b79SJean-Baptiste Boric /* #endif*/	/* NEEDS_ISPRINT */
514*9f988b79SJean-Baptiste Boric 
515