10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7298SMark.J.Nelson@Sun.COM * Common Development and Distribution License (the "License"). 6*7298SMark.J.Nelson@Sun.COM * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 220Sstevel@tonic-gate * Copyright (c) 2001 by Sun Microsystems, Inc. 230Sstevel@tonic-gate * All rights reserved. 240Sstevel@tonic-gate * 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate // IANACharCode.java: SLPv1 Character encoding support 280Sstevel@tonic-gate // Author: James Kempf 290Sstevel@tonic-gate // Created On: Fri Sep 11 13:24:02 1998 300Sstevel@tonic-gate // Last Modified By: James Kempf 310Sstevel@tonic-gate // Last Modified On: Wed Oct 28 14:33:02 1998 320Sstevel@tonic-gate // Update Count: 7 330Sstevel@tonic-gate // 340Sstevel@tonic-gate 350Sstevel@tonic-gate 360Sstevel@tonic-gate package com.sun.slp; 370Sstevel@tonic-gate 380Sstevel@tonic-gate import java.util.*; 390Sstevel@tonic-gate import java.io.*; 400Sstevel@tonic-gate 410Sstevel@tonic-gate /** 420Sstevel@tonic-gate * The IANACharCode class supports static methods for decoding IANA 430Sstevel@tonic-gate * character codes into strings appropriate for the Java Writer subclass 440Sstevel@tonic-gate * encoding String arguments, and for encoding the String descriptions 450Sstevel@tonic-gate * of character codings into the integer codes. Ideally, Java itself 460Sstevel@tonic-gate * should support this. 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * @author James Kempf 490Sstevel@tonic-gate */ 500Sstevel@tonic-gate 510Sstevel@tonic-gate abstract class IANACharCode extends Object { 520Sstevel@tonic-gate 530Sstevel@tonic-gate // Character code descriptors. These can be used with the Java 540Sstevel@tonic-gate // character encoding utilities. For Unicode, we use little on 550Sstevel@tonic-gate // input, 560Sstevel@tonic-gate 570Sstevel@tonic-gate static final String ASCII = "Default"; 580Sstevel@tonic-gate static final String LATIN1 = "latin1"; 590Sstevel@tonic-gate static final String UTF8 = "UTF8"; 600Sstevel@tonic-gate static final String UNICODE = "Unicode"; 610Sstevel@tonic-gate static final String UNICODE_LITTLE = "UnicodeLittle"; 620Sstevel@tonic-gate static final String UNICODE_BIG = "UnicodeBig"; 630Sstevel@tonic-gate static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr"; 640Sstevel@tonic-gate 650Sstevel@tonic-gate // Error code for misidentified character set. 660Sstevel@tonic-gate 670Sstevel@tonic-gate static final short CHARSET_NOT_UNDERSTOOD = 5; 680Sstevel@tonic-gate 690Sstevel@tonic-gate // Character codes. 700Sstevel@tonic-gate 710Sstevel@tonic-gate protected static final int CHAR_ASCII = 3; 720Sstevel@tonic-gate protected static final int CHAR_LATIN1 = 4; 730Sstevel@tonic-gate protected static final int CHAR_UTF8 = 6; 740Sstevel@tonic-gate protected static final int CHAR_UNICODE = 1000; 750Sstevel@tonic-gate 760Sstevel@tonic-gate // First two bytes indicate that string is big/little endian Unicode. 770Sstevel@tonic-gate // If this flag isn't set, then big endian is assumed and we 780Sstevel@tonic-gate // must add the big endian bytes on every call. 790Sstevel@tonic-gate 800Sstevel@tonic-gate protected static final byte[] UNICODE_LITTLE_FLAG = 810Sstevel@tonic-gate {(byte)0xFF, (byte)0xFE}; 820Sstevel@tonic-gate 830Sstevel@tonic-gate protected static final byte[] UNICODE_BIG_FLAG = 840Sstevel@tonic-gate {(byte)0xFE, (byte)0xFF}; 850Sstevel@tonic-gate 860Sstevel@tonic-gate /** 870Sstevel@tonic-gate * Encode the String describing a character encoding into 880Sstevel@tonic-gate * the approprate integer descriptor code. 890Sstevel@tonic-gate * 900Sstevel@tonic-gate * @param encoding The String describing the encoding. 910Sstevel@tonic-gate * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the 920Sstevel@tonic-gate * String is not recognized. 930Sstevel@tonic-gate */ 940Sstevel@tonic-gate encodeCharacterEncoding(String encoding)950Sstevel@tonic-gate static int encodeCharacterEncoding(String encoding) 960Sstevel@tonic-gate throws ServiceLocationException { 970Sstevel@tonic-gate 980Sstevel@tonic-gate if (encoding.equals(ASCII)) { 990Sstevel@tonic-gate return CHAR_ASCII; 1000Sstevel@tonic-gate } else if (encoding.equals(LATIN1)) { 1010Sstevel@tonic-gate return CHAR_LATIN1; 1020Sstevel@tonic-gate } else if (encoding.equals(UTF8)) { 1030Sstevel@tonic-gate return CHAR_UTF8; 1040Sstevel@tonic-gate } else if (encoding.equals(UNICODE)) { 1050Sstevel@tonic-gate return CHAR_UNICODE; 1060Sstevel@tonic-gate } else if (encoding.equals(UNICODE_BIG)) { 1070Sstevel@tonic-gate return CHAR_UNICODE; 1080Sstevel@tonic-gate } else if (encoding.equals(UNICODE_LITTLE)) { 1090Sstevel@tonic-gate return CHAR_UNICODE; 1100Sstevel@tonic-gate } else if (encoding.equals(UNICODE_BIG_NO_HDR)) { 1110Sstevel@tonic-gate return CHAR_UNICODE; 1120Sstevel@tonic-gate } 1130Sstevel@tonic-gate 1140Sstevel@tonic-gate throw 1150Sstevel@tonic-gate new ServiceLocationException( 1160Sstevel@tonic-gate CHARSET_NOT_UNDERSTOOD, 1170Sstevel@tonic-gate "v1_unsupported_encoding", 1180Sstevel@tonic-gate new Object[] {encoding}); 1190Sstevel@tonic-gate } 1200Sstevel@tonic-gate 1210Sstevel@tonic-gate /** 1220Sstevel@tonic-gate * Decode the integer describing a character encoding into 1230Sstevel@tonic-gate * the approprate String descriptor. 1240Sstevel@tonic-gate * 1250Sstevel@tonic-gate * @param code The integer coding the String set. 1260Sstevel@tonic-gate * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the 1270Sstevel@tonic-gate * integer is not recognized. 1280Sstevel@tonic-gate */ 1290Sstevel@tonic-gate decodeCharacterEncoding(int code)1300Sstevel@tonic-gate static String decodeCharacterEncoding(int code) 1310Sstevel@tonic-gate throws ServiceLocationException { 1320Sstevel@tonic-gate 1330Sstevel@tonic-gate switch (code) { 1340Sstevel@tonic-gate case CHAR_ASCII: return ASCII; 1350Sstevel@tonic-gate case CHAR_LATIN1: return LATIN1; 1360Sstevel@tonic-gate case CHAR_UTF8: return UTF8; 1370Sstevel@tonic-gate case CHAR_UNICODE: return UNICODE; 1380Sstevel@tonic-gate } 1390Sstevel@tonic-gate 1400Sstevel@tonic-gate throw 1410Sstevel@tonic-gate new ServiceLocationException( 1420Sstevel@tonic-gate CHARSET_NOT_UNDERSTOOD, 1430Sstevel@tonic-gate "v1_unsupported_encoding", 1440Sstevel@tonic-gate new Object[] {Integer.toString(code)}); 1450Sstevel@tonic-gate } 1460Sstevel@tonic-gate 1470Sstevel@tonic-gate /** 1480Sstevel@tonic-gate * Return a string of integers giving the character's encoding in 1490Sstevel@tonic-gate * the character set passed in as encoding. 1500Sstevel@tonic-gate * 1510Sstevel@tonic-gate * @param c The character to escape. 1520Sstevel@tonic-gate * @param encoding The character set encoding to use. 1530Sstevel@tonic-gate * @return The character as a string of integers for the encoding. 1540Sstevel@tonic-gate * @exception ServiceLocationException Thrown if the encoding is not 1550Sstevel@tonic-gate * recognized, if the character's encoding 1560Sstevel@tonic-gate * has more than 8 bytes or if the sign bit gets turned on. 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate escapeChar(char c, String encoding)1590Sstevel@tonic-gate static String escapeChar(char c, String encoding) 1600Sstevel@tonic-gate throws ServiceLocationException { 1610Sstevel@tonic-gate 1620Sstevel@tonic-gate ByteArrayOutputStream baos = new ByteArrayOutputStream(); 1630Sstevel@tonic-gate 1640Sstevel@tonic-gate try { 1650Sstevel@tonic-gate OutputStreamWriter osw = new OutputStreamWriter(baos, encoding); 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate osw.write(c); 1680Sstevel@tonic-gate osw.flush(); 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate } catch (UnsupportedEncodingException ex) { 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate throw 1730Sstevel@tonic-gate new ServiceLocationException( 1740Sstevel@tonic-gate CHARSET_NOT_UNDERSTOOD, 1750Sstevel@tonic-gate "v1_unsupported_encoding", 1760Sstevel@tonic-gate new Object[] {encoding}); 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate } catch (IOException ex) { 1790Sstevel@tonic-gate 1800Sstevel@tonic-gate } 1810Sstevel@tonic-gate 1820Sstevel@tonic-gate byte b[] = baos.toByteArray(); 1830Sstevel@tonic-gate int code = 0; 1840Sstevel@tonic-gate 1850Sstevel@tonic-gate // Assemble the character code based on the encoding type. 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate if (encoding.equals(UNICODE) || 1880Sstevel@tonic-gate encoding.equals(UNICODE_BIG) || 1890Sstevel@tonic-gate encoding.equals(UNICODE_LITTLE)) { 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate code = (int)(b[0] & 0xFF); // control bytes... 1920Sstevel@tonic-gate code = (int)(code | ((b[1] & 0xFF) << 8)); 1930Sstevel@tonic-gate code = (int)(code | ((b[2] & 0xFF) << 16)); 1940Sstevel@tonic-gate code = (int)(code | ((b[3] & 0xFF) << 24)); 1950Sstevel@tonic-gate 1960Sstevel@tonic-gate if (b.length <= 4) { 1970Sstevel@tonic-gate throw 1980Sstevel@tonic-gate new ServiceLocationException( 1990Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 2000Sstevel@tonic-gate "v1_charcode_error", 2010Sstevel@tonic-gate new Object[] {new Character(c), encoding}); 2020Sstevel@tonic-gate } 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate } else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) { 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate code = (int)(b[0] & 0xFF); 2070Sstevel@tonic-gate 2080Sstevel@tonic-gate if (b.length > 1) { 2090Sstevel@tonic-gate throw 2100Sstevel@tonic-gate new ServiceLocationException( 2110Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 2120Sstevel@tonic-gate "v1_charcode_error", 2130Sstevel@tonic-gate new Object[] {new Character(c), encoding}); 2140Sstevel@tonic-gate } 2150Sstevel@tonic-gate } else if (encoding.equals(UTF8)) { 2160Sstevel@tonic-gate 2170Sstevel@tonic-gate if (b.length > 3) { 2180Sstevel@tonic-gate throw 2190Sstevel@tonic-gate new ServiceLocationException( 2200Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 2210Sstevel@tonic-gate "v1_charcode_error", 2220Sstevel@tonic-gate new Object[] {new Character(c), encoding}); 2230Sstevel@tonic-gate } 2240Sstevel@tonic-gate 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate code = (int)(b[0] & 0xFF); 2270Sstevel@tonic-gate 2280Sstevel@tonic-gate if (b.length > 1) { 2290Sstevel@tonic-gate code = (int)(code | ((b[1] & 0xFF) << 8)); 2300Sstevel@tonic-gate } 2310Sstevel@tonic-gate 2320Sstevel@tonic-gate if (b.length > 2) { 2330Sstevel@tonic-gate code = (int)(code | ((b[2] & 0xFF) << 16)); 2340Sstevel@tonic-gate } 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate return Integer.toString(code); 2380Sstevel@tonic-gate } 2390Sstevel@tonic-gate 2400Sstevel@tonic-gate /** 2410Sstevel@tonic-gate * Unescape the character encoded as the string. 2420Sstevel@tonic-gate * 2430Sstevel@tonic-gate * @param ch The character as a string of Integers. 2440Sstevel@tonic-gate * @param encoding The character set encoding to use. 2450Sstevel@tonic-gate * @return The character. 2460Sstevel@tonic-gate * @exception ServiceLocationException Thrown if the string can't 2470Sstevel@tonic-gate * be parsed into an integer or if the encoding isn't 2480Sstevel@tonic-gate * recognized. 2490Sstevel@tonic-gate */ 2500Sstevel@tonic-gate unescapeChar(String ch, String encoding)2510Sstevel@tonic-gate static String unescapeChar(String ch, String encoding) 2520Sstevel@tonic-gate throws ServiceLocationException { 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate int code = 0; 2550Sstevel@tonic-gate 2560Sstevel@tonic-gate try { 2570Sstevel@tonic-gate code = Integer.parseInt(ch); 2580Sstevel@tonic-gate 2590Sstevel@tonic-gate } catch (NumberFormatException ex) { 2600Sstevel@tonic-gate throw 2610Sstevel@tonic-gate new ServiceLocationException( 2620Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 2630Sstevel@tonic-gate "v1_stringcode_error", 2640Sstevel@tonic-gate new Object[] {ch, encoding}); 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate } 2670Sstevel@tonic-gate 2680Sstevel@tonic-gate // Convert to bytes. We need to taylor the array size to the 2690Sstevel@tonic-gate // number of bytes because otherwise, in encodings that 2700Sstevel@tonic-gate // take less bytes, the resulting string will have garbage 2710Sstevel@tonic-gate // in it. 2720Sstevel@tonic-gate 2730Sstevel@tonic-gate String str = null; 2740Sstevel@tonic-gate byte b0 = 0, b1 = 0, b2 = 0, b3 = 0; 2750Sstevel@tonic-gate byte b[] = null; 2760Sstevel@tonic-gate 2770Sstevel@tonic-gate b0 = (byte) (code & 0xFF); 2780Sstevel@tonic-gate b1 = (byte) ((code >> 8) & 0xFF); 2790Sstevel@tonic-gate b2 = (byte) ((code >> 16) & 0xFF); 2800Sstevel@tonic-gate b3 = (byte) ((code >> 24) & 0xFf); 2810Sstevel@tonic-gate 2820Sstevel@tonic-gate // We create an array sized to the encoding. 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate if (encoding.equals(UNICODE_BIG) || 2850Sstevel@tonic-gate encoding.equals(UNICODE_LITTLE)) { 2860Sstevel@tonic-gate b = new byte[4]; 2870Sstevel@tonic-gate b[0] = b0; 2880Sstevel@tonic-gate b[1] = b1; 2890Sstevel@tonic-gate b[2] = b2; 2900Sstevel@tonic-gate b[3] = b3; 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate } else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) { 2930Sstevel@tonic-gate // single byte 2940Sstevel@tonic-gate b = new byte[1]; 2950Sstevel@tonic-gate b[0] = b0; 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate if (b1 != 0 || b2 != 0) { 2980Sstevel@tonic-gate throw 2990Sstevel@tonic-gate new ServiceLocationException( 3000Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 3010Sstevel@tonic-gate "v1_stringcode_error", 3020Sstevel@tonic-gate new Object[] {ch, encoding}); 3030Sstevel@tonic-gate } 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate } else if (encoding.equals(UTF8)) {// vari-byte 3070Sstevel@tonic-gate 3080Sstevel@tonic-gate if (b3 != 0) { 3090Sstevel@tonic-gate throw 3100Sstevel@tonic-gate new ServiceLocationException( 3110Sstevel@tonic-gate ServiceLocationException.PARSE_ERROR, 3120Sstevel@tonic-gate "v1_stringcode_error", 3130Sstevel@tonic-gate new Object[] {ch, encoding}); 3140Sstevel@tonic-gate } 3150Sstevel@tonic-gate 3160Sstevel@tonic-gate if (b2 != 0) { 3170Sstevel@tonic-gate b = new byte[3]; 3180Sstevel@tonic-gate b[2] = b2; 3190Sstevel@tonic-gate b[1] = b1; 3200Sstevel@tonic-gate b[0] = b0; 3210Sstevel@tonic-gate } else if (b1 != 0) { 3220Sstevel@tonic-gate b = new byte[2]; 3230Sstevel@tonic-gate b[1] = b1; 3240Sstevel@tonic-gate b[0] = b0; 3250Sstevel@tonic-gate } else { 3260Sstevel@tonic-gate b = new byte[1]; 3270Sstevel@tonic-gate b[0] = b0; 3280Sstevel@tonic-gate } 3290Sstevel@tonic-gate } 3300Sstevel@tonic-gate 3310Sstevel@tonic-gate // Make a string out of it. 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate try { 3340Sstevel@tonic-gate str = new String(b, encoding); 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate } catch (UnsupportedEncodingException ex) { 3370Sstevel@tonic-gate Assert.slpassert(false, 3380Sstevel@tonic-gate "v1_unsupported_encoding", 3390Sstevel@tonic-gate new Object[] {encoding}); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate 3420Sstevel@tonic-gate return str; 3430Sstevel@tonic-gate } 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate // Determine from the flag bytes whether this is big or little endian 3460Sstevel@tonic-gate // Unicode. If there are no flag bytes, then just return UNICODE. 3470Sstevel@tonic-gate getUnicodeEndianess(byte[] bytes)3480Sstevel@tonic-gate static String getUnicodeEndianess(byte[] bytes) { 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate if (bytes.length >= 2) { 3510Sstevel@tonic-gate 3520Sstevel@tonic-gate if (bytes[0] == UNICODE_LITTLE_FLAG[0] && 3530Sstevel@tonic-gate bytes[1] == UNICODE_LITTLE_FLAG[1]) { 3540Sstevel@tonic-gate return UNICODE_LITTLE; 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate } else if (bytes[0] == UNICODE_BIG_FLAG[0] && 3570Sstevel@tonic-gate bytes[1] == UNICODE_BIG_FLAG[1]) { 3580Sstevel@tonic-gate return UNICODE_BIG; 3590Sstevel@tonic-gate 3600Sstevel@tonic-gate } 3610Sstevel@tonic-gate } 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate // We can`t tell from the byte header, so it's big endian. But 3640Sstevel@tonic-gate // since we need to add the byte header, we say we don't know. 3650Sstevel@tonic-gate 3660Sstevel@tonic-gate return UNICODE; 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate } 3690Sstevel@tonic-gate 3700Sstevel@tonic-gate // Add the big endian flag to a Unicode string. 3710Sstevel@tonic-gate addBigEndianFlag(byte[] bytes)3720Sstevel@tonic-gate static byte[] addBigEndianFlag(byte[] bytes) { 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate byte[] flaggedBytes = new byte[bytes.length + 2]; 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate flaggedBytes[0] = UNICODE_BIG_FLAG[0]; 3770Sstevel@tonic-gate flaggedBytes[1] = UNICODE_BIG_FLAG[1]; 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length); 3800Sstevel@tonic-gate 3810Sstevel@tonic-gate return flaggedBytes; 3820Sstevel@tonic-gate 3830Sstevel@tonic-gate } 3840Sstevel@tonic-gate } 385