sun/slp/IANACharCode.java

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2001 by Sun Microsystems, Inc.
 * All rights reserved.
 *
 */
0Sstevel@tonic-gate
//  IANACharCode.java: SLPv1 Character encoding support
//  Author:           James Kempf
//  Created On:       Fri Sep 11 13:24:02 1998
//  Last Modified By: James Kempf
//  Last Modified On: Wed Oct 28 14:33:02 1998
//  Update Count:     7
//
0Sstevel@tonic-gate
0Sstevel@tonic-gate
0Sstevel@tonic-gatepackage com.sun.slp;
0Sstevel@tonic-gate
0Sstevel@tonic-gateimport java.util.*;
0Sstevel@tonic-gateimport java.io.*;
0Sstevel@tonic-gate
0Sstevel@tonic-gate/**
0Sstevel@tonic-gate * The IANACharCode class supports static methods for decoding IANA
0Sstevel@tonic-gate * character codes into strings appropriate for the Java Writer subclass
0Sstevel@tonic-gate * encoding String arguments, and for encoding the String descriptions
0Sstevel@tonic-gate * of character codings into the integer codes. Ideally, Java itself
0Sstevel@tonic-gate * should support this.
0Sstevel@tonic-gate *
0Sstevel@tonic-gate * @author James Kempf
0Sstevel@tonic-gate */
0Sstevel@tonic-gate
0Sstevel@tonic-gateabstract class IANACharCode extends Object {
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // Character code descriptors. These can be used with the Java
0Sstevel@tonic-gate    //  character encoding utilities. For Unicode, we use little on
0Sstevel@tonic-gate    //  input,
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static final String ASCII = "Default";
0Sstevel@tonic-gate    static final String LATIN1 = "latin1";
0Sstevel@tonic-gate    static final String UTF8 = "UTF8";
0Sstevel@tonic-gate    static final String UNICODE = "Unicode";
0Sstevel@tonic-gate    static final String UNICODE_LITTLE = "UnicodeLittle";
0Sstevel@tonic-gate    static final String UNICODE_BIG = "UnicodeBig";
0Sstevel@tonic-gate    static final String UNICODE_BIG_NO_HDR = "UnicodeBigNoHdr";
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // Error code for misidentified character set.
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static final short CHARSET_NOT_UNDERSTOOD = 5;
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // Character codes.
0Sstevel@tonic-gate
0Sstevel@tonic-gate    protected static final int CHAR_ASCII   = 3;
0Sstevel@tonic-gate    protected static final int CHAR_LATIN1  = 4;
0Sstevel@tonic-gate    protected static final int CHAR_UTF8    = 6;
0Sstevel@tonic-gate    protected static final int CHAR_UNICODE = 1000;
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // First two bytes indicate that string is big/little endian Unicode.
0Sstevel@tonic-gate    //  If this flag isn't set, then big endian is assumed and we
0Sstevel@tonic-gate    //  must add the big endian bytes on every call.
0Sstevel@tonic-gate
0Sstevel@tonic-gate    protected static final byte[] UNICODE_LITTLE_FLAG =
0Sstevel@tonic-gate					{(byte)0xFF, (byte)0xFE};
0Sstevel@tonic-gate
0Sstevel@tonic-gate    protected static final byte[] UNICODE_BIG_FLAG =
0Sstevel@tonic-gate					{(byte)0xFE, (byte)0xFF};
0Sstevel@tonic-gate
0Sstevel@tonic-gate    /**
0Sstevel@tonic-gate     * Encode the String describing a character encoding into
0Sstevel@tonic-gate     * the approprate integer descriptor code.
0Sstevel@tonic-gate     *
0Sstevel@tonic-gate     * @param encoding The String describing the encoding.
0Sstevel@tonic-gate     * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
0Sstevel@tonic-gate     *			String is not recognized.
0Sstevel@tonic-gate     */
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static int encodeCharacterEncoding(String encoding)
0Sstevel@tonic-gate	throws ServiceLocationException {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	if (encoding.equals(ASCII)) {
0Sstevel@tonic-gate	    return CHAR_ASCII;
0Sstevel@tonic-gate	} else if (encoding.equals(LATIN1)) {
0Sstevel@tonic-gate	    return CHAR_LATIN1;
0Sstevel@tonic-gate	} else if (encoding.equals(UTF8)) {
0Sstevel@tonic-gate	    return CHAR_UTF8;
0Sstevel@tonic-gate	} else if (encoding.equals(UNICODE)) {
0Sstevel@tonic-gate	    return CHAR_UNICODE;
0Sstevel@tonic-gate	} else if (encoding.equals(UNICODE_BIG)) {
0Sstevel@tonic-gate	    return CHAR_UNICODE;
0Sstevel@tonic-gate	} else if (encoding.equals(UNICODE_LITTLE)) {
0Sstevel@tonic-gate	    return CHAR_UNICODE;
0Sstevel@tonic-gate	} else if (encoding.equals(UNICODE_BIG_NO_HDR)) {
0Sstevel@tonic-gate	    return CHAR_UNICODE;
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	throw
0Sstevel@tonic-gate	    new ServiceLocationException(
0Sstevel@tonic-gate				CHARSET_NOT_UNDERSTOOD,
0Sstevel@tonic-gate				"v1_unsupported_encoding",
0Sstevel@tonic-gate				new Object[] {encoding});
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate    /**
0Sstevel@tonic-gate     * Decode the integer describing a character encoding into
0Sstevel@tonic-gate     * the approprate String descriptor.
0Sstevel@tonic-gate     *
0Sstevel@tonic-gate     * @param code The integer coding the String set.
0Sstevel@tonic-gate     * @exception ServiceLocationCharSetNotUnderstoodException Thrown if the
0Sstevel@tonic-gate     *			integer is not recognized.
0Sstevel@tonic-gate     */
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static String decodeCharacterEncoding(int code)
0Sstevel@tonic-gate	throws ServiceLocationException {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	switch (code) {
0Sstevel@tonic-gate	case CHAR_ASCII: 	return ASCII;
0Sstevel@tonic-gate	case CHAR_LATIN1:	return LATIN1;
0Sstevel@tonic-gate	case CHAR_UTF8:	return UTF8;
0Sstevel@tonic-gate	case CHAR_UNICODE:	return UNICODE;
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	throw
0Sstevel@tonic-gate	    new ServiceLocationException(
0Sstevel@tonic-gate				CHARSET_NOT_UNDERSTOOD,
0Sstevel@tonic-gate				"v1_unsupported_encoding",
0Sstevel@tonic-gate				new Object[] {Integer.toString(code)});
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate    /**
0Sstevel@tonic-gate     * Return a string of integers giving the character's encoding in
0Sstevel@tonic-gate     * the character set passed in as encoding.
0Sstevel@tonic-gate     *
0Sstevel@tonic-gate     * @param c The character to escape.
0Sstevel@tonic-gate     * @param encoding The character set encoding to use.
0Sstevel@tonic-gate     * @return The character as a string of integers for the encoding.
0Sstevel@tonic-gate     * @exception ServiceLocationException Thrown if the encoding is not
0Sstevel@tonic-gate     *		 recognized, if the character's encoding
0Sstevel@tonic-gate     *		 has more than 8 bytes or if the sign bit gets turned on.
0Sstevel@tonic-gate     */
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static String escapeChar(char c, String encoding)
0Sstevel@tonic-gate	throws ServiceLocationException {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	ByteArrayOutputStream baos = new ByteArrayOutputStream();
0Sstevel@tonic-gate
0Sstevel@tonic-gate	try {
0Sstevel@tonic-gate	    OutputStreamWriter osw = new OutputStreamWriter(baos, encoding);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    osw.write(c);
0Sstevel@tonic-gate	    osw.flush();
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} catch (UnsupportedEncodingException ex) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    throw
0Sstevel@tonic-gate		new ServiceLocationException(
0Sstevel@tonic-gate				CHARSET_NOT_UNDERSTOOD,
0Sstevel@tonic-gate				"v1_unsupported_encoding",
0Sstevel@tonic-gate				new Object[] {encoding});
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} catch (IOException ex) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	byte b[] = baos.toByteArray();
0Sstevel@tonic-gate	int code = 0;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	// Assemble the character code based on the encoding type.
0Sstevel@tonic-gate
0Sstevel@tonic-gate	if (encoding.equals(UNICODE) ||
0Sstevel@tonic-gate	    encoding.equals(UNICODE_BIG) ||
0Sstevel@tonic-gate	    encoding.equals(UNICODE_LITTLE)) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    code = (int)(b[0] & 0xFF);		// control bytes...
0Sstevel@tonic-gate	    code = (int)(code | ((b[1] & 0xFF) << 8));
0Sstevel@tonic-gate	    code = (int)(code | ((b[2] & 0xFF) << 16));
0Sstevel@tonic-gate	    code = (int)(code | ((b[3] & 0xFF) << 24));
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b.length <= 4) {
0Sstevel@tonic-gate		throw
0Sstevel@tonic-gate		    new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_charcode_error",
0Sstevel@tonic-gate				new Object[] {new Character(c), encoding});
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} else if (encoding.equals(ASCII) || encoding.equals(LATIN1)) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    code = (int)(b[0] & 0xFF);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b.length > 1) {
0Sstevel@tonic-gate		throw
0Sstevel@tonic-gate		    new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_charcode_error",
0Sstevel@tonic-gate				new Object[] {new Character(c), encoding});
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate	} else if (encoding.equals(UTF8)) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b.length > 3) {
0Sstevel@tonic-gate		throw
0Sstevel@tonic-gate		    new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_charcode_error",
0Sstevel@tonic-gate				new Object[] {new Character(c), encoding});
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    code = (int)(b[0] & 0xFF);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b.length > 1) {
0Sstevel@tonic-gate		code = (int)(code | ((b[1] & 0xFF) << 8));
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b.length > 2) {
0Sstevel@tonic-gate		code = (int)(code | ((b[2] & 0xFF) << 16));
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	return Integer.toString(code);
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate    /**
0Sstevel@tonic-gate     * Unescape the character encoded as the string.
0Sstevel@tonic-gate     *
0Sstevel@tonic-gate     * @param ch The character as a string of Integers.
0Sstevel@tonic-gate     * @param encoding The character set encoding to use.
0Sstevel@tonic-gate     * @return The character.
0Sstevel@tonic-gate     * @exception ServiceLocationException Thrown if the string can't
0Sstevel@tonic-gate     *		 be parsed into an integer or if the encoding isn't
0Sstevel@tonic-gate     *		 recognized.
0Sstevel@tonic-gate     */
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static String unescapeChar(String ch, String encoding)
0Sstevel@tonic-gate	throws ServiceLocationException {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	int code = 0;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	try {
0Sstevel@tonic-gate	    code = Integer.parseInt(ch);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} catch (NumberFormatException ex) {
0Sstevel@tonic-gate	    throw
0Sstevel@tonic-gate		new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_stringcode_error",
0Sstevel@tonic-gate				new Object[] {ch, encoding});
0Sstevel@tonic-gate
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	// Convert to bytes. We need to taylor the array size to the
0Sstevel@tonic-gate	//  number of bytes because otherwise, in encodings that
0Sstevel@tonic-gate	//  take less bytes, the resulting string will have garbage
0Sstevel@tonic-gate	//  in it.
0Sstevel@tonic-gate
0Sstevel@tonic-gate	String str = null;
0Sstevel@tonic-gate	byte b0 = 0, b1 = 0, b2 = 0, b3 = 0;
0Sstevel@tonic-gate	byte b[] = null;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	b0 = (byte) (code & 0xFF);
0Sstevel@tonic-gate	b1 = (byte) ((code >> 8) & 0xFF);
0Sstevel@tonic-gate	b2 = (byte) ((code >> 16) & 0xFF);
0Sstevel@tonic-gate	b3 = (byte) ((code >> 24) & 0xFf);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	// We create an array sized to the encoding.
0Sstevel@tonic-gate
0Sstevel@tonic-gate	if (encoding.equals(UNICODE_BIG) ||
0Sstevel@tonic-gate	    encoding.equals(UNICODE_LITTLE)) {
0Sstevel@tonic-gate	    b = new byte[4];
0Sstevel@tonic-gate	    b[0] = b0;
0Sstevel@tonic-gate	    b[1] = b1;
0Sstevel@tonic-gate	    b[2] = b2;
0Sstevel@tonic-gate	    b[3] = b3;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} else if (encoding.equals(LATIN1) || encoding.equals(ASCII)) {
0Sstevel@tonic-gate	    // single byte
0Sstevel@tonic-gate	    b = new byte[1];
0Sstevel@tonic-gate	    b[0] = b0;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b1 != 0 || b2 != 0) {
0Sstevel@tonic-gate		throw
0Sstevel@tonic-gate		    new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_stringcode_error",
0Sstevel@tonic-gate				new Object[] {ch, encoding});
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} else if (encoding.equals(UTF8)) {// vari-byte
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b3 != 0) {
0Sstevel@tonic-gate		throw
0Sstevel@tonic-gate		    new ServiceLocationException(
0Sstevel@tonic-gate				ServiceLocationException.PARSE_ERROR,
0Sstevel@tonic-gate				"v1_stringcode_error",
0Sstevel@tonic-gate				new Object[] {ch, encoding});
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (b2 != 0) {
0Sstevel@tonic-gate		b = new byte[3];
0Sstevel@tonic-gate		b[2] = b2;
0Sstevel@tonic-gate		b[1] = b1;
0Sstevel@tonic-gate		b[0] = b0;
0Sstevel@tonic-gate	    } else if (b1 != 0) {
0Sstevel@tonic-gate		b = new byte[2];
0Sstevel@tonic-gate		b[1] = b1;
0Sstevel@tonic-gate		b[0] = b0;
0Sstevel@tonic-gate	    } else {
0Sstevel@tonic-gate		b = new byte[1];
0Sstevel@tonic-gate		b[0] = b0;
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	// Make a string out of it.
0Sstevel@tonic-gate
0Sstevel@tonic-gate	try {
0Sstevel@tonic-gate	    str = new String(b, encoding);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	} catch (UnsupportedEncodingException ex) {
0Sstevel@tonic-gate	    Assert.slpassert(false,
0Sstevel@tonic-gate			  "v1_unsupported_encoding",
0Sstevel@tonic-gate			  new Object[] {encoding});
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	return str;
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // Determine from the flag bytes whether this is big or little endian
0Sstevel@tonic-gate    //  Unicode. If there are no flag bytes, then just return UNICODE.
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static String getUnicodeEndianess(byte[] bytes) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	if (bytes.length >= 2) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    if (bytes[0] == UNICODE_LITTLE_FLAG[0] &&
0Sstevel@tonic-gate		bytes[1] == UNICODE_LITTLE_FLAG[1]) {
0Sstevel@tonic-gate		return UNICODE_LITTLE;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    } else if (bytes[0] == UNICODE_BIG_FLAG[0] &&
0Sstevel@tonic-gate		       bytes[1] == UNICODE_BIG_FLAG[1]) {
0Sstevel@tonic-gate		return UNICODE_BIG;
0Sstevel@tonic-gate
0Sstevel@tonic-gate	    }
0Sstevel@tonic-gate	}
0Sstevel@tonic-gate
0Sstevel@tonic-gate	// We can`t tell from the byte header, so it's big endian. But
0Sstevel@tonic-gate	//  since we need to add the byte header, we say we don't know.
0Sstevel@tonic-gate
0Sstevel@tonic-gate	return UNICODE;
0Sstevel@tonic-gate
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate
0Sstevel@tonic-gate    // Add the big endian flag to a Unicode string.
0Sstevel@tonic-gate
0Sstevel@tonic-gate    static byte[] addBigEndianFlag(byte[] bytes) {
0Sstevel@tonic-gate
0Sstevel@tonic-gate	byte[] flaggedBytes = new byte[bytes.length + 2];
0Sstevel@tonic-gate
0Sstevel@tonic-gate	flaggedBytes[0] = UNICODE_BIG_FLAG[0];
0Sstevel@tonic-gate	flaggedBytes[1] = UNICODE_BIG_FLAG[1];
0Sstevel@tonic-gate
0Sstevel@tonic-gate	System.arraycopy(flaggedBytes, 2, bytes, 0, bytes.length);
0Sstevel@tonic-gate
0Sstevel@tonic-gate	return flaggedBytes;
0Sstevel@tonic-gate
0Sstevel@tonic-gate    }
0Sstevel@tonic-gate}