libsmbfs/smb/charsets.c

6007Sthurlow/*
6007Sthurlow * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
6007Sthurlow *
6007Sthurlow * @APPLE_LICENSE_HEADER_START@
6007Sthurlow *
6007Sthurlow * "Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
6007Sthurlow * Reserved.  This file contains Original Code and/or Modifications of
6007Sthurlow * Original Code as defined in and that are subject to the Apple Public
6007Sthurlow * Source License Version 1.0 (the 'License').  You may not use this file
6007Sthurlow * except in compliance with the License.  Please obtain a copy of the
6007Sthurlow * License at http://www.apple.com/publicsource and read it before using
6007Sthurlow * this file.
6007Sthurlow *
6007Sthurlow * The Original Code and all software distributed under the License are
6007Sthurlow * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
6007Sthurlow * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
6007Sthurlow * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
6007Sthurlow * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
6007Sthurlow * License for the specific language governing rights and limitations
6007Sthurlow * under the License."
6007Sthurlow *
6007Sthurlow * @APPLE_LICENSE_HEADER_END@
6007Sthurlow */
*8271SGordon.Ross@Sun.COM/* CSTYLED */
*8271SGordon.Ross@Sun.COM/*
*8271SGordon.Ross@Sun.COM *      @(#)charsets.c      *
6007Sthurlow *      (c) 2004   Apple Computer, Inc.  All Rights Reserved
6007Sthurlow *
6007Sthurlow *
6007Sthurlow *      charsets.c -- Routines converting between UTF-8, 16-bit
6007Sthurlow *			little-endian Unicode, and various Windows
6007Sthurlow *			code pages.
6007Sthurlow *
6007Sthurlow *      MODIFICATION HISTORY:
6007Sthurlow *       28-Nov-2004     Guy Harris	New today
6007Sthurlow */
6007Sthurlow
6007Sthurlow#include <stdlib.h>
6007Sthurlow#include <stdio.h>
6007Sthurlow#include <string.h>
6007Sthurlow#include <ctype.h>
*8271SGordon.Ross@Sun.COM#include <errno.h>
6007Sthurlow#include <iconv.h>
6007Sthurlow#include <langinfo.h>
6007Sthurlow#include <strings.h>
6007Sthurlow
6007Sthurlow#include <netsmb/smb_lib.h>
6007Sthurlow#include <netsmb/mchain.h>
6007Sthurlow
6007Sthurlow#include "charsets.h"
6007Sthurlow
6007Sthurlow/*
6007Sthurlow * On Solaris, we will need to do some rewriting to use our iconv
6007Sthurlow * routines for the conversions.  For now, we're effectively
6007Sthurlow * stubbing out code, leaving the details of what happens on
6007Sthurlow * Darwin in case it's useful as a guide later.
6007Sthurlow */
6007Sthurlow
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and 16 for any
 * other character (including NUL), so callers can detect an invalid
 * digit by testing for a result greater than 15.
 *
 * The ranges are compared directly rather than through the <ctype.h>
 * classifiers: passing a plain char with a negative value (any byte
 * >= 0x80 where char is signed) to those functions is undefined, and
 * their answers vary with the locale.
 */
static unsigned
xtoi(char u)
{
	if (u >= '0' && u <= '9')
		return ((unsigned)(u - '0'));
	if (u >= 'a' && u <= 'f')
		return (10 + (unsigned)(u - 'a'));
	if (u >= 'A' && u <= 'F')
		return (10 + (unsigned)(u - 'A'));
	return (16);
}
6007Sthurlow
6007Sthurlow
/*
 * Removes the "%" escape sequences from a URL component.
 * See IETF RFC 2396.
 *
 * The string is decoded in place and the same pointer is returned;
 * a NULL argument is returned unchanged.  Each "%XY" whose X and Y
 * are both hex digits is replaced by the single byte 0xXY.  A '%'
 * that is not followed by two hex digits is an invalid escape and is
 * left in the string as-is.  A decoded byte is never rescanned, so
 * "%2541" yields "%41", not "A".  "%00" decodes to a NUL byte, which
 * ends the string as seen by the usual string functions.
 *
 * The work is done in a single pass with separate read and write
 * cursors, so no overlapping copy is needed and the cost is linear in
 * the length of the string.  Nothing is written unless at least one
 * escape is actually decoded.
 */
char *
unpercent(char *component)
{
	char *src, *dst;
	unsigned hi, lo;

	if (component == NULL)
		return (component);

	src = dst = component;
	while (*src != '\0') {
		/*
		 * The && chain stops at the first non-hex character, so
		 * src[2] is only examined when src[1] is a hex digit and
		 * therefore not the terminator.
		 */
		if (src[0] == '%' &&
		    (hi = xtoi(src[1])) <= 15 &&
		    (lo = xtoi(src[2])) <= 15) {
			*dst++ = (char)(hi * 16 + lo);
			src += 3;
			continue;
		}
		/* Ordinary byte or invalid escape: keep it verbatim. */
		if (dst != src)
			*dst = *src;
		dst++;
		src++;
	}
	/* Re-terminate only if the string actually got shorter. */
	if (dst != src)
		*dst = '\0';
	return (component);
}
6007Sthurlow
*8271SGordon.Ross@Sun.COM/* BEGIN CSTYLED */
#ifdef NOTPORTED
/*
 * Map the installation encoding reported by
 * __CFStringGetInstallationEncodingAndRegion() to a DOS code page
 * encoding.  The lookup is made between a seteuid(eff_uid) and a
 * seteuid(real_uid).  Installation encodings with no entry below map
 * to DOS Latin-1.
 */
static CFStringEncoding
get_windows_encoding_equivalent( void )
{
	uint32_t install_enc, install_region;

	/* important! use root ID so you can read the config file! */
	seteuid(eff_uid);
	__CFStringGetInstallationEncodingAndRegion(&install_enc,
	    &install_region);
	seteuid(real_uid);

	switch (install_enc) {
	case kCFStringEncodingMacRoman:
		/* anything nonzero is not US */
		return install_region ? kCFStringEncodingDOSLatin1 :
		    kCFStringEncodingDOSLatinUS;

	case kCFStringEncodingMacJapanese:
		return kCFStringEncodingDOSJapanese;

	case kCFStringEncodingMacChineseTrad:
		return kCFStringEncodingDOSChineseTrad;

	case kCFStringEncodingMacKorean:
		return kCFStringEncodingDOSKorean;

	case kCFStringEncodingMacArabic:
	case kCFStringEncodingMacFarsi:
		return kCFStringEncodingDOSArabic;

	case kCFStringEncodingMacHebrew:
		return kCFStringEncodingDOSHebrew;

	case kCFStringEncodingMacGreek:
		return kCFStringEncodingDOSGreek;

	case kCFStringEncodingMacCyrillic:
	case kCFStringEncodingMacUkrainian:
		return kCFStringEncodingDOSCyrillic;

	case kCFStringEncodingMacThai:
		return kCFStringEncodingDOSThai;

	case kCFStringEncodingMacChineseSimp:
		return kCFStringEncodingDOSChineseSimplif;

	case kCFStringEncodingMacCentralEurRoman:
	case kCFStringEncodingMacCroatian:
	case kCFStringEncodingMacRomanian:
		return kCFStringEncodingDOSLatin2;

	case kCFStringEncodingMacTurkish:
		return kCFStringEncodingDOSTurkish;

	case kCFStringEncodingMacIcelandic:
		return kCFStringEncodingDOSIcelandic;

	default:
		return kCFStringEncodingDOSLatin1;
	}
}
#endif /* NOTPORTED */
6007Sthurlow
/*
 * Return a newly allocated UTF-8 version of a string given in the
 * Windows code page.  The result comes from malloc()/strdup() and is
 * released with free().  Returns NULL if windows_string is NULL or if
 * the copy cannot be produced.
 *
 * When NOTPORTED is not defined no conversion is performed: the input
 * is simply duplicated.  The NOTPORTED branch is the CoreFoundation
 * based conversion kept for reference.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_wincs_to_utf8(const char *windows_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFIndex maxlen;
	char *result;
#endif /* NOTPORTED */

	/* Nothing to convert; avoid handing NULL to the routines below. */
	if (windows_string == NULL)
		return (NULL);

#ifdef NOTPORTED
	s = CFStringCreateWithCString(NULL, windows_string,
		get_windows_encoding_equivalent());
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" ", -1,
		    windows_string);

		/* kCFStringEncodingMacRoman should always succeed */
		s = CFStringCreateWithCString(NULL, windows_string,
		    kCFStringEncodingMacRoman);
		if (s == NULL) {
			smb_error("CFStringCreateWithCString for Windows code page failed on \"%s\" with kCFStringEncodingMacRoman - skipping",
			    -1, windows_string);
			return NULL;
		}
	}

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    kCFStringEncodingUTF8) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for UTF-8 string for \"%s\" - skipping", -1,
		    windows_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, kCFStringEncodingUTF8)) {
		smb_error("CFStringGetCString for UTF-8 failed on \"%s\" - skipping",
		    -1, windows_string);
		/* The buffer is not handed to the caller; don't leak it. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup() already takes a const pointer; no cast is needed. */
	return (strdup(windows_string));
#endif /* NOTPORTED */
}
6007Sthurlow
/*
 * Return a newly allocated version of a UTF-8 string in the Windows
 * code page.  The result comes from malloc()/strdup() and is released
 * with free().  Returns NULL if utf8_string is NULL or if the copy
 * cannot be produced.
 *
 * When NOTPORTED is not defined no conversion is performed: the input
 * is simply duplicated.  The NOTPORTED branch is the CoreFoundation
 * based conversion kept for reference.
 *
 * XXX - NLS, or CF?  We should probably use the same routine for all
 * conversions.
 */
char *
convert_utf8_to_wincs(const char *utf8_string)
{
#ifdef NOTPORTED
	CFStringRef s;
	CFStringEncoding encoding;
	CFIndex maxlen;
	char *result;
#endif /* NOTPORTED */

	/* Nothing to convert; avoid handing NULL to the routines below. */
	if (utf8_string == NULL)
		return (NULL);

#ifdef NOTPORTED
	s = CFStringCreateWithCString(NULL, utf8_string,
	    kCFStringEncodingUTF8);
	if (s == NULL) {
		smb_error("CFStringCreateWithCString for UTF-8 failed on \"%s\"", -1,
		    utf8_string);
		return NULL;
	}

	/* Look the target encoding up once and use it for both calls. */
	encoding = get_windows_encoding_equivalent();

	maxlen = CFStringGetMaximumSizeForEncoding(CFStringGetLength(s),
	    encoding) + 1;
	result = malloc(maxlen);
	if (result == NULL) {
		smb_error("Couldn't allocate buffer for Windows code page string for \"%s\" - skipping", -1,
		    utf8_string);
		CFRelease(s);
		return NULL;
	}
	if (!CFStringGetCString(s, result, maxlen, encoding)) {
		smb_error("CFStringGetCString for Windows code page failed on \"%s\" - skipping",
		    -1, utf8_string);
		/* The buffer is not handed to the caller; don't leak it. */
		free(result);
		CFRelease(s);
		return NULL;
	}
	CFRelease(s);
	return result;
#else /* NOTPORTED */
	/* strdup() already takes a const pointer; no cast is needed. */
	return (strdup(utf8_string));
#endif /* NOTPORTED */
}
*8271SGordon.Ross@Sun.COM/* END CSTYLED */
6007Sthurlow
6007Sthurlow/*
*8271SGordon.Ross@Sun.COM * We replaced these routines for Solaris:
*8271SGordon.Ross@Sun.COM *	convert_leunicode_to_utf8
*8271SGordon.Ross@Sun.COM *	convert_unicode_to_utf8
*8271SGordon.Ross@Sun.COM *	convert_utf8_to_leunicode
*8271SGordon.Ross@Sun.COM * with new code in: utf_str.c
6007Sthurlow */