1*8271SGordon.Ross@Sun.COM /* 2*8271SGordon.Ross@Sun.COM * CDDL HEADER START 3*8271SGordon.Ross@Sun.COM * 4*8271SGordon.Ross@Sun.COM * The contents of this file are subject to the terms of the 5*8271SGordon.Ross@Sun.COM * Common Development and Distribution License (the "License"). 6*8271SGordon.Ross@Sun.COM * You may not use this file except in compliance with the License. 7*8271SGordon.Ross@Sun.COM * 8*8271SGordon.Ross@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*8271SGordon.Ross@Sun.COM * or http://www.opensolaris.org/os/licensing. 10*8271SGordon.Ross@Sun.COM * See the License for the specific language governing permissions 11*8271SGordon.Ross@Sun.COM * and limitations under the License. 12*8271SGordon.Ross@Sun.COM * 13*8271SGordon.Ross@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 14*8271SGordon.Ross@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*8271SGordon.Ross@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 16*8271SGordon.Ross@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 17*8271SGordon.Ross@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 18*8271SGordon.Ross@Sun.COM * 19*8271SGordon.Ross@Sun.COM * CDDL HEADER END 20*8271SGordon.Ross@Sun.COM */ 21*8271SGordon.Ross@Sun.COM 22*8271SGordon.Ross@Sun.COM /* 23*8271SGordon.Ross@Sun.COM * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24*8271SGordon.Ross@Sun.COM * Use is subject to license terms. 25*8271SGordon.Ross@Sun.COM */ 26*8271SGordon.Ross@Sun.COM 27*8271SGordon.Ross@Sun.COM /* 28*8271SGordon.Ross@Sun.COM * Unicode conversions (yet more) 29*8271SGordon.Ross@Sun.COM */ 30*8271SGordon.Ross@Sun.COM 31*8271SGordon.Ross@Sun.COM #include <stdio.h> 32*8271SGordon.Ross@Sun.COM #include <stdlib.h> 33*8271SGordon.Ross@Sun.COM #include <string.h> 34*8271SGordon.Ross@Sun.COM #include <errno.h> 35*8271SGordon.Ross@Sun.COM #include <iconv.h> 36*8271SGordon.Ross@Sun.COM #include <libintl.h> 37*8271SGordon.Ross@Sun.COM 38*8271SGordon.Ross@Sun.COM #include <sys/u8_textprep.h> 39*8271SGordon.Ross@Sun.COM 40*8271SGordon.Ross@Sun.COM #include <netsmb/smb_lib.h> 41*8271SGordon.Ross@Sun.COM #include "charsets.h" 42*8271SGordon.Ross@Sun.COM 43*8271SGordon.Ross@Sun.COM 44*8271SGordon.Ross@Sun.COM /* 45*8271SGordon.Ross@Sun.COM * Number of unicode symbols in the string, 46*8271SGordon.Ross@Sun.COM * not including the 2-byte null terminator. 47*8271SGordon.Ross@Sun.COM * (multiply by two for storage size) 48*8271SGordon.Ross@Sun.COM */ 49*8271SGordon.Ross@Sun.COM size_t 50*8271SGordon.Ross@Sun.COM unicode_strlen(const uint16_t *us) 51*8271SGordon.Ross@Sun.COM { 52*8271SGordon.Ross@Sun.COM size_t len = 0; 53*8271SGordon.Ross@Sun.COM while (*us++) 54*8271SGordon.Ross@Sun.COM len++; 55*8271SGordon.Ross@Sun.COM return (len); 56*8271SGordon.Ross@Sun.COM } 57*8271SGordon.Ross@Sun.COM 58*8271SGordon.Ross@Sun.COM static char *convert_ucs2xx_to_utf8(iconv_t, const uint16_t *); 59*8271SGordon.Ross@Sun.COM 60*8271SGordon.Ross@Sun.COM /* 61*8271SGordon.Ross@Sun.COM * Convert (native) Unicode string to UTF-8. 62*8271SGordon.Ross@Sun.COM * Returns allocated memory. 63*8271SGordon.Ross@Sun.COM */ 64*8271SGordon.Ross@Sun.COM char * 65*8271SGordon.Ross@Sun.COM convert_unicode_to_utf8(uint16_t *us) 66*8271SGordon.Ross@Sun.COM { 67*8271SGordon.Ross@Sun.COM static iconv_t cd1 = (iconv_t)-1; 68*8271SGordon.Ross@Sun.COM 69*8271SGordon.Ross@Sun.COM /* Get conversion descriptor (to, from) */ 70*8271SGordon.Ross@Sun.COM if (cd1 == (iconv_t)-1) 71*8271SGordon.Ross@Sun.COM cd1 = iconv_open("UTF-8", "UCS-2"); 72*8271SGordon.Ross@Sun.COM 73*8271SGordon.Ross@Sun.COM return (convert_ucs2xx_to_utf8(cd1, us)); 74*8271SGordon.Ross@Sun.COM } 75*8271SGordon.Ross@Sun.COM 76*8271SGordon.Ross@Sun.COM /* 77*8271SGordon.Ross@Sun.COM * Convert little-endian Unicode string to UTF-8. 78*8271SGordon.Ross@Sun.COM * Returns allocated memory. 79*8271SGordon.Ross@Sun.COM */ 80*8271SGordon.Ross@Sun.COM char * 81*8271SGordon.Ross@Sun.COM convert_leunicode_to_utf8(unsigned short *us) 82*8271SGordon.Ross@Sun.COM { 83*8271SGordon.Ross@Sun.COM static iconv_t cd2 = (iconv_t)-1; 84*8271SGordon.Ross@Sun.COM 85*8271SGordon.Ross@Sun.COM /* Get conversion descriptor (to, from) */ 86*8271SGordon.Ross@Sun.COM if (cd2 == (iconv_t)-1) 87*8271SGordon.Ross@Sun.COM cd2 = iconv_open("UTF-8", "UCS-2LE"); 88*8271SGordon.Ross@Sun.COM 89*8271SGordon.Ross@Sun.COM return (convert_ucs2xx_to_utf8(cd2, us)); 90*8271SGordon.Ross@Sun.COM } 91*8271SGordon.Ross@Sun.COM 92*8271SGordon.Ross@Sun.COM static char * 93*8271SGordon.Ross@Sun.COM convert_ucs2xx_to_utf8(iconv_t cd, const uint16_t *us) 94*8271SGordon.Ross@Sun.COM { 95*8271SGordon.Ross@Sun.COM char *obuf, *optr; 96*8271SGordon.Ross@Sun.COM const char *iptr; 97*8271SGordon.Ross@Sun.COM size_t ileft, obsize, oleft, ret; 98*8271SGordon.Ross@Sun.COM 99*8271SGordon.Ross@Sun.COM if (cd == (iconv_t)-1) { 100*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 101*8271SGordon.Ross@Sun.COM "iconv_open(UTF-8/UCS-2)"), -1); 102*8271SGordon.Ross@Sun.COM return (NULL); 103*8271SGordon.Ross@Sun.COM } 104*8271SGordon.Ross@Sun.COM 105*8271SGordon.Ross@Sun.COM iptr = (const char *)us; 106*8271SGordon.Ross@Sun.COM ileft = unicode_strlen(us); 107*8271SGordon.Ross@Sun.COM ileft *= 2; /* now bytes */ 108*8271SGordon.Ross@Sun.COM 109*8271SGordon.Ross@Sun.COM /* Worst-case output size is 2x input size. */ 110*8271SGordon.Ross@Sun.COM oleft = ileft * 2; 111*8271SGordon.Ross@Sun.COM obsize = oleft + 2; /* room for null */ 112*8271SGordon.Ross@Sun.COM obuf = malloc(obsize); 113*8271SGordon.Ross@Sun.COM if (!obuf) 114*8271SGordon.Ross@Sun.COM return (NULL); 115*8271SGordon.Ross@Sun.COM optr = obuf; 116*8271SGordon.Ross@Sun.COM 117*8271SGordon.Ross@Sun.COM ret = iconv(cd, &iptr, &ileft, &optr, &oleft); 118*8271SGordon.Ross@Sun.COM *optr = '\0'; 119*8271SGordon.Ross@Sun.COM if (ret == (size_t)-1) { 120*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 121*8271SGordon.Ross@Sun.COM "iconv(%s) failed"), errno, obuf); 122*8271SGordon.Ross@Sun.COM } 123*8271SGordon.Ross@Sun.COM if (ileft) { 124*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 125*8271SGordon.Ross@Sun.COM "iconv(%s) failed"), -1, obuf); 126*8271SGordon.Ross@Sun.COM /* 127*8271SGordon.Ross@Sun.COM * XXX: What's better? return NULL? 128*8271SGordon.Ross@Sun.COM * The truncated string? << for now 129*8271SGordon.Ross@Sun.COM */ 130*8271SGordon.Ross@Sun.COM } 131*8271SGordon.Ross@Sun.COM 132*8271SGordon.Ross@Sun.COM return (obuf); 133*8271SGordon.Ross@Sun.COM } 134*8271SGordon.Ross@Sun.COM 135*8271SGordon.Ross@Sun.COM static uint16_t *convert_utf8_to_ucs2xx(iconv_t, const char *); 136*8271SGordon.Ross@Sun.COM 137*8271SGordon.Ross@Sun.COM /* 138*8271SGordon.Ross@Sun.COM * Convert UTF-8 string to Unicode. 139*8271SGordon.Ross@Sun.COM * Returns allocated memory. 140*8271SGordon.Ross@Sun.COM */ 141*8271SGordon.Ross@Sun.COM uint16_t * 142*8271SGordon.Ross@Sun.COM convert_utf8_to_unicode(const char *utf8_string) 143*8271SGordon.Ross@Sun.COM { 144*8271SGordon.Ross@Sun.COM static iconv_t cd3 = (iconv_t)-1; 145*8271SGordon.Ross@Sun.COM 146*8271SGordon.Ross@Sun.COM /* Get conversion descriptor (to, from) */ 147*8271SGordon.Ross@Sun.COM if (cd3 == (iconv_t)-1) 148*8271SGordon.Ross@Sun.COM cd3 = iconv_open("UCS-2", "UTF-8"); 149*8271SGordon.Ross@Sun.COM return (convert_utf8_to_ucs2xx(cd3, utf8_string)); 150*8271SGordon.Ross@Sun.COM } 151*8271SGordon.Ross@Sun.COM 152*8271SGordon.Ross@Sun.COM /* 153*8271SGordon.Ross@Sun.COM * Convert UTF-8 string to little-endian Unicode. 154*8271SGordon.Ross@Sun.COM * Returns allocated memory. 155*8271SGordon.Ross@Sun.COM */ 156*8271SGordon.Ross@Sun.COM uint16_t * 157*8271SGordon.Ross@Sun.COM convert_utf8_to_leunicode(const char *utf8_string) 158*8271SGordon.Ross@Sun.COM { 159*8271SGordon.Ross@Sun.COM static iconv_t cd4 = (iconv_t)-1; 160*8271SGordon.Ross@Sun.COM 161*8271SGordon.Ross@Sun.COM /* Get conversion descriptor (to, from) */ 162*8271SGordon.Ross@Sun.COM if (cd4 == (iconv_t)-1) 163*8271SGordon.Ross@Sun.COM cd4 = iconv_open("UCS-2LE", "UTF-8"); 164*8271SGordon.Ross@Sun.COM return (convert_utf8_to_ucs2xx(cd4, utf8_string)); 165*8271SGordon.Ross@Sun.COM } 166*8271SGordon.Ross@Sun.COM 167*8271SGordon.Ross@Sun.COM static uint16_t * 168*8271SGordon.Ross@Sun.COM convert_utf8_to_ucs2xx(iconv_t cd, const char *utf8_string) 169*8271SGordon.Ross@Sun.COM { 170*8271SGordon.Ross@Sun.COM uint16_t *obuf, *optr; 171*8271SGordon.Ross@Sun.COM const char *iptr; 172*8271SGordon.Ross@Sun.COM size_t ileft, obsize, oleft, ret; 173*8271SGordon.Ross@Sun.COM 174*8271SGordon.Ross@Sun.COM if (cd == (iconv_t)-1) { 175*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 176*8271SGordon.Ross@Sun.COM "iconv_open(UCS-2/UTF-8)"), -1); 177*8271SGordon.Ross@Sun.COM return (NULL); 178*8271SGordon.Ross@Sun.COM } 179*8271SGordon.Ross@Sun.COM 180*8271SGordon.Ross@Sun.COM iptr = utf8_string; 181*8271SGordon.Ross@Sun.COM ileft = strlen(iptr); 182*8271SGordon.Ross@Sun.COM 183*8271SGordon.Ross@Sun.COM /* Worst-case output size is 2x input size. */ 184*8271SGordon.Ross@Sun.COM oleft = ileft * 2; 185*8271SGordon.Ross@Sun.COM obsize = oleft + 2; /* room for null */ 186*8271SGordon.Ross@Sun.COM obuf = malloc(obsize); 187*8271SGordon.Ross@Sun.COM if (!obuf) 188*8271SGordon.Ross@Sun.COM return (NULL); 189*8271SGordon.Ross@Sun.COM optr = obuf; 190*8271SGordon.Ross@Sun.COM 191*8271SGordon.Ross@Sun.COM ret = iconv(cd, &iptr, &ileft, (char **)&optr, &oleft); 192*8271SGordon.Ross@Sun.COM *optr = '\0'; 193*8271SGordon.Ross@Sun.COM if (ret == (size_t)-1) { 194*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 195*8271SGordon.Ross@Sun.COM "iconv(%s) failed"), errno, utf8_string); 196*8271SGordon.Ross@Sun.COM } 197*8271SGordon.Ross@Sun.COM if (ileft) { 198*8271SGordon.Ross@Sun.COM smb_error(dgettext(TEXT_DOMAIN, 199*8271SGordon.Ross@Sun.COM "iconv(%s) failed"), -1, utf8_string); 200*8271SGordon.Ross@Sun.COM /* 201*8271SGordon.Ross@Sun.COM * XXX: What's better? return NULL? 202*8271SGordon.Ross@Sun.COM * The truncated string? << for now 203*8271SGordon.Ross@Sun.COM */ 204*8271SGordon.Ross@Sun.COM } 205*8271SGordon.Ross@Sun.COM 206*8271SGordon.Ross@Sun.COM return (obuf); 207*8271SGordon.Ross@Sun.COM } 208