xref: /freebsd-src/contrib/libpcap/charconv.c (revision afdbf109c6a661a729938f68211054a0a50d38ac)
16f9cba8fSJoseph Mingrone /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
26f9cba8fSJoseph Mingrone /*
36f9cba8fSJoseph Mingrone  * Copyright (c) 1993, 1994, 1995, 1996, 1997
46f9cba8fSJoseph Mingrone  *	The Regents of the University of California.  All rights reserved.
56f9cba8fSJoseph Mingrone  *
66f9cba8fSJoseph Mingrone  * Redistribution and use in source and binary forms, with or without
76f9cba8fSJoseph Mingrone  * modification, are permitted provided that the following conditions
86f9cba8fSJoseph Mingrone  * are met:
96f9cba8fSJoseph Mingrone  * 1. Redistributions of source code must retain the above copyright
106f9cba8fSJoseph Mingrone  *    notice, this list of conditions and the following disclaimer.
116f9cba8fSJoseph Mingrone  * 2. Redistributions in binary form must reproduce the above copyright
126f9cba8fSJoseph Mingrone  *    notice, this list of conditions and the following disclaimer in the
136f9cba8fSJoseph Mingrone  *    documentation and/or other materials provided with the distribution.
146f9cba8fSJoseph Mingrone  * 3. All advertising materials mentioning features or use of this software
156f9cba8fSJoseph Mingrone  *    must display the following acknowledgement:
166f9cba8fSJoseph Mingrone  *	This product includes software developed by the Computer Systems
176f9cba8fSJoseph Mingrone  *	Engineering Group at Lawrence Berkeley Laboratory.
186f9cba8fSJoseph Mingrone  * 4. Neither the name of the University nor of the Laboratory may be used
196f9cba8fSJoseph Mingrone  *    to endorse or promote products derived from this software without
206f9cba8fSJoseph Mingrone  *    specific prior written permission.
216f9cba8fSJoseph Mingrone  *
226f9cba8fSJoseph Mingrone  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
236f9cba8fSJoseph Mingrone  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
246f9cba8fSJoseph Mingrone  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
256f9cba8fSJoseph Mingrone  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
266f9cba8fSJoseph Mingrone  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
276f9cba8fSJoseph Mingrone  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
286f9cba8fSJoseph Mingrone  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
296f9cba8fSJoseph Mingrone  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
306f9cba8fSJoseph Mingrone  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
316f9cba8fSJoseph Mingrone  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
326f9cba8fSJoseph Mingrone  * SUCH DAMAGE.
336f9cba8fSJoseph Mingrone  */
346f9cba8fSJoseph Mingrone 
356f9cba8fSJoseph Mingrone #ifdef _WIN32
366f9cba8fSJoseph Mingrone #include <stdio.h>
376f9cba8fSJoseph Mingrone #include <errno.h>
386f9cba8fSJoseph Mingrone 
396f9cba8fSJoseph Mingrone #include <pcap/pcap.h>	/* Needed for PCAP_ERRBUF_SIZE */
406f9cba8fSJoseph Mingrone 
416f9cba8fSJoseph Mingrone #include "charconv.h"
426f9cba8fSJoseph Mingrone 
436f9cba8fSJoseph Mingrone wchar_t *
446f9cba8fSJoseph Mingrone cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
456f9cba8fSJoseph Mingrone {
466f9cba8fSJoseph Mingrone 	int utf16le_len;
476f9cba8fSJoseph Mingrone 	wchar_t *utf16le_string;
486f9cba8fSJoseph Mingrone 
496f9cba8fSJoseph Mingrone 	/*
506f9cba8fSJoseph Mingrone 	 * Map from the specified code page to UTF-16LE.
516f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
526f9cba8fSJoseph Mingrone 	 */
536f9cba8fSJoseph Mingrone 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
546f9cba8fSJoseph Mingrone 	    NULL, 0);
556f9cba8fSJoseph Mingrone 	if (utf16le_len == 0) {
566f9cba8fSJoseph Mingrone 		/*
576f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
586f9cba8fSJoseph Mingrone 		 */
596f9cba8fSJoseph Mingrone 		errno = EINVAL;
606f9cba8fSJoseph Mingrone 		return (NULL);
616f9cba8fSJoseph Mingrone 	}
626f9cba8fSJoseph Mingrone 
636f9cba8fSJoseph Mingrone 	/*
646f9cba8fSJoseph Mingrone 	 * Now attempt to allocate a buffer for that.
656f9cba8fSJoseph Mingrone 	 */
666f9cba8fSJoseph Mingrone 	utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
676f9cba8fSJoseph Mingrone 	if (utf16le_string == NULL) {
686f9cba8fSJoseph Mingrone 		/*
696f9cba8fSJoseph Mingrone 		 * Not enough memory; assume errno has been
706f9cba8fSJoseph Mingrone 		 * set, and fail.
716f9cba8fSJoseph Mingrone 		 */
726f9cba8fSJoseph Mingrone 		return (NULL);
736f9cba8fSJoseph Mingrone 	}
746f9cba8fSJoseph Mingrone 
756f9cba8fSJoseph Mingrone 	/*
766f9cba8fSJoseph Mingrone 	 * Now convert.
776f9cba8fSJoseph Mingrone 	 */
786f9cba8fSJoseph Mingrone 	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
796f9cba8fSJoseph Mingrone 	    utf16le_string, utf16le_len);
806f9cba8fSJoseph Mingrone 	if (utf16le_len == 0) {
816f9cba8fSJoseph Mingrone 		/*
826f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
836f9cba8fSJoseph Mingrone 		 * XXX - should this ever happen, given that
846f9cba8fSJoseph Mingrone 		 * we already ran the string through
856f9cba8fSJoseph Mingrone 		 * MultiByteToWideChar() to find out how big
866f9cba8fSJoseph Mingrone 		 * a buffer we needed?
876f9cba8fSJoseph Mingrone 		 */
886f9cba8fSJoseph Mingrone 		free(utf16le_string);
896f9cba8fSJoseph Mingrone 		errno = EINVAL;
906f9cba8fSJoseph Mingrone 		return (NULL);
916f9cba8fSJoseph Mingrone 	}
926f9cba8fSJoseph Mingrone 	return (utf16le_string);
936f9cba8fSJoseph Mingrone }
946f9cba8fSJoseph Mingrone 
956f9cba8fSJoseph Mingrone char *
966f9cba8fSJoseph Mingrone utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
976f9cba8fSJoseph Mingrone {
986f9cba8fSJoseph Mingrone 	int cp_len;
996f9cba8fSJoseph Mingrone 	char *cp_string;
1006f9cba8fSJoseph Mingrone 
1016f9cba8fSJoseph Mingrone 	/*
1026f9cba8fSJoseph Mingrone 	 * Map from UTF-16LE to the specified code page.
1036f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
1046f9cba8fSJoseph Mingrone 	 * We convert composite characters to precomposed characters,
1056f9cba8fSJoseph Mingrone 	 * as that's what Windows expects.
1066f9cba8fSJoseph Mingrone 	 */
1076f9cba8fSJoseph Mingrone 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
1086f9cba8fSJoseph Mingrone 	    utf16le_string, -1, NULL, 0, NULL, NULL);
1096f9cba8fSJoseph Mingrone 	if (cp_len == 0) {
1106f9cba8fSJoseph Mingrone 		/*
1116f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
1126f9cba8fSJoseph Mingrone 		 */
1136f9cba8fSJoseph Mingrone 		errno = EINVAL;
1146f9cba8fSJoseph Mingrone 		return (NULL);
1156f9cba8fSJoseph Mingrone 	}
1166f9cba8fSJoseph Mingrone 
1176f9cba8fSJoseph Mingrone 	/*
1186f9cba8fSJoseph Mingrone 	 * Now attempt to allocate a buffer for that.
1196f9cba8fSJoseph Mingrone 	 */
1206f9cba8fSJoseph Mingrone 	cp_string = malloc(cp_len * sizeof (char));
1216f9cba8fSJoseph Mingrone 	if (cp_string == NULL) {
1226f9cba8fSJoseph Mingrone 		/*
1236f9cba8fSJoseph Mingrone 		 * Not enough memory; assume errno has been
1246f9cba8fSJoseph Mingrone 		 * set, and fail.
1256f9cba8fSJoseph Mingrone 		 */
1266f9cba8fSJoseph Mingrone 		return (NULL);
1276f9cba8fSJoseph Mingrone 	}
1286f9cba8fSJoseph Mingrone 
1296f9cba8fSJoseph Mingrone 	/*
1306f9cba8fSJoseph Mingrone 	 * Now convert.
1316f9cba8fSJoseph Mingrone 	 */
1326f9cba8fSJoseph Mingrone 	cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
1336f9cba8fSJoseph Mingrone 	    utf16le_string, -1, cp_string, cp_len, NULL, NULL);
1346f9cba8fSJoseph Mingrone 	if (cp_len == 0) {
1356f9cba8fSJoseph Mingrone 		/*
1366f9cba8fSJoseph Mingrone 		 * Error.  Fail with EINVAL.
1376f9cba8fSJoseph Mingrone 		 * XXX - should this ever happen, given that
1386f9cba8fSJoseph Mingrone 		 * we already ran the string through
1396f9cba8fSJoseph Mingrone 		 * WideCharToMultiByte() to find out how big
1406f9cba8fSJoseph Mingrone 		 * a buffer we needed?
1416f9cba8fSJoseph Mingrone 		 */
1426f9cba8fSJoseph Mingrone 		free(cp_string);
1436f9cba8fSJoseph Mingrone 		errno = EINVAL;
1446f9cba8fSJoseph Mingrone 		return (NULL);
1456f9cba8fSJoseph Mingrone 	}
1466f9cba8fSJoseph Mingrone 	return (cp_string);
1476f9cba8fSJoseph Mingrone }
1486f9cba8fSJoseph Mingrone 
1496f9cba8fSJoseph Mingrone /*
1506f9cba8fSJoseph Mingrone  * Convert an error message string from UTF-8 to the local code page, as
1516f9cba8fSJoseph Mingrone  * best we can.
1526f9cba8fSJoseph Mingrone  *
1536f9cba8fSJoseph Mingrone  * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
1546f9cba8fSJoseph Mingrone  * if it doesn't fit.
1556f9cba8fSJoseph Mingrone  */
1566f9cba8fSJoseph Mingrone void
1576f9cba8fSJoseph Mingrone utf_8_to_acp_truncated(char *errbuf)
1586f9cba8fSJoseph Mingrone {
1596f9cba8fSJoseph Mingrone 	wchar_t *utf_16_errbuf;
1606f9cba8fSJoseph Mingrone 	int retval;
1616f9cba8fSJoseph Mingrone 	DWORD err;
1626f9cba8fSJoseph Mingrone 
1636f9cba8fSJoseph Mingrone 	/*
1646f9cba8fSJoseph Mingrone 	 * Do this by converting to UTF-16LE and then to the local
1656f9cba8fSJoseph Mingrone 	 * code page.  That means we get to use Microsoft's
1666f9cba8fSJoseph Mingrone 	 * conversion routines, rather than having to understand
1676f9cba8fSJoseph Mingrone 	 * all the code pages ourselves, *and* that this routine
1686f9cba8fSJoseph Mingrone 	 * can convert in place.
1696f9cba8fSJoseph Mingrone 	 */
1706f9cba8fSJoseph Mingrone 
1716f9cba8fSJoseph Mingrone 	/*
1726f9cba8fSJoseph Mingrone 	 * Map from UTF-8 to UTF-16LE.
1736f9cba8fSJoseph Mingrone 	 * First, find out how big a buffer we'll need.
1746f9cba8fSJoseph Mingrone 	 * Convert any invalid characters to REPLACEMENT CHARACTER.
1756f9cba8fSJoseph Mingrone 	 */
1766f9cba8fSJoseph Mingrone 	utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
1776f9cba8fSJoseph Mingrone 	if (utf_16_errbuf == NULL) {
1786f9cba8fSJoseph Mingrone 		/*
1796f9cba8fSJoseph Mingrone 		 * Error.  Give up.
1806f9cba8fSJoseph Mingrone 		 */
1816f9cba8fSJoseph Mingrone 		snprintf(errbuf, PCAP_ERRBUF_SIZE,
1826f9cba8fSJoseph Mingrone 		    "Can't convert error string to the local code page");
1836f9cba8fSJoseph Mingrone 		return;
1846f9cba8fSJoseph Mingrone 	}
1856f9cba8fSJoseph Mingrone 
1866f9cba8fSJoseph Mingrone 	/*
1876f9cba8fSJoseph Mingrone 	 * Now, convert that to the local code page.
188*afdbf109SJoseph Mingrone 	 * Use the current thread's code page.  For unconvertible
1896f9cba8fSJoseph Mingrone 	 * characters, let it pick the "best fit" character.
1906f9cba8fSJoseph Mingrone 	 *
1916f9cba8fSJoseph Mingrone 	 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
1926f9cba8fSJoseph Mingrone 	 * does if the buffer isn't big enough, but we don't want to have
1936f9cba8fSJoseph Mingrone 	 * to handle all local code pages ourselves; doing so requires
1946f9cba8fSJoseph Mingrone 	 * knowledge of all those code pages, including knowledge of how
195*afdbf109SJoseph Mingrone 	 * characters are formed in those code pages so that we can avoid
1966f9cba8fSJoseph Mingrone 	 * cutting a multi-byte character into pieces.
1976f9cba8fSJoseph Mingrone 	 *
1986f9cba8fSJoseph Mingrone 	 * Converting to an un-truncated string using Windows APIs, and
1996f9cba8fSJoseph Mingrone 	 * then copying to the buffer, still requires knowledge of how
2006f9cba8fSJoseph Mingrone 	 * characters are formed in the target code page.
2016f9cba8fSJoseph Mingrone 	 */
2026f9cba8fSJoseph Mingrone 	retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
2036f9cba8fSJoseph Mingrone 	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
2046f9cba8fSJoseph Mingrone 	if (retval == 0) {
2056f9cba8fSJoseph Mingrone 		err = GetLastError();
2066f9cba8fSJoseph Mingrone 		free(utf_16_errbuf);
2076f9cba8fSJoseph Mingrone 		if (err == ERROR_INSUFFICIENT_BUFFER)
2086f9cba8fSJoseph Mingrone 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
2096f9cba8fSJoseph Mingrone 			    "The error string, in the local code page, didn't fit in the buffer");
2106f9cba8fSJoseph Mingrone 		else
2116f9cba8fSJoseph Mingrone 			snprintf(errbuf, PCAP_ERRBUF_SIZE,
2126f9cba8fSJoseph Mingrone 			    "Can't convert error string to the local code page");
2136f9cba8fSJoseph Mingrone 		return;
2146f9cba8fSJoseph Mingrone 	}
2156f9cba8fSJoseph Mingrone 	free(utf_16_errbuf);
2166f9cba8fSJoseph Mingrone }
2176f9cba8fSJoseph Mingrone #endif
218