xref: /onnv-gate/usr/src/cmd/ssh/libssh/common/g11n.c (revision 5562:0f12179b71ab)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52628Sjp161948  * Common Development and Distribution License (the "License").
62628Sjp161948  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  *
21*5562Sjp161948  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
220Sstevel@tonic-gate  * Use is subject to license terms.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <errno.h>
280Sstevel@tonic-gate #include <locale.h>
290Sstevel@tonic-gate #include <langinfo.h>
300Sstevel@tonic-gate #include <iconv.h>
310Sstevel@tonic-gate #include <ctype.h>
320Sstevel@tonic-gate #include <strings.h>
330Sstevel@tonic-gate #include <string.h>
340Sstevel@tonic-gate #include <stdio.h>
350Sstevel@tonic-gate #include <stdlib.h>
360Sstevel@tonic-gate #include "includes.h"
370Sstevel@tonic-gate #include "xmalloc.h"
380Sstevel@tonic-gate #include "xlist.h"
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #ifdef MIN
410Sstevel@tonic-gate #undef MIN
420Sstevel@tonic-gate #endif /* MIN */
430Sstevel@tonic-gate 
442705Sjp161948 #define	MIN(x, y)	((x) < (y) ? (x) : (y))
450Sstevel@tonic-gate 
462705Sjp161948 #define	LOCALE_PATH	"/usr/bin/locale"
470Sstevel@tonic-gate 
/* two-char language code, '-' and two-char country code */
492705Sjp161948 #define	LANGTAG_MAX	5
500Sstevel@tonic-gate 
512705Sjp161948 static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf,
522705Sjp161948     uint_t len, uint_t *outlen, int *err, uchar_t **err_str);
530Sstevel@tonic-gate 
540Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2);
550Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale);
560Sstevel@tonic-gate 
572705Sjp161948 uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str);
582705Sjp161948 uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str);
590Sstevel@tonic-gate 
60*5562Sjp161948 /*
61*5562Sjp161948  * Convert locale string name into a language tag. The caller is responsible for
62*5562Sjp161948  * freeing the memory allocated for the result.
63*5562Sjp161948  */
642705Sjp161948 static char *
650Sstevel@tonic-gate g11n_locale2langtag(char *locale)
660Sstevel@tonic-gate {
672705Sjp161948 	char *langtag;
680Sstevel@tonic-gate 
692705Sjp161948 	/* base cases */
702705Sjp161948 	if (!locale || !*locale)
712705Sjp161948 		return (NULL);
720Sstevel@tonic-gate 
732705Sjp161948 	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
74*5562Sjp161948 		return (xstrdup("i-default"));
750Sstevel@tonic-gate 
762705Sjp161948 	/* punt for language codes which are not exactly 2 letters */
772705Sjp161948 	if (strlen(locale) < 2 ||
782705Sjp161948 	    !isalpha(locale[0]) ||
792705Sjp161948 	    !isalpha(locale[1]) ||
802705Sjp161948 	    (locale[2] != '\0' &&
812705Sjp161948 	    locale[2] != '_' &&
822705Sjp161948 	    locale[2] != '.' &&
832705Sjp161948 	    locale[2] != '@'))
842705Sjp161948 		return (NULL);
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 
872705Sjp161948 	/* we have a primary language sub-tag */
882705Sjp161948 	langtag = (char *)xmalloc(LANGTAG_MAX + 1);
890Sstevel@tonic-gate 
902705Sjp161948 	strncpy(langtag, locale, 2);
912705Sjp161948 	langtag[2] = '\0';
920Sstevel@tonic-gate 
932705Sjp161948 	/* do we have country sub-tag? For example: cs_CZ */
942705Sjp161948 	if (locale[2] == '_') {
952705Sjp161948 		if (strlen(locale) < 5 ||
962705Sjp161948 		    !isalpha(locale[3]) ||
972705Sjp161948 		    !isalpha(locale[4]) ||
982705Sjp161948 		    (locale[5] != '\0' && (locale[5] != '.' &&
992705Sjp161948 		    locale[5] != '@'))) {
1002705Sjp161948 			return (langtag);
1012705Sjp161948 		}
1022705Sjp161948 
1032705Sjp161948 		/* example: create cs-CZ from cs_CZ */
1042705Sjp161948 		if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
1052705Sjp161948 		    locale + 3) == 5)
1062705Sjp161948 			return (langtag);
1070Sstevel@tonic-gate 	}
1080Sstevel@tonic-gate 
1092705Sjp161948 	/* in all other cases we just use the primary language sub-tag */
1102705Sjp161948 	return (langtag);
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate 
1132705Sjp161948 uint_t
1140Sstevel@tonic-gate g11n_langtag_is_default(char *langtag)
1150Sstevel@tonic-gate {
1162705Sjp161948 	return (strcmp(langtag, "i-default") == 0);
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * This lang tag / locale matching function works only for two-character
1210Sstevel@tonic-gate  * language primary sub-tags and two-character country sub-tags.
1220Sstevel@tonic-gate  */
1232705Sjp161948 uint_t
1240Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale)
1250Sstevel@tonic-gate {
1262705Sjp161948 	/* match "i-default" to the process' current locale if possible */
1272705Sjp161948 	if (g11n_langtag_is_default(langtag)) {
1282705Sjp161948 		if (strcasecmp(locale, "POSIX") == 0 ||
1292705Sjp161948 		    strcasecmp(locale, "C") == 0)
1302705Sjp161948 			return (1);
1312705Sjp161948 		else
1322705Sjp161948 			return (0);
1332705Sjp161948 	}
1340Sstevel@tonic-gate 
1352705Sjp161948 	/*
1362705Sjp161948 	 * locale must be at least 2 chars long and the lang part must be
1372705Sjp161948 	 * exactly two characters
1382705Sjp161948 	 */
1392705Sjp161948 	if (strlen(locale) < 2 ||
1402705Sjp161948 	    (!isalpha(locale[0]) || !isalpha(locale[1]) ||
1412705Sjp161948 	    (locale[2] != '\0' && locale[2] != '_' &&
1422705Sjp161948 	    locale[2] != '.' && locale[2] != '@')))
1432705Sjp161948 		return (0);
1440Sstevel@tonic-gate 
1452705Sjp161948 	/* same thing with the langtag */
1462705Sjp161948 	if (strlen(langtag) < 2 ||
1472705Sjp161948 	    (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
1482705Sjp161948 	    (langtag[2] != '\0' && langtag[2] != '-')))
1492705Sjp161948 		return (0);
1500Sstevel@tonic-gate 
1512705Sjp161948 	/* primary language sub-tag and the locale's language part must match */
1522705Sjp161948 	if (strncasecmp(langtag, locale, 2) != 0)
1532705Sjp161948 		return (0);
1540Sstevel@tonic-gate 
1552705Sjp161948 	/*
1562705Sjp161948 	 * primary language sub-tag and the locale's language match, now
1572705Sjp161948 	 * fuzzy check country part
1582705Sjp161948 	 */
1590Sstevel@tonic-gate 
1602705Sjp161948 	/* neither langtag nor locale have more than one component */
1612705Sjp161948 	if (langtag[2] == '\0' &&
1622705Sjp161948 	    (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
1632705Sjp161948 		return (2);
1640Sstevel@tonic-gate 
1652705Sjp161948 	/* langtag has only one sub-tag... */
1662705Sjp161948 	if (langtag[2] == '\0')
1672705Sjp161948 		return (1);
1680Sstevel@tonic-gate 
1692705Sjp161948 	/* locale has no country code... */
1702705Sjp161948 	if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
1712705Sjp161948 		return (1);
1722705Sjp161948 
1732705Sjp161948 	/* langtag has more than one subtag and the locale has a country code */
1740Sstevel@tonic-gate 
1752705Sjp161948 	/* ignore second subtag if not two chars */
1762705Sjp161948 	if (strlen(langtag) < 5)
1772705Sjp161948 		return (1);
1780Sstevel@tonic-gate 
1792705Sjp161948 	if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
1802705Sjp161948 	    (langtag[5] != '\0' && langtag[5] != '-'))
1812705Sjp161948 		return (1);
1820Sstevel@tonic-gate 
1832705Sjp161948 	/* ignore rest of locale if there is no two-character country part */
1842705Sjp161948 	if (strlen(locale) < 5)
1852705Sjp161948 		return (1);
1860Sstevel@tonic-gate 
1872705Sjp161948 	if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
1882705Sjp161948 	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
1892705Sjp161948 		return (1);
1900Sstevel@tonic-gate 
1912705Sjp161948 	/* if the country part matches, return 2 */
1922705Sjp161948 	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
1932705Sjp161948 		return (2);
1940Sstevel@tonic-gate 
1952705Sjp161948 	return (1);
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate char *
1990Sstevel@tonic-gate g11n_getlocale()
2000Sstevel@tonic-gate {
2012705Sjp161948 	/* we have one text domain - always set it */
2022705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2030Sstevel@tonic-gate 
2042705Sjp161948 	/* if the locale is not set, set it from the env vars */
2052705Sjp161948 	if (!setlocale(LC_MESSAGES, NULL))
2062705Sjp161948 		(void) setlocale(LC_MESSAGES, "");
2070Sstevel@tonic-gate 
2082705Sjp161948 	return (setlocale(LC_MESSAGES, NULL));
2090Sstevel@tonic-gate }
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate void
2120Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
2130Sstevel@tonic-gate {
2142705Sjp161948 	char *curr;
2150Sstevel@tonic-gate 
2162705Sjp161948 	/* we have one text domain - always set it */
2172705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2180Sstevel@tonic-gate 
2192705Sjp161948 	if (!locale)
2202705Sjp161948 		return;
2210Sstevel@tonic-gate 
2222705Sjp161948 	if (*locale && ((curr = setlocale(category, NULL))) &&
2232705Sjp161948 	    strcmp(curr, locale) == 0)
2242705Sjp161948 		return;
2252628Sjp161948 
2262705Sjp161948 	/* if <category> is bogus, setlocale() will do nothing */
2272705Sjp161948 	(void) setlocale(category, locale);
2280Sstevel@tonic-gate }
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate char **
2310Sstevel@tonic-gate g11n_getlocales()
2320Sstevel@tonic-gate {
2332705Sjp161948 	FILE *locale_out;
2342705Sjp161948 	uint_t n_elems, list_size, long_line = 0;
2352705Sjp161948 	char **list;
2362705Sjp161948 	char locale[64];	/* 64 bytes is plenty for locale names */
2372705Sjp161948 
2382705Sjp161948 	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
2392705Sjp161948 		return (NULL);
2400Sstevel@tonic-gate 
2412705Sjp161948 	/*
2422705Sjp161948 	 * start with enough room for 65 locales - that's a lot fewer than
2432705Sjp161948 	 * all the locales available for installation, but a lot more than
2442705Sjp161948 	 * what most users will need and install
2452705Sjp161948 	 */
2462705Sjp161948 	n_elems = 0;
2472705Sjp161948 	list_size = 192;
2482705Sjp161948 	list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
2492705Sjp161948 	memset(list, 0, sizeof (char *) * (list_size + 1));
2500Sstevel@tonic-gate 
2512705Sjp161948 	while (fgets(locale, sizeof (locale), locale_out)) {
2522705Sjp161948 		/* skip long locale names (if any) */
2532705Sjp161948 		if (!strchr(locale, '\n')) {
2542705Sjp161948 			long_line = 1;
2552705Sjp161948 			continue;
2562705Sjp161948 		} else if (long_line) {
2572705Sjp161948 			long_line = 0;
2582705Sjp161948 			continue;
2592705Sjp161948 		}
2600Sstevel@tonic-gate 
2612705Sjp161948 		if (strncmp(locale, "iso_8859", 8) == 0)
2622705Sjp161948 			/* ignore locale names like "iso_8859-1" */
2632705Sjp161948 			continue;
2640Sstevel@tonic-gate 
2652705Sjp161948 		if (n_elems == list_size) {
2662705Sjp161948 			list_size *= 2;
2672705Sjp161948 			list = (char **)xrealloc((void *) list,
2682705Sjp161948 			    (list_size + 1) * sizeof (char *));
2692705Sjp161948 			memset(&list[n_elems + 1], 0,
2702705Sjp161948 			    sizeof (char *) * (list_size - n_elems + 1));
2712705Sjp161948 		}
2722705Sjp161948 
2732705Sjp161948 		*(strchr(locale, '\n')) = '\0';	/* remove the trailing \n */
2742705Sjp161948 		list[n_elems++] = xstrdup(locale);
2750Sstevel@tonic-gate 	}
2760Sstevel@tonic-gate 
277*5562Sjp161948 	if (n_elems == 0) {
278*5562Sjp161948 		xfree(list);
2793109Sjp161948 		return (NULL);
280*5562Sjp161948 	}
2813109Sjp161948 
2822705Sjp161948 	list[n_elems] = NULL;
2832705Sjp161948 	(void) pclose(locale_out);
2840Sstevel@tonic-gate 
2852705Sjp161948 	qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
2862705Sjp161948 	return (list);
2870Sstevel@tonic-gate }
2880Sstevel@tonic-gate 
/*
 * Return the language tag(s) to use, as an allocated string.
 *
 * The SSH_LANGS environment variable, when set, is returned verbatim (as a
 * copy).  Otherwise the result is derived from the current LC_MESSAGES
 * locale: "i-default" when no locale name is available, else whatever
 * g11n_locale2langtag() produces for it, which is NULL when the locale name
 * cannot be converted.
 */
char *
g11n_getlangs()
{
	char *env_langs;
	char *locale;

	env_langs = getenv("SSH_LANGS");
	if (env_langs != NULL)
		return (xstrdup(env_langs));

	locale = g11n_getlocale();
	if (locale == NULL || *locale == '\0')
		return (xstrdup("i-default"));

	return (g11n_locale2langtag(locale));
}
3040Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated array of locale names into a comma-separated
 * string of language tags.
 *
 * Each locale is converted with g11n_locale2langtag(); locales that cannot
 * be converted are skipped and each distinct tag appears once, in order of
 * first occurrence.  The temporary tag list is released before returning;
 * the joined string comes from xjoin().
 */
char *
g11n_locales2langs(char **locale_set)
{
	char **tags, **lp, **slot;
	char *tag, *joined;
	int n_locales, duplicate;

	n_locales = 0;
	for (lp = locale_set; lp && *lp; lp++)
		n_locales++;

	/* one slot per locale plus the terminator, all initially NULL */
	tags = (char **)xmalloc((n_locales + 1) * sizeof (char *));
	memset(tags, 0, (n_locales + 1) * sizeof (char *));

	for (lp = locale_set; lp && *lp; lp++) {
		tag = g11n_locale2langtag(*lp);
		if (tag == NULL)
			continue;

		/*
		 * Walk to the first free slot, noting on the way whether
		 * the tag is already present.  The walk must not stop early
		 * on a duplicate: slot has to end up at the free slot.
		 */
		duplicate = 0;
		for (slot = tags; (slot - tags) < n_locales && *slot != NULL;
		    slot++) {
			if (strcmp(*slot, tag) == 0)
				duplicate = 1;
		}

		if (duplicate)
			xfree(tag);
		else
			*(slot++) = tag;
		*slot = NULL;
	}

	joined = xjoin(tags, ',');
	g11n_freelist(tags);

	return (joined);
}
3400Sstevel@tonic-gate 
/*
 * qsort() comparator for arrays of C strings: each argument points to a
 * string pointer and the strings are ordered with strcmp().
 */
static int
sortcmp(const void *d1, const void *d2)
{
	const char *const *lhs = d1;
	const char *const *rhs = d2;

	return (strcmp(*lhs, *rhs));
}
3490Sstevel@tonic-gate 
/*
 * Compare two language tags by their first two '-'-separated sub-tags.
 * Comparisons are case-sensitive; sub-tags after the second are ignored.
 *
 * Returns:
 *   0 - the primary sub-tags differ
 *   1 - the primary sub-tags match, but only one tag has a second sub-tag
 *       or the second sub-tags differ
 *   2 - the primary sub-tags match and either neither tag has a second
 *       sub-tag or the second sub-tags match too
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
	size_t n1, n2;

	/* length of each primary sub-tag: up to the first '-' or the end */
	n1 = strcspn(langtag1, "-");
	n2 = strcspn(langtag2, "-");

	/* no match */
	if (n1 != n2 || strncmp(langtag1, langtag2, n1) != 0)
		return (0);

	/*
	 * The byte after a primary sub-tag is either NUL or '-'.  If at least
	 * one tag ends here, the match is exact only when both do.
	 */
	if (langtag1[n1] == '\0' || langtag2[n2] == '\0')
		return ((langtag1[n1] == langtag2[n2]) ? 2 : 1);

	/* both tags continue with '-': compare the second sub-tags */
	langtag1 += n1 + 1;
	langtag2 += n2 + 1;

	n1 = strcspn(langtag1, "-");
	n2 = strcspn(langtag2, "-");

	if (n1 == n2 && strncmp(langtag1, langtag2, n1) == 0)
		return (2);	/* country tags matched - exact match */

	return (1);
}
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate char *
4010Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
4020Sstevel@tonic-gate {
4032705Sjp161948 	char **list1, **list2, **list3, **p, **q, **r;
4042705Sjp161948 	char *set3, *lang_subtag;
4052705Sjp161948 	uint_t n1, n2, n3;
4062705Sjp161948 	uint_t do_append;
4072705Sjp161948 
4082705Sjp161948 	list1 = xsplit(set1, ',');
4092705Sjp161948 	list2 = xsplit(set2, ',');
4100Sstevel@tonic-gate 
4112705Sjp161948 	for (n1 = 0, p = list1; p && *p; p++, n1++)
4122705Sjp161948 		;
4132705Sjp161948 	for (n2 = 0, p = list2; p && *p; p++, n2++)
4142705Sjp161948 		;
4150Sstevel@tonic-gate 
4162705Sjp161948 	list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
4172705Sjp161948 	*list3 = NULL;
4180Sstevel@tonic-gate 
4192705Sjp161948 	/*
4202705Sjp161948 	 * we must not sort the user langtags - sorting or not the server's
4212705Sjp161948 	 * should not affect the outcome
4222705Sjp161948 	 */
4232705Sjp161948 	qsort(list2, n2, sizeof (char *), sortcmp);
4240Sstevel@tonic-gate 
4252705Sjp161948 	for (n3 = 0, p = list1; p && *p; p++) {
4262705Sjp161948 		do_append = 0;
4272705Sjp161948 		for (q = list2; q && *q; q++) {
4282705Sjp161948 			if (g11n_langtag_match(*p, *q) != 2) continue;
4292705Sjp161948 			/* append element */
4302705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4312705Sjp161948 				do_append = 1;
4322705Sjp161948 				if (!*r)
4332705Sjp161948 					break;
4342705Sjp161948 				if (strcmp(*p, *r) == 0) {
4352705Sjp161948 					do_append = 0;
4362705Sjp161948 					break;
4372705Sjp161948 				}
4382705Sjp161948 			}
4392705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4402705Sjp161948 				list3[n3++] = xstrdup(*p);
4412705Sjp161948 				list3[n3] = NULL;
4422705Sjp161948 			}
4430Sstevel@tonic-gate 		}
4440Sstevel@tonic-gate 	}
4452705Sjp161948 
4462705Sjp161948 	for (p = list1; p && *p; p++) {
4472705Sjp161948 		do_append = 0;
4482705Sjp161948 		for (q = list2; q && *q; q++) {
4492705Sjp161948 			if (g11n_langtag_match(*p, *q) != 1)
4502705Sjp161948 				continue;
4510Sstevel@tonic-gate 
4522705Sjp161948 			/* append element */
4532705Sjp161948 			lang_subtag = xstrdup(*p);
4542705Sjp161948 			if (strchr(lang_subtag, '-'))
4552705Sjp161948 				*(strchr(lang_subtag, '-')) = '\0';
4562705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4572705Sjp161948 				do_append = 1;
4582705Sjp161948 				if (!*r)
4592705Sjp161948 					break;
4602705Sjp161948 				if (strcmp(lang_subtag, *r) == 0) {
4612705Sjp161948 					do_append = 0;
4622705Sjp161948 					break;
4632705Sjp161948 				}
4642705Sjp161948 			}
4652705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4662705Sjp161948 				list3[n3++] = lang_subtag;
4672705Sjp161948 				list3[n3] = NULL;
4682705Sjp161948 			} else
4692705Sjp161948 				xfree(lang_subtag);
4700Sstevel@tonic-gate 		}
4710Sstevel@tonic-gate 	}
4720Sstevel@tonic-gate 
4732705Sjp161948 	set3 = xjoin(list3, ',');
4742705Sjp161948 	xfree_split_list(list1);
4752705Sjp161948 	xfree_split_list(list2);
4762705Sjp161948 	xfree_split_list(list3);
4770Sstevel@tonic-gate 
4782705Sjp161948 	return (set3);
4790Sstevel@tonic-gate }
4800Sstevel@tonic-gate 
/*
 * Pick the language tag the client should use.
 *
 * Intersects the client's and the server's comma-separated langtag sets
 * with g11n_langtag_set_intersect() and returns an allocated copy of the
 * first entry of the intersection, or NULL when the intersection is empty.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
	char *list, *result = NULL;
	char **xlist;

	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

	if (list == NULL)
		return (NULL);

	xlist = xsplit(list, ',');

	xfree(list);

	if (xlist == NULL)
		return (NULL);

	if (*xlist != NULL)
		result = xstrdup(*xlist);

	/* release the split list even when it turned out to be empty */
	xfree_split_list(xlist);

	return (result);
}
5050Sstevel@tonic-gate 
/*
 * Return non-zero if the locale name has a codeset part that is exactly
 * "UTF-8", i.e. ".UTF-8" followed by the end of the name or by a '@'
 * modifier.
 */
static int
locale_cmp_is_utf8(const char *name)
{
	const char *dot = strchr(name, '.');

	if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
		return (0);

	return (dot[6] == '\0' || dot[6] == '@');
}

/*
 * Return non-zero if the locale name is one of the default locales:
 * "C", "POSIX" or "common".
 */
static int
locale_cmp_is_default(const char *name)
{
	return (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 ||
	    strcmp(name, "common") == 0);
}

/*
 * qsort() comparator for locale names.  UTF-8 locales sort before all
 * others; among the rest, the default locales ("C", "POSIX", "common") sort
 * after every other locale; ties are broken with a straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
	const char *s1 = *(char *const *)d1;
	const char *s2 = *(char *const *)d2;
	int s1_flag, s2_flag;

	/* prefer UTF-8 locales */
	s1_flag = locale_cmp_is_utf8(s1);
	s2_flag = locale_cmp_is_utf8(s2);
	if (s1_flag != s2_flag)
		return (s1_flag ? -1 : 1);

	/* prefer any locale over the default locales */
	s1_flag = locale_cmp_is_default(s1);
	s2_flag = locale_cmp_is_default(s2);
	if (s1_flag != s2_flag)
		return (s1_flag ? 1 : -1);

	return (strcmp(s1, s2));
}
5580Sstevel@tonic-gate 
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate char **
5612705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
5620Sstevel@tonic-gate {
5632705Sjp161948 	char **langtag_list, **result, **p, **q, **r;
5642705Sjp161948 	char *s;
5652705Sjp161948 	uint_t do_append, n_langtags, n_locales, n_results, max_results;
5662705Sjp161948 
5672705Sjp161948 	/* count lang tags and locales */
5682705Sjp161948 	for (n_locales = 0, p = locale_set; p && *p; p++)
5692705Sjp161948 		n_locales++;
5700Sstevel@tonic-gate 
5712705Sjp161948 	n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
5722705Sjp161948 	/* count the number of langtags */
5732705Sjp161948 	for (; s = strchr(s, ','); s++, n_langtags++)
5742705Sjp161948 		;
5752705Sjp161948 
5762705Sjp161948 	qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
5770Sstevel@tonic-gate 
5782705Sjp161948 	langtag_list = xsplit(langtag_set, ',');
5792705Sjp161948 	for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
5802705Sjp161948 		;
5810Sstevel@tonic-gate 
5822705Sjp161948 	max_results = MIN(n_locales, n_langtags) * 2;
5832705Sjp161948 	result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
5842705Sjp161948 	*result = NULL;
5852705Sjp161948 	n_results = 0;
5860Sstevel@tonic-gate 
5872705Sjp161948 	/* more specific matches first */
5882705Sjp161948 	for (p = langtag_list; p && *p; p++) {
5892705Sjp161948 		do_append = 0;
5902705Sjp161948 		for (q = locale_set; q && *q; q++) {
5912705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 2) {
5922705Sjp161948 				do_append = 1;
5932705Sjp161948 				for (r = result; (r - result) <=
5942705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
5952705Sjp161948 					if (!*r)
5962705Sjp161948 						break;
5972705Sjp161948 					if (strcmp(*q, *r) == 0) {
5982705Sjp161948 						do_append = 0;
5992705Sjp161948 						break;
6002705Sjp161948 					}
6012705Sjp161948 				}
6022705Sjp161948 				if (do_append && n_results < max_results) {
6032705Sjp161948 					result[n_results++] = xstrdup(*q);
6042705Sjp161948 					result[n_results] = NULL;
6052705Sjp161948 				}
6062705Sjp161948 				break;
6072705Sjp161948 			}
6080Sstevel@tonic-gate 		}
6090Sstevel@tonic-gate 	}
6100Sstevel@tonic-gate 
6112705Sjp161948 	for (p = langtag_list; p && *p; p++) {
6122705Sjp161948 		do_append = 0;
6132705Sjp161948 		for (q = locale_set; q && *q; q++) {
6142705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 1) {
6152705Sjp161948 				do_append = 1;
6162705Sjp161948 				for (r = result; (r - result) <=
6172705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
6182705Sjp161948 					if (!*r)
6192705Sjp161948 						break;
6202705Sjp161948 					if (strcmp(*q, *r) == 0) {
6212705Sjp161948 						do_append = 0;
6222705Sjp161948 						break;
6232705Sjp161948 					}
6242705Sjp161948 				}
6252705Sjp161948 				if (do_append && n_results < max_results) {
6262705Sjp161948 					result[n_results++] = xstrdup(*q);
6272705Sjp161948 					result[n_results] = NULL;
6282705Sjp161948 				}
6292705Sjp161948 				break;
6302705Sjp161948 			}
6310Sstevel@tonic-gate 		}
6320Sstevel@tonic-gate 	}
6330Sstevel@tonic-gate 
6342705Sjp161948 	xfree_split_list(langtag_list);
6352705Sjp161948 
6362705Sjp161948 	return (result);
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
/*
 * Pick the server locale that best matches the client's language tags.
 *
 * When srvr_locales is NULL the candidate list is obtained from
 * g11n_getlocales() and released again before returning; otherwise the
 * caller's list is used and handed to
 * g11n_langtag_set_locale_set_intersect().  Returns an allocated copy of the
 * first matching locale name, or NULL if nothing matches.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
	char **candidates, **matches;
	char *chosen = NULL;

	candidates = (srvr_locales != NULL) ? srvr_locales : g11n_getlocales();

	matches = g11n_langtag_set_locale_set_intersect(clnt_langtags,
	    candidates);
	if (matches != NULL) {
		if (*matches != NULL)
			chosen = xstrdup(*matches);
		xfree_split_list(matches);
	}

	/* only free the candidate list if it was built here */
	if (candidates != srvr_locales)
		g11n_freelist(candidates);

	return (chosen);
}
6630Sstevel@tonic-gate 
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate /*
6660Sstevel@tonic-gate  * Functions for validating ASCII and UTF-8 strings
6670Sstevel@tonic-gate  *
6680Sstevel@tonic-gate  * The error_str parameter is an optional pointer to a char variable
6690Sstevel@tonic-gate  * where to store a string suitable for use with error() or fatal() or
6700Sstevel@tonic-gate  * friends.
6710Sstevel@tonic-gate  *
6720Sstevel@tonic-gate  * The return value is 0 if success, EILSEQ or EINVAL.
6730Sstevel@tonic-gate  *
6740Sstevel@tonic-gate  */
6752705Sjp161948 uint_t
6762705Sjp161948 g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str)
6770Sstevel@tonic-gate {
6782705Sjp161948 	uchar_t *p;
6790Sstevel@tonic-gate 
6802705Sjp161948 	for (p = (uchar_t *)str; p && *p && (!(*p & 0x80)); p++)
6812705Sjp161948 		;
6820Sstevel@tonic-gate 
6832705Sjp161948 	if (len && ((p - (uchar_t *)str) != len))
6842705Sjp161948 		return (EILSEQ);
6852705Sjp161948 
6862705Sjp161948 	return (0);
6870Sstevel@tonic-gate }
6880Sstevel@tonic-gate 
6892705Sjp161948 uint_t
6902705Sjp161948 g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str)
6910Sstevel@tonic-gate {
6922705Sjp161948 	uchar_t *p;
6932705Sjp161948 	uint_t c, l;
6942705Sjp161948 
6952705Sjp161948 	if (len == 0)
6962705Sjp161948 		len = strlen((const char *)str);
6970Sstevel@tonic-gate 
6982705Sjp161948 	for (p = (uchar_t *)str; p && (p - str < len) && *p; ) {
6992705Sjp161948 		/* 8-bit chars begin a UTF-8 sequence */
7002705Sjp161948 		if (*p & 0x80) {
7012705Sjp161948 			/* get sequence length and sanity check first byte */
7022705Sjp161948 			if (*p < 0xc0)
7032705Sjp161948 				return (EILSEQ);
7042705Sjp161948 			else if (*p < 0xe0)
7052705Sjp161948 				l = 2;
7062705Sjp161948 			else if (*p < 0xf0)
7072705Sjp161948 				l = 3;
7082705Sjp161948 			else if (*p < 0xf8)
7092705Sjp161948 				l = 4;
7102705Sjp161948 			else if (*p < 0xfc)
7112705Sjp161948 				l = 5;
7122705Sjp161948 			else if (*p < 0xfe)
7132705Sjp161948 				l = 6;
7142705Sjp161948 			else
7152705Sjp161948 				return (EILSEQ);
7162705Sjp161948 
7172705Sjp161948 			if ((p + l - str) >= len)
7182705Sjp161948 				return (EILSEQ);
7192705Sjp161948 
7202705Sjp161948 			/* overlong detection - build codepoint */
7212705Sjp161948 			c = *p & 0x3f;
7222705Sjp161948 			/* shift c bits from first byte */
7232705Sjp161948 			c = c << (6 * (l - 1));
7242705Sjp161948 
7252705Sjp161948 			if (l > 1) {
7262705Sjp161948 				if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80))
7272705Sjp161948 					c = c | ((*(p + 1) & 0x3f) <<
7282705Sjp161948 					    (6 * (l - 2)));
7292705Sjp161948 				else
7302705Sjp161948 					return (EILSEQ);
7312705Sjp161948 
7322705Sjp161948 				if (c < 0x80)
7332705Sjp161948 					return (EILSEQ);
7342705Sjp161948 			}
7350Sstevel@tonic-gate 
7362705Sjp161948 			if (l > 2) {
7372705Sjp161948 				if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80))
7382705Sjp161948 					c = c | ((*(p + 2) & 0x3f) <<
7392705Sjp161948 					    (6 * (l - 3)));
7402705Sjp161948 				else
7412705Sjp161948 					return (EILSEQ);
7422705Sjp161948 
7432705Sjp161948 				if (c < 0x800)
7442705Sjp161948 					return (EILSEQ);
7452705Sjp161948 			}
7462705Sjp161948 
7472705Sjp161948 			if (l > 3) {
7482705Sjp161948 				if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80))
7492705Sjp161948 					c = c | ((*(p + 3) & 0x3f) <<
7502705Sjp161948 					    (6 * (l - 4)));
7512705Sjp161948 				else
7522705Sjp161948 					return (EILSEQ);
7532705Sjp161948 
7542705Sjp161948 				if (c < 0x10000)
7552705Sjp161948 					return (EILSEQ);
7562705Sjp161948 			}
7570Sstevel@tonic-gate 
7582705Sjp161948 			if (l > 4) {
7592705Sjp161948 				if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80))
7602705Sjp161948 					c = c | ((*(p + 4) & 0x3f) <<
7612705Sjp161948 					    (6 * (l - 5)));
7622705Sjp161948 				else
7632705Sjp161948 					return (EILSEQ);
7642705Sjp161948 
7652705Sjp161948 				if (c < 0x200000)
7662705Sjp161948 					return (EILSEQ);
7672705Sjp161948 			}
7680Sstevel@tonic-gate 
7692705Sjp161948 			if (l > 5) {
7702705Sjp161948 				if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80))
7712705Sjp161948 					c = c | (*(p + 5) & 0x3f);
7722705Sjp161948 				else
7732705Sjp161948 					return (EILSEQ);
7742705Sjp161948 
7752705Sjp161948 				if (c < 0x4000000)
7762705Sjp161948 					return (EILSEQ);
7772705Sjp161948 			}
7780Sstevel@tonic-gate 
			/*
			 * check for UTF-16 surrogates and other illegal
			 * UTF-8 code points
			 */
7832705Sjp161948 			if (((c <= 0xdfff) && (c >= 0xd800)) ||
7842705Sjp161948 			    (c == 0xfffe) || (c == 0xffff))
7852705Sjp161948 				return (EILSEQ);
7862705Sjp161948 			p += l;
7872705Sjp161948 		}
7882705Sjp161948 		/* 7-bit chars are fine */
7890Sstevel@tonic-gate 		else
7902705Sjp161948 			p++;
7910Sstevel@tonic-gate 	}
7922705Sjp161948 	return (0);
7930Sstevel@tonic-gate }
7940Sstevel@tonic-gate 
7950Sstevel@tonic-gate /*
7960Sstevel@tonic-gate  * Functions for converting to ASCII or UTF-8 from the local codeset
7970Sstevel@tonic-gate  * Functions for converting from ASCII or UTF-8 to the local codeset
7980Sstevel@tonic-gate  *
 * The error_str parameter is an optional pointer to a string pointer
 * in which to store a message suitable for use with error() or fatal()
 * or friends.
8020Sstevel@tonic-gate  *
8030Sstevel@tonic-gate  * The err parameter is an optional pointer to an integer where 0
8040Sstevel@tonic-gate  * (success) or EILSEQ or EINVAL will be stored (failure).
8050Sstevel@tonic-gate  *
8060Sstevel@tonic-gate  * These functions return NULL if the conversion fails.
8070Sstevel@tonic-gate  *
8080Sstevel@tonic-gate  */
8092705Sjp161948 uchar_t *
8102705Sjp161948 g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str)
8110Sstevel@tonic-gate {
8122705Sjp161948 	static uint_t initialized = 0;
8132705Sjp161948 	static uint_t do_convert = 0;
8142705Sjp161948 	iconv_t cd;
8152705Sjp161948 	int err;
8160Sstevel@tonic-gate 
8172705Sjp161948 	if (!initialized) {
8182705Sjp161948 		/*
8192705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
8202705Sjp161948 		 * same, and there are aliases of codesets to boot...
8212705Sjp161948 		 */
8222705Sjp161948 		if (strcmp("646", nl_langinfo(CODESET)) == 0 ||
823*5562Sjp161948 		    strcmp("ASCII",  nl_langinfo(CODESET)) == 0 ||
824*5562Sjp161948 		    strcmp("US-ASCII",  nl_langinfo(CODESET)) == 0) {
8252705Sjp161948 			initialized = 1;
8262705Sjp161948 			do_convert = 0;
8272705Sjp161948 		} else {
8282705Sjp161948 			cd = iconv_open(nl_langinfo(CODESET), "646");
8292705Sjp161948 			if (cd == (iconv_t)-1) {
8302705Sjp161948 				if (err_ptr)
8312705Sjp161948 					*err_ptr = errno;
8322705Sjp161948 				if (error_str)
8332705Sjp161948 					*error_str = (uchar_t *)"Cannot "
8342705Sjp161948 					    "convert ASCII strings to the local"
8352705Sjp161948 					    " codeset";
8362705Sjp161948 			}
8372705Sjp161948 			initialized = 1;
8382705Sjp161948 			do_convert = 1;
8392705Sjp161948 		}
8400Sstevel@tonic-gate 	}
8412705Sjp161948 
8422705Sjp161948 	if (!do_convert) {
8432705Sjp161948 		if ((err = g11n_validate_ascii(str, 0, error_str))) {
8442705Sjp161948 			if (err_ptr)
8452705Sjp161948 				*err_ptr = err;
8462705Sjp161948 			return (NULL);
8472705Sjp161948 		} else
8482705Sjp161948 			return ((uchar_t *)xstrdup(str));
8490Sstevel@tonic-gate 	}
8500Sstevel@tonic-gate 
8512705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
8520Sstevel@tonic-gate }
8530Sstevel@tonic-gate 
8542705Sjp161948 uchar_t *
8552705Sjp161948 g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str)
8560Sstevel@tonic-gate {
8572705Sjp161948 	static uint_t initialized = 0;
8582705Sjp161948 	static uint_t do_convert = 0;
8592705Sjp161948 	iconv_t cd;
8602705Sjp161948 	int err;
8610Sstevel@tonic-gate 
8622705Sjp161948 	if (!initialized) {
8632705Sjp161948 		/*
8642705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
8652705Sjp161948 		 * same, and there are aliases of codesets to boot...
8662705Sjp161948 		 */
8672705Sjp161948 		if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 ||
8682705Sjp161948 		    strcmp("UTF8",  nl_langinfo(CODESET)) == 0) {
8692705Sjp161948 			initialized = 1;
8702705Sjp161948 			do_convert = 0;
8712705Sjp161948 		} else {
8722705Sjp161948 			cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
8732705Sjp161948 			if (cd == (iconv_t)-1) {
8742705Sjp161948 				if (err_ptr)
8752705Sjp161948 					*err_ptr = errno;
8762705Sjp161948 				if (error_str)
8772705Sjp161948 					*error_str = (uchar_t *)"Cannot "
8782705Sjp161948 					    "convert UTF-8 strings to the "
8792705Sjp161948 					    "local codeset";
8802705Sjp161948 			}
8812705Sjp161948 			initialized = 1;
8822705Sjp161948 			do_convert = 1;
8832705Sjp161948 		}
8840Sstevel@tonic-gate 	}
8852705Sjp161948 
8862705Sjp161948 	if (!do_convert) {
8872705Sjp161948 		if ((err = g11n_validate_utf8(str, 0, error_str))) {
8882705Sjp161948 			if (err_ptr)
8892705Sjp161948 				*err_ptr = err;
8902705Sjp161948 			return (NULL);
8912705Sjp161948 		} else
8922705Sjp161948 			return ((uchar_t *)xstrdup((char *)str));
8930Sstevel@tonic-gate 	}
8940Sstevel@tonic-gate 
8952705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
8960Sstevel@tonic-gate }
8970Sstevel@tonic-gate 
8980Sstevel@tonic-gate char *
8992705Sjp161948 g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str)
9000Sstevel@tonic-gate {
9012705Sjp161948 	static uint_t initialized = 0;
9022705Sjp161948 	static uint_t do_convert = 0;
9032705Sjp161948 	iconv_t cd;
9040Sstevel@tonic-gate 
9052705Sjp161948 	if (!initialized) {
9062705Sjp161948 		/*
9072705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
9082705Sjp161948 		 * same, and there are aliases of codesets to boot...
9092705Sjp161948 		 */
9102705Sjp161948 		if (strcmp("646", nl_langinfo(CODESET)) == 0 ||
9112705Sjp161948 		    strcmp("ASCII",  nl_langinfo(CODESET)) == 0 ||
9122705Sjp161948 		    strcmp("US-ASCII",  nl_langinfo(CODESET)) == 0) {
9132705Sjp161948 			initialized = 1;
9142705Sjp161948 			do_convert = 0;
9152705Sjp161948 		} else {
9162705Sjp161948 			cd = iconv_open("646", nl_langinfo(CODESET));
9172705Sjp161948 			if (cd == (iconv_t)-1) {
9182705Sjp161948 				if (err_ptr)
9192705Sjp161948 					*err_ptr = errno;
9202705Sjp161948 				if (error_str)
9212705Sjp161948 					*error_str = (uchar_t *)"Cannot "
9222705Sjp161948 					    "convert UTF-8 strings to the "
9232705Sjp161948 					    "local codeset";
9242705Sjp161948 			}
9252705Sjp161948 			initialized = 1;
9262705Sjp161948 			do_convert = 1;
9272705Sjp161948 		}
9280Sstevel@tonic-gate 	}
9290Sstevel@tonic-gate 
9302705Sjp161948 	if (!do_convert)
9312705Sjp161948 		return (xstrdup((char *)str));
9322705Sjp161948 
9332705Sjp161948 	return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
9340Sstevel@tonic-gate }
9350Sstevel@tonic-gate 
9362705Sjp161948 uchar_t *
9372705Sjp161948 g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str)
9380Sstevel@tonic-gate {
9392705Sjp161948 	static uint_t initialized = 0;
9402705Sjp161948 	static uint_t do_convert = 0;
9412705Sjp161948 	iconv_t cd;
9420Sstevel@tonic-gate 
9432705Sjp161948 	if (!initialized) {
9442705Sjp161948 		/*
9452705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
9462705Sjp161948 		 * same, and there are aliases of codesets to boot...
9472705Sjp161948 		 */
9482705Sjp161948 		if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 ||
9492705Sjp161948 		    strcmp("UTF8",  nl_langinfo(CODESET)) == 0) {
9502705Sjp161948 			initialized = 1;
9512705Sjp161948 			do_convert = 0;
9522705Sjp161948 		} else {
9532705Sjp161948 			cd = iconv_open("UTF-8", nl_langinfo(CODESET));
9542705Sjp161948 			if (cd == (iconv_t)-1) {
9552705Sjp161948 				if (err_ptr)
9562705Sjp161948 					*err_ptr = errno;
9572705Sjp161948 				if (error_str)
9582705Sjp161948 					*error_str = (uchar_t *)"Cannot "
9592705Sjp161948 					    "convert UTF-8 strings to the "
9602705Sjp161948 					    "local codeset";
9612705Sjp161948 			}
9622705Sjp161948 			initialized = 1;
9632705Sjp161948 			do_convert = 1;
9642705Sjp161948 		}
9650Sstevel@tonic-gate 	}
9660Sstevel@tonic-gate 
9672705Sjp161948 	if (!do_convert)
9682705Sjp161948 		return ((uchar_t *)xstrdup((char *)str));
9692705Sjp161948 
9702705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
9710Sstevel@tonic-gate }
9720Sstevel@tonic-gate 
9730Sstevel@tonic-gate 
9740Sstevel@tonic-gate /*
9750Sstevel@tonic-gate  * Wrapper around iconv()
9760Sstevel@tonic-gate  *
9770Sstevel@tonic-gate  * The caller is responsible for freeing the result and for handling
9780Sstevel@tonic-gate  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
9790Sstevel@tonic-gate  */
9802705Sjp161948 static uchar_t *
9812705Sjp161948 do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len,
9822705Sjp161948     uint_t *outlen, int *err, uchar_t **err_str)
9832705Sjp161948 {
9842705Sjp161948 	size_t inbytesleft, outbytesleft, converted_size;
9852705Sjp161948 	char *outbuf;
9862705Sjp161948 	uchar_t *converted;
9872705Sjp161948 	const char *inbuf;
9882705Sjp161948 	uint_t mul = 0;
9890Sstevel@tonic-gate 
9902705Sjp161948 	if (!buf || !(*(char *)buf))
9912705Sjp161948 		return (NULL);
9922705Sjp161948 
9932705Sjp161948 	if (len == 0)
9942705Sjp161948 		len = strlen(buf);
9952705Sjp161948 
9962705Sjp161948 	/* reset conversion descriptor */
9972705Sjp161948 	/* XXX Do we need initial shift sequences for UTF-8??? */
9982705Sjp161948 	(void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft);
9992705Sjp161948 	inbuf = (const char *) buf;
10002705Sjp161948 
10012705Sjp161948 	if (mul_ptr)
10022705Sjp161948 		mul = *mul_ptr;
10032705Sjp161948 
10042705Sjp161948 	converted_size = (len << mul);
10052705Sjp161948 	outbuf = (char *)xmalloc(converted_size + 1); /* for null */
10062705Sjp161948 	converted = (uchar_t *)outbuf;
10072705Sjp161948 	outbytesleft = len;
10080Sstevel@tonic-gate 
10092705Sjp161948 	do {
10102705Sjp161948 		if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) ==
10112705Sjp161948 		    (size_t)-1) {
10122705Sjp161948 			if (errno == E2BIG) {
10132705Sjp161948 				/* UTF-8 codepoints are at most 8 bytes long */
10142705Sjp161948 				if (mul > 2) {
10152705Sjp161948 					if (err_str)
10162705Sjp161948 						*err_str = (uchar_t *)
10172705Sjp161948 						    "Conversion to UTF-8 failed"
10182705Sjp161948 						    " due to preposterous space"
10192705Sjp161948 						    " requirements";
10202705Sjp161948 					if (err)
10212705Sjp161948 						*err = EILSEQ;
10222705Sjp161948 					return (NULL);
10232705Sjp161948 				}
10240Sstevel@tonic-gate 
10252705Sjp161948 				/*
10262705Sjp161948 				 * re-alloc output and ensure that the outbuf
10272705Sjp161948 				 * and outbytesleft values are adjusted
10282705Sjp161948 				 */
10292705Sjp161948 				converted = xrealloc(converted,
10302705Sjp161948 				    converted_size << 1 + 1);
10312705Sjp161948 				outbuf = (char *)converted + converted_size -
10322705Sjp161948 				    outbytesleft;
10332705Sjp161948 				converted_size = (len << ++(mul));
10342705Sjp161948 				outbytesleft = converted_size - outbytesleft;
10352705Sjp161948 			} else {
10362705Sjp161948 				/*
10372705Sjp161948 				 * let the caller deal with iconv() errors,
10382705Sjp161948 				 * probably by calling fatal(); xfree() does
10392705Sjp161948 				 * not set errno
10402705Sjp161948 				 */
10412705Sjp161948 				if (err)
10422705Sjp161948 					*err = errno;
10432705Sjp161948 				xfree(converted);
10442705Sjp161948 				return (NULL);
10452705Sjp161948 			}
10462705Sjp161948 		}
10472705Sjp161948 	} while (inbytesleft);
10482705Sjp161948 
10492705Sjp161948 	*outbuf = '\0'; /* ensure null-termination */
10502705Sjp161948 	if (outlen)
10512705Sjp161948 		*outlen = converted_size - outbytesleft;
10522705Sjp161948 	if (mul_ptr)
10532705Sjp161948 		*mul_ptr = mul;
10542705Sjp161948 
10552705Sjp161948 	return (converted);
10560Sstevel@tonic-gate }
1057*5562Sjp161948 
/*
 * Free all strings in the list and then free the list itself. We know that the
 * list ends with a NULL pointer. A NULL list is accepted and ignored.
 */
void
g11n_freelist(char **list)
{
	char **entry;

	/* nothing to release; avoids dereferencing a NULL list below */
	if (list == NULL)
		return;

	for (entry = list; *entry != NULL; entry++)
		xfree(*entry);

	xfree(list);
}
1074