xref: /onnv-gate/usr/src/cmd/ssh/libssh/common/g11n.c (revision 6288:65c8136c6b0b)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <errno.h>
280Sstevel@tonic-gate #include <locale.h>
290Sstevel@tonic-gate #include <langinfo.h>
300Sstevel@tonic-gate #include <iconv.h>
310Sstevel@tonic-gate #include <ctype.h>
320Sstevel@tonic-gate #include <strings.h>
330Sstevel@tonic-gate #include <string.h>
340Sstevel@tonic-gate #include <stdio.h>
350Sstevel@tonic-gate #include <stdlib.h>
360Sstevel@tonic-gate #include "includes.h"
370Sstevel@tonic-gate #include "xmalloc.h"
380Sstevel@tonic-gate #include "xlist.h"
390Sstevel@tonic-gate 
#ifdef MIN
#undef MIN
#endif /* MIN */

/* Smaller of two values; note that each argument may be evaluated twice. */
#define	MIN(x, y)	((x) < (y) ? (x) : (y))

/* Command that g11n_getlocales() runs (with "-a") to list the locales. */
#define	LOCALE_PATH	"/usr/bin/locale"

/* two-char language code, '-' and two-char country code (e.g. "cs-CZ") */
#define	LANGTAG_MAX	5

static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf,
    uint_t len, uint_t *outlen, int *err, uchar_t **err_str);

static int locale_cmp(const void *d1, const void *d2);
static char *g11n_locale2langtag(char *locale);

uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str);
uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str);
590Sstevel@tonic-gate 
605562Sjp161948 /*
615562Sjp161948  * Convert locale string name into a language tag. The caller is responsible for
625562Sjp161948  * freeing the memory allocated for the result.
635562Sjp161948  */
642705Sjp161948 static char *
650Sstevel@tonic-gate g11n_locale2langtag(char *locale)
660Sstevel@tonic-gate {
672705Sjp161948 	char *langtag;
680Sstevel@tonic-gate 
692705Sjp161948 	/* base cases */
702705Sjp161948 	if (!locale || !*locale)
712705Sjp161948 		return (NULL);
720Sstevel@tonic-gate 
732705Sjp161948 	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
745562Sjp161948 		return (xstrdup("i-default"));
750Sstevel@tonic-gate 
762705Sjp161948 	/* punt for language codes which are not exactly 2 letters */
772705Sjp161948 	if (strlen(locale) < 2 ||
782705Sjp161948 	    !isalpha(locale[0]) ||
792705Sjp161948 	    !isalpha(locale[1]) ||
802705Sjp161948 	    (locale[2] != '\0' &&
812705Sjp161948 	    locale[2] != '_' &&
822705Sjp161948 	    locale[2] != '.' &&
832705Sjp161948 	    locale[2] != '@'))
842705Sjp161948 		return (NULL);
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 
872705Sjp161948 	/* we have a primary language sub-tag */
882705Sjp161948 	langtag = (char *)xmalloc(LANGTAG_MAX + 1);
890Sstevel@tonic-gate 
902705Sjp161948 	strncpy(langtag, locale, 2);
912705Sjp161948 	langtag[2] = '\0';
920Sstevel@tonic-gate 
932705Sjp161948 	/* do we have country sub-tag? For example: cs_CZ */
942705Sjp161948 	if (locale[2] == '_') {
952705Sjp161948 		if (strlen(locale) < 5 ||
962705Sjp161948 		    !isalpha(locale[3]) ||
972705Sjp161948 		    !isalpha(locale[4]) ||
982705Sjp161948 		    (locale[5] != '\0' && (locale[5] != '.' &&
992705Sjp161948 		    locale[5] != '@'))) {
1002705Sjp161948 			return (langtag);
1012705Sjp161948 		}
1022705Sjp161948 
1032705Sjp161948 		/* example: create cs-CZ from cs_CZ */
1042705Sjp161948 		if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
1052705Sjp161948 		    locale + 3) == 5)
1062705Sjp161948 			return (langtag);
1070Sstevel@tonic-gate 	}
1080Sstevel@tonic-gate 
1092705Sjp161948 	/* in all other cases we just use the primary language sub-tag */
1102705Sjp161948 	return (langtag);
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate 
/*
 * Tell whether a language tag is the default tag, "i-default".
 * Returns 1 if it is and 0 otherwise; the comparison is case-sensitive.
 */
uint_t
g11n_langtag_is_default(char *langtag)
{
	static const char default_tag[] = "i-default";

	if (strcmp(langtag, default_tag) != 0)
		return (0);

	return (1);
}
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * This lang tag / locale matching function works only for two-character
1210Sstevel@tonic-gate  * language primary sub-tags and two-character country sub-tags.
1220Sstevel@tonic-gate  */
1232705Sjp161948 uint_t
1240Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale)
1250Sstevel@tonic-gate {
1262705Sjp161948 	/* match "i-default" to the process' current locale if possible */
1272705Sjp161948 	if (g11n_langtag_is_default(langtag)) {
1282705Sjp161948 		if (strcasecmp(locale, "POSIX") == 0 ||
1292705Sjp161948 		    strcasecmp(locale, "C") == 0)
1302705Sjp161948 			return (1);
1312705Sjp161948 		else
1322705Sjp161948 			return (0);
1332705Sjp161948 	}
1340Sstevel@tonic-gate 
1352705Sjp161948 	/*
1362705Sjp161948 	 * locale must be at least 2 chars long and the lang part must be
1372705Sjp161948 	 * exactly two characters
1382705Sjp161948 	 */
1392705Sjp161948 	if (strlen(locale) < 2 ||
1402705Sjp161948 	    (!isalpha(locale[0]) || !isalpha(locale[1]) ||
1412705Sjp161948 	    (locale[2] != '\0' && locale[2] != '_' &&
1422705Sjp161948 	    locale[2] != '.' && locale[2] != '@')))
1432705Sjp161948 		return (0);
1440Sstevel@tonic-gate 
1452705Sjp161948 	/* same thing with the langtag */
1462705Sjp161948 	if (strlen(langtag) < 2 ||
1472705Sjp161948 	    (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
1482705Sjp161948 	    (langtag[2] != '\0' && langtag[2] != '-')))
1492705Sjp161948 		return (0);
1500Sstevel@tonic-gate 
1512705Sjp161948 	/* primary language sub-tag and the locale's language part must match */
1522705Sjp161948 	if (strncasecmp(langtag, locale, 2) != 0)
1532705Sjp161948 		return (0);
1540Sstevel@tonic-gate 
1552705Sjp161948 	/*
1562705Sjp161948 	 * primary language sub-tag and the locale's language match, now
1572705Sjp161948 	 * fuzzy check country part
1582705Sjp161948 	 */
1590Sstevel@tonic-gate 
1602705Sjp161948 	/* neither langtag nor locale have more than one component */
1612705Sjp161948 	if (langtag[2] == '\0' &&
1622705Sjp161948 	    (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
1632705Sjp161948 		return (2);
1640Sstevel@tonic-gate 
1652705Sjp161948 	/* langtag has only one sub-tag... */
1662705Sjp161948 	if (langtag[2] == '\0')
1672705Sjp161948 		return (1);
1680Sstevel@tonic-gate 
1692705Sjp161948 	/* locale has no country code... */
1702705Sjp161948 	if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
1712705Sjp161948 		return (1);
1722705Sjp161948 
1732705Sjp161948 	/* langtag has more than one subtag and the locale has a country code */
1740Sstevel@tonic-gate 
1752705Sjp161948 	/* ignore second subtag if not two chars */
1762705Sjp161948 	if (strlen(langtag) < 5)
1772705Sjp161948 		return (1);
1780Sstevel@tonic-gate 
1792705Sjp161948 	if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
1802705Sjp161948 	    (langtag[5] != '\0' && langtag[5] != '-'))
1812705Sjp161948 		return (1);
1820Sstevel@tonic-gate 
1832705Sjp161948 	/* ignore rest of locale if there is no two-character country part */
1842705Sjp161948 	if (strlen(locale) < 5)
1852705Sjp161948 		return (1);
1860Sstevel@tonic-gate 
1872705Sjp161948 	if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
1882705Sjp161948 	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
1892705Sjp161948 		return (1);
1900Sstevel@tonic-gate 
1912705Sjp161948 	/* if the country part matches, return 2 */
1922705Sjp161948 	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
1932705Sjp161948 		return (2);
1940Sstevel@tonic-gate 
1952705Sjp161948 	return (1);
1960Sstevel@tonic-gate }
1970Sstevel@tonic-gate 
1980Sstevel@tonic-gate char *
1990Sstevel@tonic-gate g11n_getlocale()
2000Sstevel@tonic-gate {
2012705Sjp161948 	/* we have one text domain - always set it */
2022705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2030Sstevel@tonic-gate 
2042705Sjp161948 	/* if the locale is not set, set it from the env vars */
2052705Sjp161948 	if (!setlocale(LC_MESSAGES, NULL))
2062705Sjp161948 		(void) setlocale(LC_MESSAGES, "");
2070Sstevel@tonic-gate 
2082705Sjp161948 	return (setlocale(LC_MESSAGES, NULL));
2090Sstevel@tonic-gate }
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate void
2120Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
2130Sstevel@tonic-gate {
2142705Sjp161948 	char *curr;
2150Sstevel@tonic-gate 
2162705Sjp161948 	/* we have one text domain - always set it */
2172705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2180Sstevel@tonic-gate 
2192705Sjp161948 	if (!locale)
2202705Sjp161948 		return;
2210Sstevel@tonic-gate 
2222705Sjp161948 	if (*locale && ((curr = setlocale(category, NULL))) &&
2232705Sjp161948 	    strcmp(curr, locale) == 0)
2242705Sjp161948 		return;
2252628Sjp161948 
2262705Sjp161948 	/* if <category> is bogus, setlocale() will do nothing */
2272705Sjp161948 	(void) setlocale(category, locale);
2280Sstevel@tonic-gate }
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate char **
2310Sstevel@tonic-gate g11n_getlocales()
2320Sstevel@tonic-gate {
2332705Sjp161948 	FILE *locale_out;
2342705Sjp161948 	uint_t n_elems, list_size, long_line = 0;
2352705Sjp161948 	char **list;
2362705Sjp161948 	char locale[64];	/* 64 bytes is plenty for locale names */
2372705Sjp161948 
2382705Sjp161948 	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
2392705Sjp161948 		return (NULL);
2400Sstevel@tonic-gate 
2412705Sjp161948 	/*
2422705Sjp161948 	 * start with enough room for 65 locales - that's a lot fewer than
2432705Sjp161948 	 * all the locales available for installation, but a lot more than
2442705Sjp161948 	 * what most users will need and install
2452705Sjp161948 	 */
2462705Sjp161948 	n_elems = 0;
2472705Sjp161948 	list_size = 192;
2482705Sjp161948 	list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
2492705Sjp161948 	memset(list, 0, sizeof (char *) * (list_size + 1));
2500Sstevel@tonic-gate 
2512705Sjp161948 	while (fgets(locale, sizeof (locale), locale_out)) {
2522705Sjp161948 		/* skip long locale names (if any) */
2532705Sjp161948 		if (!strchr(locale, '\n')) {
2542705Sjp161948 			long_line = 1;
2552705Sjp161948 			continue;
2562705Sjp161948 		} else if (long_line) {
2572705Sjp161948 			long_line = 0;
2582705Sjp161948 			continue;
2592705Sjp161948 		}
2600Sstevel@tonic-gate 
2612705Sjp161948 		if (strncmp(locale, "iso_8859", 8) == 0)
2622705Sjp161948 			/* ignore locale names like "iso_8859-1" */
2632705Sjp161948 			continue;
2640Sstevel@tonic-gate 
2652705Sjp161948 		if (n_elems == list_size) {
2662705Sjp161948 			list_size *= 2;
2672705Sjp161948 			list = (char **)xrealloc((void *) list,
2682705Sjp161948 			    (list_size + 1) * sizeof (char *));
2692705Sjp161948 			memset(&list[n_elems + 1], 0,
2702705Sjp161948 			    sizeof (char *) * (list_size - n_elems + 1));
2712705Sjp161948 		}
2722705Sjp161948 
2732705Sjp161948 		*(strchr(locale, '\n')) = '\0';	/* remove the trailing \n */
2742705Sjp161948 		list[n_elems++] = xstrdup(locale);
2750Sstevel@tonic-gate 	}
2760Sstevel@tonic-gate 
277*6288Sjp161948 	(void) pclose(locale_out);
278*6288Sjp161948 
2795562Sjp161948 	if (n_elems == 0) {
2805562Sjp161948 		xfree(list);
2813109Sjp161948 		return (NULL);
2825562Sjp161948 	}
2833109Sjp161948 
2842705Sjp161948 	list[n_elems] = NULL;
2850Sstevel@tonic-gate 
2862705Sjp161948 	qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
2872705Sjp161948 	return (list);
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate 
/*
 * Return the language tag(s) to use for this process: the value of the
 * SSH_LANGS environment variable if it is set, otherwise the language tag
 * derived from the current LC_MESSAGES locale ("i-default" when no locale
 * name is available).
 *
 * The result is allocated; it is NULL when the current locale cannot be
 * converted by g11n_locale2langtag().
 */
char *
g11n_getlangs()
{
	char *env_langs, *current;

	/* an explicit setting in the environment wins */
	env_langs = getenv("SSH_LANGS");
	if (env_langs != NULL)
		return (xstrdup(env_langs));

	current = g11n_getlocale();
	if (current == NULL || *current == '\0')
		return (xstrdup("i-default"));

	return (g11n_locale2langtag(current));
}
3050Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated list of locale names into a comma-separated
 * string of language tags.  Locales that g11n_locale2langtag() cannot
 * convert are left out and every tag is listed only once, in order of first
 * appearance.  The string is produced by xjoin().
 */
char *
g11n_locales2langs(char **locale_set)
{
	char **tags, **loc, **slot;
	char *tag, *joined;
	int n_locales, duplicate;

	n_locales = 0;
	for (loc = locale_set; loc && *loc; loc++)
		n_locales++;

	/* at most one tag per locale, plus the terminator */
	tags = (char **)xmalloc((n_locales + 1) * sizeof (char *));
	memset(tags, 0, (n_locales + 1) * sizeof (char *));

	for (loc = locale_set;
	    loc && *loc && ((loc - locale_set) <= n_locales); loc++) {
		tag = g11n_locale2langtag(*loc);
		if (tag == NULL)
			continue;

		/*
		 * walk the tags collected so far; the walk stops on the first
		 * free slot, which is where a new tag gets stored
		 */
		duplicate = 0;
		for (slot = tags;
		    (slot - tags) < n_locales && *slot != NULL; slot++) {
			if (strcmp(*slot, tag) == 0)
				duplicate = 1;
		}

		if (duplicate)
			xfree(tag);
		else
			*(slot++) = tag;
		*slot = NULL;
	}

	joined = xjoin(tags, ',');
	g11n_freelist(tags);

	return (joined);
}
3410Sstevel@tonic-gate 
/*
 * qsort() comparison callback for arrays of C strings: each argument points
 * at an array element (a char *), and the strings are ordered by strcmp().
 */
static int
sortcmp(const void *d1, const void *d2)
{
	const char *const *left = d1;
	const char *const *right = d2;

	return (strcmp(*left, *right));
}
3500Sstevel@tonic-gate 
/* Length of the leading sub-tag of tag: up to the first '-' or the end. */
static int
langtag_subtag_len(const char *tag)
{
	const char *dash = strchr(tag, '-');

	if (dash != NULL)
		return ((int)(dash - tag));

	return ((int)strlen(tag));
}

/*
 * Compare the first two sub-tags of two language tags (case-sensitively).
 *
 * Returns:
 *	0 - the primary sub-tags differ
 *	1 - the primary sub-tags are equal, but only one tag has a second
 *	    sub-tag or the second sub-tags differ
 *	2 - the primary sub-tags are equal and either neither tag has a
 *	    second sub-tag or the second sub-tags are equal too
 *
 * Sub-tags after the second one are not looked at.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
	int len1, len2;
	int more1, more2;

	len1 = langtag_subtag_len(langtag1);
	len2 = langtag_subtag_len(langtag2);

	/* no match */
	if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
		return (0);

	/* a sub-tag is followed either by '-' or by the end of the tag */
	more1 = (langtag1[len1] != '\0');
	more2 = (langtag2[len2] != '\0');

	/* no country sub-tags - exact match */
	if (!more1 && !more2)
		return (2);

	/* one langtag has a country sub-tag, the other doesn't */
	if (!more1 || !more2)
		return (1);

	/* both have one: step over the '-' and compare country sub-tags */
	langtag1 += len1 + 1;
	langtag2 += len2 + 1;

	len1 = langtag_subtag_len(langtag1);
	len2 = langtag_subtag_len(langtag2);

	if (len1 == len2 && strncmp(langtag1, langtag2, len1) == 0)
		return (2);	/* country tags matched - exact match */

	return (1);
}
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate char *
4020Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
4030Sstevel@tonic-gate {
4042705Sjp161948 	char **list1, **list2, **list3, **p, **q, **r;
4052705Sjp161948 	char *set3, *lang_subtag;
4062705Sjp161948 	uint_t n1, n2, n3;
4072705Sjp161948 	uint_t do_append;
4082705Sjp161948 
4092705Sjp161948 	list1 = xsplit(set1, ',');
4102705Sjp161948 	list2 = xsplit(set2, ',');
4110Sstevel@tonic-gate 
4122705Sjp161948 	for (n1 = 0, p = list1; p && *p; p++, n1++)
4132705Sjp161948 		;
4142705Sjp161948 	for (n2 = 0, p = list2; p && *p; p++, n2++)
4152705Sjp161948 		;
4160Sstevel@tonic-gate 
4172705Sjp161948 	list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
4182705Sjp161948 	*list3 = NULL;
4190Sstevel@tonic-gate 
4202705Sjp161948 	/*
4212705Sjp161948 	 * we must not sort the user langtags - sorting or not the server's
4222705Sjp161948 	 * should not affect the outcome
4232705Sjp161948 	 */
4242705Sjp161948 	qsort(list2, n2, sizeof (char *), sortcmp);
4250Sstevel@tonic-gate 
4262705Sjp161948 	for (n3 = 0, p = list1; p && *p; p++) {
4272705Sjp161948 		do_append = 0;
4282705Sjp161948 		for (q = list2; q && *q; q++) {
4292705Sjp161948 			if (g11n_langtag_match(*p, *q) != 2) continue;
4302705Sjp161948 			/* append element */
4312705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4322705Sjp161948 				do_append = 1;
4332705Sjp161948 				if (!*r)
4342705Sjp161948 					break;
4352705Sjp161948 				if (strcmp(*p, *r) == 0) {
4362705Sjp161948 					do_append = 0;
4372705Sjp161948 					break;
4382705Sjp161948 				}
4392705Sjp161948 			}
4402705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4412705Sjp161948 				list3[n3++] = xstrdup(*p);
4422705Sjp161948 				list3[n3] = NULL;
4432705Sjp161948 			}
4440Sstevel@tonic-gate 		}
4450Sstevel@tonic-gate 	}
4462705Sjp161948 
4472705Sjp161948 	for (p = list1; p && *p; p++) {
4482705Sjp161948 		do_append = 0;
4492705Sjp161948 		for (q = list2; q && *q; q++) {
4502705Sjp161948 			if (g11n_langtag_match(*p, *q) != 1)
4512705Sjp161948 				continue;
4520Sstevel@tonic-gate 
4532705Sjp161948 			/* append element */
4542705Sjp161948 			lang_subtag = xstrdup(*p);
4552705Sjp161948 			if (strchr(lang_subtag, '-'))
4562705Sjp161948 				*(strchr(lang_subtag, '-')) = '\0';
4572705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4582705Sjp161948 				do_append = 1;
4592705Sjp161948 				if (!*r)
4602705Sjp161948 					break;
4612705Sjp161948 				if (strcmp(lang_subtag, *r) == 0) {
4622705Sjp161948 					do_append = 0;
4632705Sjp161948 					break;
4642705Sjp161948 				}
4652705Sjp161948 			}
4662705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4672705Sjp161948 				list3[n3++] = lang_subtag;
4682705Sjp161948 				list3[n3] = NULL;
4692705Sjp161948 			} else
4702705Sjp161948 				xfree(lang_subtag);
4710Sstevel@tonic-gate 		}
4720Sstevel@tonic-gate 	}
4730Sstevel@tonic-gate 
4742705Sjp161948 	set3 = xjoin(list3, ',');
4752705Sjp161948 	xfree_split_list(list1);
4762705Sjp161948 	xfree_split_list(list2);
4772705Sjp161948 	xfree_split_list(list3);
4780Sstevel@tonic-gate 
4792705Sjp161948 	return (set3);
4800Sstevel@tonic-gate }
4810Sstevel@tonic-gate 
/*
 * Pick the language tag the client should use: the first element of the
 * intersection of the client's and the server's langtag sets, as computed
 * by g11n_langtag_set_intersect().
 *
 * Returns an xstrdup()'d langtag, or NULL if the sets have nothing in
 * common.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
	char *list, *result;
	char **xlist;

	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

	if (list == NULL)
		return (NULL);

	xlist = xsplit(list, ',');

	xfree(list);

	if (xlist == NULL)
		return (NULL);

	/*
	 * An empty list still has to be released, so free it on this path as
	 * well instead of returning early and leaking it.
	 */
	result = (*xlist != NULL) ? xstrdup(*xlist) : NULL;
	xfree_split_list(xlist);

	return (result);
}
5060Sstevel@tonic-gate 
/*
 * Return 1 if the codeset of a locale name (the part after the first '.')
 * is exactly "UTF-8", optionally followed by an '@' modifier; 0 otherwise.
 */
static int
locale_is_utf8(const char *name)
{
	const char *codeset = strchr(name, '.');

	if (codeset == NULL || strncmp(codeset + 1, "UTF-8", 5) != 0)
		return (0);

	/* the five characters matched, so codeset[6] is within the string */
	return (codeset[6] == '\0' || codeset[6] == '@');
}

/* Return 1 for the default locale names "C", "POSIX" and "common". */
static int
locale_is_default(const char *name)
{
	return (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 ||
	    strcmp(name, "common") == 0);
}

/*
 * qsort() comparison callback for arrays of locale names.  UTF-8 locales
 * sort before all others and the default locales ("C", "POSIX", "common")
 * sort after all others; within each group it is a straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
	char *s1 = *(char **)d1;
	char *s2 = *(char **)d2;
	int utf8_1 = locale_is_utf8(s1);
	int utf8_2 = locale_is_utf8(s2);
	int default_1, default_2;

	/* prefer UTF-8 locales */
	if (utf8_1 != utf8_2)
		return (utf8_1 ? -1 : 1);

	/* prefer any locale over the default locales */
	default_1 = locale_is_default(s1);
	default_2 = locale_is_default(s2);
	if (default_1 != default_2)
		return (default_1 ? 1 : -1);

	return (strcmp(s1, s2));
}
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 
5610Sstevel@tonic-gate char **
5622705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
5630Sstevel@tonic-gate {
5642705Sjp161948 	char **langtag_list, **result, **p, **q, **r;
5652705Sjp161948 	char *s;
5662705Sjp161948 	uint_t do_append, n_langtags, n_locales, n_results, max_results;
5672705Sjp161948 
5682705Sjp161948 	/* count lang tags and locales */
5692705Sjp161948 	for (n_locales = 0, p = locale_set; p && *p; p++)
5702705Sjp161948 		n_locales++;
5710Sstevel@tonic-gate 
5722705Sjp161948 	n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
5732705Sjp161948 	/* count the number of langtags */
5742705Sjp161948 	for (; s = strchr(s, ','); s++, n_langtags++)
5752705Sjp161948 		;
5762705Sjp161948 
5772705Sjp161948 	qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
5780Sstevel@tonic-gate 
5792705Sjp161948 	langtag_list = xsplit(langtag_set, ',');
5802705Sjp161948 	for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
5812705Sjp161948 		;
5820Sstevel@tonic-gate 
5832705Sjp161948 	max_results = MIN(n_locales, n_langtags) * 2;
5842705Sjp161948 	result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
5852705Sjp161948 	*result = NULL;
5862705Sjp161948 	n_results = 0;
5870Sstevel@tonic-gate 
5882705Sjp161948 	/* more specific matches first */
5892705Sjp161948 	for (p = langtag_list; p && *p; p++) {
5902705Sjp161948 		do_append = 0;
5912705Sjp161948 		for (q = locale_set; q && *q; q++) {
5922705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 2) {
5932705Sjp161948 				do_append = 1;
5942705Sjp161948 				for (r = result; (r - result) <=
5952705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
5962705Sjp161948 					if (!*r)
5972705Sjp161948 						break;
5982705Sjp161948 					if (strcmp(*q, *r) == 0) {
5992705Sjp161948 						do_append = 0;
6002705Sjp161948 						break;
6012705Sjp161948 					}
6022705Sjp161948 				}
6032705Sjp161948 				if (do_append && n_results < max_results) {
6042705Sjp161948 					result[n_results++] = xstrdup(*q);
6052705Sjp161948 					result[n_results] = NULL;
6062705Sjp161948 				}
6072705Sjp161948 				break;
6082705Sjp161948 			}
6090Sstevel@tonic-gate 		}
6100Sstevel@tonic-gate 	}
6110Sstevel@tonic-gate 
6122705Sjp161948 	for (p = langtag_list; p && *p; p++) {
6132705Sjp161948 		do_append = 0;
6142705Sjp161948 		for (q = locale_set; q && *q; q++) {
6152705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 1) {
6162705Sjp161948 				do_append = 1;
6172705Sjp161948 				for (r = result; (r - result) <=
6182705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
6192705Sjp161948 					if (!*r)
6202705Sjp161948 						break;
6212705Sjp161948 					if (strcmp(*q, *r) == 0) {
6222705Sjp161948 						do_append = 0;
6232705Sjp161948 						break;
6242705Sjp161948 					}
6252705Sjp161948 				}
6262705Sjp161948 				if (do_append && n_results < max_results) {
6272705Sjp161948 					result[n_results++] = xstrdup(*q);
6282705Sjp161948 					result[n_results] = NULL;
6292705Sjp161948 				}
6302705Sjp161948 				break;
6312705Sjp161948 			}
6320Sstevel@tonic-gate 		}
6330Sstevel@tonic-gate 	}
6340Sstevel@tonic-gate 
6352705Sjp161948 	xfree_split_list(langtag_list);
6362705Sjp161948 
6372705Sjp161948 	return (result);
6380Sstevel@tonic-gate }
6390Sstevel@tonic-gate 
/*
 * Pick the server locale that best matches the client's language tags: the
 * first entry produced by g11n_langtag_set_locale_set_intersect().  When
 * srvr_locales is NULL the list from g11n_getlocales() is used instead.
 *
 * Returns an xstrdup()'d locale name, or NULL if nothing matches.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
	char **candidates, **matches;
	char *best = NULL;

	candidates = (srvr_locales != NULL) ? srvr_locales : g11n_getlocales();

	matches = g11n_langtag_set_locale_set_intersect(clnt_langtags,
	    candidates);
	if (matches != NULL) {
		if (*matches != NULL)
			best = xstrdup(*matches);

		xfree_split_list(matches);
	}

	/* release the locale list only if it was obtained here */
	if (candidates != srvr_locales)
		g11n_freelist(candidates);

	return (best);
}
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 
/*
 * Functions for validating ASCII and UTF-8 strings
 *
 * The error_str parameter is an optional pointer to a char variable
 * where to store a string suitable for use with error() or fatal() or
 * friends.
 *
 * The return value is 0 if success, EILSEQ or EINVAL.
 *
 */

/*
 * Check that the first len bytes of str are 7-bit ASCII characters.
 *
 * A len of 0 means "up to the terminating NUL", as in g11n_validate_utf8().
 * Returns 0 if all len bytes are non-NUL 7-bit characters and EILSEQ if an
 * 8-bit character or a NUL shows up before len bytes have been seen.  At
 * most len bytes are examined.  A NULL str is accepted only when len is 0.
 *
 * error_str is not used by this function.
 */
uint_t
g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str)
{
	const uchar_t *p, *end;

	if (str == NULL)
		return (len != 0 ? EILSEQ : 0);

	if (len == 0)
		len = strlen(str);

	p = (const uchar_t *)str;
	end = p + len;

	/* stop at the first byte that is NUL or has the high bit set */
	while (p < end && *p != '\0' && !(*p & 0x80))
		p++;

	if (p != end)
		return (EILSEQ);

	return (0);
}
6890Sstevel@tonic-gate 
6902705Sjp161948 uint_t
6912705Sjp161948 g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str)
6920Sstevel@tonic-gate {
6932705Sjp161948 	uchar_t *p;
6942705Sjp161948 	uint_t c, l;
6952705Sjp161948 
6962705Sjp161948 	if (len == 0)
6972705Sjp161948 		len = strlen((const char *)str);
6980Sstevel@tonic-gate 
6992705Sjp161948 	for (p = (uchar_t *)str; p && (p - str < len) && *p; ) {
7002705Sjp161948 		/* 8-bit chars begin a UTF-8 sequence */
7012705Sjp161948 		if (*p & 0x80) {
7022705Sjp161948 			/* get sequence length and sanity check first byte */
7032705Sjp161948 			if (*p < 0xc0)
7042705Sjp161948 				return (EILSEQ);
7052705Sjp161948 			else if (*p < 0xe0)
7062705Sjp161948 				l = 2;
7072705Sjp161948 			else if (*p < 0xf0)
7082705Sjp161948 				l = 3;
7092705Sjp161948 			else if (*p < 0xf8)
7102705Sjp161948 				l = 4;
7112705Sjp161948 			else if (*p < 0xfc)
7122705Sjp161948 				l = 5;
7132705Sjp161948 			else if (*p < 0xfe)
7142705Sjp161948 				l = 6;
7152705Sjp161948 			else
7162705Sjp161948 				return (EILSEQ);
7172705Sjp161948 
7182705Sjp161948 			if ((p + l - str) >= len)
7192705Sjp161948 				return (EILSEQ);
7202705Sjp161948 
7212705Sjp161948 			/* overlong detection - build codepoint */
7222705Sjp161948 			c = *p & 0x3f;
7232705Sjp161948 			/* shift c bits from first byte */
7242705Sjp161948 			c = c << (6 * (l - 1));
7252705Sjp161948 
7262705Sjp161948 			if (l > 1) {
7272705Sjp161948 				if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80))
7282705Sjp161948 					c = c | ((*(p + 1) & 0x3f) <<
7292705Sjp161948 					    (6 * (l - 2)));
7302705Sjp161948 				else
7312705Sjp161948 					return (EILSEQ);
7322705Sjp161948 
7332705Sjp161948 				if (c < 0x80)
7342705Sjp161948 					return (EILSEQ);
7352705Sjp161948 			}
7360Sstevel@tonic-gate 
7372705Sjp161948 			if (l > 2) {
7382705Sjp161948 				if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80))
7392705Sjp161948 					c = c | ((*(p + 2) & 0x3f) <<
7402705Sjp161948 					    (6 * (l - 3)));
7412705Sjp161948 				else
7422705Sjp161948 					return (EILSEQ);
7432705Sjp161948 
7442705Sjp161948 				if (c < 0x800)
7452705Sjp161948 					return (EILSEQ);
7462705Sjp161948 			}
7472705Sjp161948 
7482705Sjp161948 			if (l > 3) {
7492705Sjp161948 				if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80))
7502705Sjp161948 					c = c | ((*(p + 3) & 0x3f) <<
7512705Sjp161948 					    (6 * (l - 4)));
7522705Sjp161948 				else
7532705Sjp161948 					return (EILSEQ);
7542705Sjp161948 
7552705Sjp161948 				if (c < 0x10000)
7562705Sjp161948 					return (EILSEQ);
7572705Sjp161948 			}
7580Sstevel@tonic-gate 
7592705Sjp161948 			if (l > 4) {
7602705Sjp161948 				if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80))
7612705Sjp161948 					c = c | ((*(p + 4) & 0x3f) <<
7622705Sjp161948 					    (6 * (l - 5)));
7632705Sjp161948 				else
7642705Sjp161948 					return (EILSEQ);
7652705Sjp161948 
7662705Sjp161948 				if (c < 0x200000)
7672705Sjp161948 					return (EILSEQ);
7682705Sjp161948 			}
7690Sstevel@tonic-gate 
7702705Sjp161948 			if (l > 5) {
7712705Sjp161948 				if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80))
7722705Sjp161948 					c = c | (*(p + 5) & 0x3f);
7732705Sjp161948 				else
7742705Sjp161948 					return (EILSEQ);
7752705Sjp161948 
7762705Sjp161948 				if (c < 0x4000000)
7772705Sjp161948 					return (EILSEQ);
7782705Sjp161948 			}
7790Sstevel@tonic-gate 
			/*
			 * reject UTF-16 surrogates and other illegal
			 * code points (U+FFFE, U+FFFF)
			 */
7842705Sjp161948 			if (((c <= 0xdfff) && (c >= 0xd800)) ||
7852705Sjp161948 			    (c == 0xfffe) || (c == 0xffff))
7862705Sjp161948 				return (EILSEQ);
7872705Sjp161948 			p += l;
7882705Sjp161948 		}
7892705Sjp161948 		/* 7-bit chars are fine */
7900Sstevel@tonic-gate 		else
7912705Sjp161948 			p++;
7920Sstevel@tonic-gate 	}
7932705Sjp161948 	return (0);
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate /*
7970Sstevel@tonic-gate  * Functions for converting to ASCII or UTF-8 from the local codeset
7980Sstevel@tonic-gate  * Functions for converting from ASCII or UTF-8 to the local codeset
7990Sstevel@tonic-gate  *
8000Sstevel@tonic-gate  * The error_str parameter is an optional pointer to a char variable
8010Sstevel@tonic-gate  * where to store a string suitable for use with error() or fatal() or
8020Sstevel@tonic-gate  * friends.
8030Sstevel@tonic-gate  *
8040Sstevel@tonic-gate  * The err parameter is an optional pointer to an integer where 0
8050Sstevel@tonic-gate  * (success) or EILSEQ or EINVAL will be stored (failure).
8060Sstevel@tonic-gate  *
8070Sstevel@tonic-gate  * These functions return NULL if the conversion fails.
8080Sstevel@tonic-gate  *
8090Sstevel@tonic-gate  */
8102705Sjp161948 uchar_t *
8112705Sjp161948 g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str)
8120Sstevel@tonic-gate {
8132705Sjp161948 	static uint_t initialized = 0;
8142705Sjp161948 	static uint_t do_convert = 0;
8152705Sjp161948 	iconv_t cd;
8162705Sjp161948 	int err;
8170Sstevel@tonic-gate 
8182705Sjp161948 	if (!initialized) {
8192705Sjp161948 		/*
8202705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
8212705Sjp161948 		 * same, and there are aliases of codesets to boot...
8222705Sjp161948 		 */
8232705Sjp161948 		if (strcmp("646", nl_langinfo(CODESET)) == 0 ||
8245562Sjp161948 		    strcmp("ASCII",  nl_langinfo(CODESET)) == 0 ||
8255562Sjp161948 		    strcmp("US-ASCII",  nl_langinfo(CODESET)) == 0) {
8262705Sjp161948 			initialized = 1;
8272705Sjp161948 			do_convert = 0;
8282705Sjp161948 		} else {
8292705Sjp161948 			cd = iconv_open(nl_langinfo(CODESET), "646");
8302705Sjp161948 			if (cd == (iconv_t)-1) {
8312705Sjp161948 				if (err_ptr)
8322705Sjp161948 					*err_ptr = errno;
8332705Sjp161948 				if (error_str)
8342705Sjp161948 					*error_str = (uchar_t *)"Cannot "
8352705Sjp161948 					    "convert ASCII strings to the local"
8362705Sjp161948 					    " codeset";
8372705Sjp161948 			}
8382705Sjp161948 			initialized = 1;
8392705Sjp161948 			do_convert = 1;
8402705Sjp161948 		}
8410Sstevel@tonic-gate 	}
8422705Sjp161948 
8432705Sjp161948 	if (!do_convert) {
8442705Sjp161948 		if ((err = g11n_validate_ascii(str, 0, error_str))) {
8452705Sjp161948 			if (err_ptr)
8462705Sjp161948 				*err_ptr = err;
8472705Sjp161948 			return (NULL);
8482705Sjp161948 		} else
8492705Sjp161948 			return ((uchar_t *)xstrdup(str));
8500Sstevel@tonic-gate 	}
8510Sstevel@tonic-gate 
8522705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
8530Sstevel@tonic-gate }
8540Sstevel@tonic-gate 
8552705Sjp161948 uchar_t *
8562705Sjp161948 g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str)
8570Sstevel@tonic-gate {
8582705Sjp161948 	static uint_t initialized = 0;
8592705Sjp161948 	static uint_t do_convert = 0;
8602705Sjp161948 	iconv_t cd;
8612705Sjp161948 	int err;
8620Sstevel@tonic-gate 
8632705Sjp161948 	if (!initialized) {
8642705Sjp161948 		/*
8652705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
8662705Sjp161948 		 * same, and there are aliases of codesets to boot...
8672705Sjp161948 		 */
8682705Sjp161948 		if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 ||
8692705Sjp161948 		    strcmp("UTF8",  nl_langinfo(CODESET)) == 0) {
8702705Sjp161948 			initialized = 1;
8712705Sjp161948 			do_convert = 0;
8722705Sjp161948 		} else {
8732705Sjp161948 			cd = iconv_open(nl_langinfo(CODESET), "UTF-8");
8742705Sjp161948 			if (cd == (iconv_t)-1) {
8752705Sjp161948 				if (err_ptr)
8762705Sjp161948 					*err_ptr = errno;
8772705Sjp161948 				if (error_str)
8782705Sjp161948 					*error_str = (uchar_t *)"Cannot "
8792705Sjp161948 					    "convert UTF-8 strings to the "
8802705Sjp161948 					    "local codeset";
8812705Sjp161948 			}
8822705Sjp161948 			initialized = 1;
8832705Sjp161948 			do_convert = 1;
8842705Sjp161948 		}
8850Sstevel@tonic-gate 	}
8862705Sjp161948 
8872705Sjp161948 	if (!do_convert) {
8882705Sjp161948 		if ((err = g11n_validate_utf8(str, 0, error_str))) {
8892705Sjp161948 			if (err_ptr)
8902705Sjp161948 				*err_ptr = err;
8912705Sjp161948 			return (NULL);
8922705Sjp161948 		} else
8932705Sjp161948 			return ((uchar_t *)xstrdup((char *)str));
8940Sstevel@tonic-gate 	}
8950Sstevel@tonic-gate 
8962705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
8970Sstevel@tonic-gate }
8980Sstevel@tonic-gate 
8990Sstevel@tonic-gate char *
9002705Sjp161948 g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str)
9010Sstevel@tonic-gate {
9022705Sjp161948 	static uint_t initialized = 0;
9032705Sjp161948 	static uint_t do_convert = 0;
9042705Sjp161948 	iconv_t cd;
9050Sstevel@tonic-gate 
9062705Sjp161948 	if (!initialized) {
9072705Sjp161948 		/*
9082705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
9092705Sjp161948 		 * same, and there are aliases of codesets to boot...
9102705Sjp161948 		 */
9112705Sjp161948 		if (strcmp("646", nl_langinfo(CODESET)) == 0 ||
9122705Sjp161948 		    strcmp("ASCII",  nl_langinfo(CODESET)) == 0 ||
9132705Sjp161948 		    strcmp("US-ASCII",  nl_langinfo(CODESET)) == 0) {
9142705Sjp161948 			initialized = 1;
9152705Sjp161948 			do_convert = 0;
9162705Sjp161948 		} else {
9172705Sjp161948 			cd = iconv_open("646", nl_langinfo(CODESET));
9182705Sjp161948 			if (cd == (iconv_t)-1) {
9192705Sjp161948 				if (err_ptr)
9202705Sjp161948 					*err_ptr = errno;
9212705Sjp161948 				if (error_str)
9222705Sjp161948 					*error_str = (uchar_t *)"Cannot "
9232705Sjp161948 					    "convert UTF-8 strings to the "
9242705Sjp161948 					    "local codeset";
9252705Sjp161948 			}
9262705Sjp161948 			initialized = 1;
9272705Sjp161948 			do_convert = 1;
9282705Sjp161948 		}
9290Sstevel@tonic-gate 	}
9300Sstevel@tonic-gate 
9312705Sjp161948 	if (!do_convert)
9322705Sjp161948 		return (xstrdup((char *)str));
9332705Sjp161948 
9342705Sjp161948 	return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
9350Sstevel@tonic-gate }
9360Sstevel@tonic-gate 
9372705Sjp161948 uchar_t *
9382705Sjp161948 g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str)
9390Sstevel@tonic-gate {
9402705Sjp161948 	static uint_t initialized = 0;
9412705Sjp161948 	static uint_t do_convert = 0;
9422705Sjp161948 	iconv_t cd;
9430Sstevel@tonic-gate 
9442705Sjp161948 	if (!initialized) {
9452705Sjp161948 		/*
9462705Sjp161948 		 * iconv_open() fails if the to/from codesets are the
9472705Sjp161948 		 * same, and there are aliases of codesets to boot...
9482705Sjp161948 		 */
9492705Sjp161948 		if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 ||
9502705Sjp161948 		    strcmp("UTF8",  nl_langinfo(CODESET)) == 0) {
9512705Sjp161948 			initialized = 1;
9522705Sjp161948 			do_convert = 0;
9532705Sjp161948 		} else {
9542705Sjp161948 			cd = iconv_open("UTF-8", nl_langinfo(CODESET));
9552705Sjp161948 			if (cd == (iconv_t)-1) {
9562705Sjp161948 				if (err_ptr)
9572705Sjp161948 					*err_ptr = errno;
9582705Sjp161948 				if (error_str)
9592705Sjp161948 					*error_str = (uchar_t *)"Cannot "
9602705Sjp161948 					    "convert UTF-8 strings to the "
9612705Sjp161948 					    "local codeset";
9622705Sjp161948 			}
9632705Sjp161948 			initialized = 1;
9642705Sjp161948 			do_convert = 1;
9652705Sjp161948 		}
9660Sstevel@tonic-gate 	}
9670Sstevel@tonic-gate 
9682705Sjp161948 	if (!do_convert)
9692705Sjp161948 		return ((uchar_t *)xstrdup((char *)str));
9702705Sjp161948 
9712705Sjp161948 	return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str));
9720Sstevel@tonic-gate }
9730Sstevel@tonic-gate 
9740Sstevel@tonic-gate 
9750Sstevel@tonic-gate /*
9760Sstevel@tonic-gate  * Wrapper around iconv()
9770Sstevel@tonic-gate  *
9780Sstevel@tonic-gate  * The caller is responsible for freeing the result and for handling
9790Sstevel@tonic-gate  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
9800Sstevel@tonic-gate  */
9812705Sjp161948 static uchar_t *
9822705Sjp161948 do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len,
9832705Sjp161948     uint_t *outlen, int *err, uchar_t **err_str)
9842705Sjp161948 {
9852705Sjp161948 	size_t inbytesleft, outbytesleft, converted_size;
9862705Sjp161948 	char *outbuf;
9872705Sjp161948 	uchar_t *converted;
9882705Sjp161948 	const char *inbuf;
9892705Sjp161948 	uint_t mul = 0;
9900Sstevel@tonic-gate 
9912705Sjp161948 	if (!buf || !(*(char *)buf))
9922705Sjp161948 		return (NULL);
9932705Sjp161948 
9942705Sjp161948 	if (len == 0)
9952705Sjp161948 		len = strlen(buf);
9962705Sjp161948 
9972705Sjp161948 	/* reset conversion descriptor */
9982705Sjp161948 	/* XXX Do we need initial shift sequences for UTF-8??? */
9992705Sjp161948 	(void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft);
10002705Sjp161948 	inbuf = (const char *) buf;
10012705Sjp161948 
10022705Sjp161948 	if (mul_ptr)
10032705Sjp161948 		mul = *mul_ptr;
10042705Sjp161948 
10052705Sjp161948 	converted_size = (len << mul);
10062705Sjp161948 	outbuf = (char *)xmalloc(converted_size + 1); /* for null */
10072705Sjp161948 	converted = (uchar_t *)outbuf;
10082705Sjp161948 	outbytesleft = len;
10090Sstevel@tonic-gate 
10102705Sjp161948 	do {
10112705Sjp161948 		if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) ==
10122705Sjp161948 		    (size_t)-1) {
10132705Sjp161948 			if (errno == E2BIG) {
10142705Sjp161948 				/* UTF-8 codepoints are at most 8 bytes long */
10152705Sjp161948 				if (mul > 2) {
10162705Sjp161948 					if (err_str)
10172705Sjp161948 						*err_str = (uchar_t *)
10182705Sjp161948 						    "Conversion to UTF-8 failed"
10192705Sjp161948 						    " due to preposterous space"
10202705Sjp161948 						    " requirements";
10212705Sjp161948 					if (err)
10222705Sjp161948 						*err = EILSEQ;
10232705Sjp161948 					return (NULL);
10242705Sjp161948 				}
10250Sstevel@tonic-gate 
10262705Sjp161948 				/*
10272705Sjp161948 				 * re-alloc output and ensure that the outbuf
10282705Sjp161948 				 * and outbytesleft values are adjusted
10292705Sjp161948 				 */
10302705Sjp161948 				converted = xrealloc(converted,
10312705Sjp161948 				    converted_size << 1 + 1);
10322705Sjp161948 				outbuf = (char *)converted + converted_size -
10332705Sjp161948 				    outbytesleft;
10342705Sjp161948 				converted_size = (len << ++(mul));
10352705Sjp161948 				outbytesleft = converted_size - outbytesleft;
10362705Sjp161948 			} else {
10372705Sjp161948 				/*
10382705Sjp161948 				 * let the caller deal with iconv() errors,
10392705Sjp161948 				 * probably by calling fatal(); xfree() does
10402705Sjp161948 				 * not set errno
10412705Sjp161948 				 */
10422705Sjp161948 				if (err)
10432705Sjp161948 					*err = errno;
10442705Sjp161948 				xfree(converted);
10452705Sjp161948 				return (NULL);
10462705Sjp161948 			}
10472705Sjp161948 		}
10482705Sjp161948 	} while (inbytesleft);
10492705Sjp161948 
10502705Sjp161948 	*outbuf = '\0'; /* ensure null-termination */
10512705Sjp161948 	if (outlen)
10522705Sjp161948 		*outlen = converted_size - outbytesleft;
10532705Sjp161948 	if (mul_ptr)
10542705Sjp161948 		*mul_ptr = mul;
10552705Sjp161948 
10562705Sjp161948 	return (converted);
10570Sstevel@tonic-gate }
10585562Sjp161948 
/*
 * Release a NULL-terminated array of strings: every element up to (but
 * not including) the terminating NULL pointer is freed with xfree(),
 * and then the array itself is freed.
 */
void
g11n_freelist(char **list)
{
	char **entry;

	for (entry = list; *entry != NULL; entry++)
		xfree(*entry);

	xfree(list);
}
1075