xref: /onnv-gate/usr/src/cmd/ssh/libssh/common/g11n.c (revision 9845:0d705da26956)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52628Sjp161948  * Common Development and Distribution License (the "License").
62628Sjp161948  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  *
219600SNobutomo.Nakano@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
220Sstevel@tonic-gate  * Use is subject to license terms.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #include <errno.h>
260Sstevel@tonic-gate #include <locale.h>
270Sstevel@tonic-gate #include <langinfo.h>
280Sstevel@tonic-gate #include <iconv.h>
290Sstevel@tonic-gate #include <ctype.h>
309600SNobutomo.Nakano@Sun.COM #include <wctype.h>
310Sstevel@tonic-gate #include <strings.h>
320Sstevel@tonic-gate #include <string.h>
330Sstevel@tonic-gate #include <stdio.h>
340Sstevel@tonic-gate #include <stdlib.h>
350Sstevel@tonic-gate #include "includes.h"
360Sstevel@tonic-gate #include "xmalloc.h"
370Sstevel@tonic-gate #include "xlist.h"
389600SNobutomo.Nakano@Sun.COM #include "compat.h"
399600SNobutomo.Nakano@Sun.COM #include "log.h"
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #ifdef MIN
420Sstevel@tonic-gate #undef MIN
430Sstevel@tonic-gate #endif /* MIN */
440Sstevel@tonic-gate 
452705Sjp161948 #define	MIN(x, y)	((x) < (y) ? (x) : (y))
460Sstevel@tonic-gate 
472705Sjp161948 #define	LOCALE_PATH	"/usr/bin/locale"
480Sstevel@tonic-gate 
/* two-char language code, '-' and two-char country code */
502705Sjp161948 #define	LANGTAG_MAX	5
510Sstevel@tonic-gate 
520Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2);
530Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale);
540Sstevel@tonic-gate 
559600SNobutomo.Nakano@Sun.COM static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);
569600SNobutomo.Nakano@Sun.COM 
/*
 * native_codeset records the codeset of the default system locale.
 * It is used to convert the contents of files (e.g. /etc/issue) that are
 * supposed to be in the codeset of the default system locale.
 */
629600SNobutomo.Nakano@Sun.COM static char *native_codeset;
630Sstevel@tonic-gate 
645562Sjp161948 /*
655562Sjp161948  * Convert locale string name into a language tag. The caller is responsible for
665562Sjp161948  * freeing the memory allocated for the result.
675562Sjp161948  */
682705Sjp161948 static char *
g11n_locale2langtag(char * locale)690Sstevel@tonic-gate g11n_locale2langtag(char *locale)
700Sstevel@tonic-gate {
712705Sjp161948 	char *langtag;
720Sstevel@tonic-gate 
732705Sjp161948 	/* base cases */
742705Sjp161948 	if (!locale || !*locale)
752705Sjp161948 		return (NULL);
760Sstevel@tonic-gate 
772705Sjp161948 	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
785562Sjp161948 		return (xstrdup("i-default"));
790Sstevel@tonic-gate 
802705Sjp161948 	/* punt for language codes which are not exactly 2 letters */
812705Sjp161948 	if (strlen(locale) < 2 ||
822705Sjp161948 	    !isalpha(locale[0]) ||
832705Sjp161948 	    !isalpha(locale[1]) ||
842705Sjp161948 	    (locale[2] != '\0' &&
852705Sjp161948 	    locale[2] != '_' &&
862705Sjp161948 	    locale[2] != '.' &&
872705Sjp161948 	    locale[2] != '@'))
882705Sjp161948 		return (NULL);
890Sstevel@tonic-gate 
900Sstevel@tonic-gate 
912705Sjp161948 	/* we have a primary language sub-tag */
922705Sjp161948 	langtag = (char *)xmalloc(LANGTAG_MAX + 1);
930Sstevel@tonic-gate 
942705Sjp161948 	strncpy(langtag, locale, 2);
952705Sjp161948 	langtag[2] = '\0';
960Sstevel@tonic-gate 
972705Sjp161948 	/* do we have country sub-tag? For example: cs_CZ */
982705Sjp161948 	if (locale[2] == '_') {
992705Sjp161948 		if (strlen(locale) < 5 ||
1002705Sjp161948 		    !isalpha(locale[3]) ||
1012705Sjp161948 		    !isalpha(locale[4]) ||
1022705Sjp161948 		    (locale[5] != '\0' && (locale[5] != '.' &&
1032705Sjp161948 		    locale[5] != '@'))) {
1042705Sjp161948 			return (langtag);
1052705Sjp161948 		}
1062705Sjp161948 
1072705Sjp161948 		/* example: create cs-CZ from cs_CZ */
1082705Sjp161948 		if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
1092705Sjp161948 		    locale + 3) == 5)
1102705Sjp161948 			return (langtag);
1110Sstevel@tonic-gate 	}
1120Sstevel@tonic-gate 
1132705Sjp161948 	/* in all other cases we just use the primary language sub-tag */
1142705Sjp161948 	return (langtag);
1150Sstevel@tonic-gate }
1160Sstevel@tonic-gate 
1172705Sjp161948 uint_t
g11n_langtag_is_default(char * langtag)1180Sstevel@tonic-gate g11n_langtag_is_default(char *langtag)
1190Sstevel@tonic-gate {
1202705Sjp161948 	return (strcmp(langtag, "i-default") == 0);
1210Sstevel@tonic-gate }
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate  * This lang tag / locale matching function works only for two-character
1250Sstevel@tonic-gate  * language primary sub-tags and two-character country sub-tags.
1260Sstevel@tonic-gate  */
1272705Sjp161948 uint_t
g11n_langtag_matches_locale(char * langtag,char * locale)1280Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale)
1290Sstevel@tonic-gate {
1302705Sjp161948 	/* match "i-default" to the process' current locale if possible */
1312705Sjp161948 	if (g11n_langtag_is_default(langtag)) {
1322705Sjp161948 		if (strcasecmp(locale, "POSIX") == 0 ||
1332705Sjp161948 		    strcasecmp(locale, "C") == 0)
1342705Sjp161948 			return (1);
1352705Sjp161948 		else
1362705Sjp161948 			return (0);
1372705Sjp161948 	}
1380Sstevel@tonic-gate 
1392705Sjp161948 	/*
1402705Sjp161948 	 * locale must be at least 2 chars long and the lang part must be
1412705Sjp161948 	 * exactly two characters
1422705Sjp161948 	 */
1432705Sjp161948 	if (strlen(locale) < 2 ||
1442705Sjp161948 	    (!isalpha(locale[0]) || !isalpha(locale[1]) ||
1452705Sjp161948 	    (locale[2] != '\0' && locale[2] != '_' &&
1462705Sjp161948 	    locale[2] != '.' && locale[2] != '@')))
1472705Sjp161948 		return (0);
1480Sstevel@tonic-gate 
1492705Sjp161948 	/* same thing with the langtag */
1502705Sjp161948 	if (strlen(langtag) < 2 ||
1512705Sjp161948 	    (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
1522705Sjp161948 	    (langtag[2] != '\0' && langtag[2] != '-')))
1532705Sjp161948 		return (0);
1540Sstevel@tonic-gate 
1552705Sjp161948 	/* primary language sub-tag and the locale's language part must match */
1562705Sjp161948 	if (strncasecmp(langtag, locale, 2) != 0)
1572705Sjp161948 		return (0);
1580Sstevel@tonic-gate 
1592705Sjp161948 	/*
1602705Sjp161948 	 * primary language sub-tag and the locale's language match, now
1612705Sjp161948 	 * fuzzy check country part
1622705Sjp161948 	 */
1630Sstevel@tonic-gate 
1642705Sjp161948 	/* neither langtag nor locale have more than one component */
1652705Sjp161948 	if (langtag[2] == '\0' &&
1662705Sjp161948 	    (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
1672705Sjp161948 		return (2);
1680Sstevel@tonic-gate 
1692705Sjp161948 	/* langtag has only one sub-tag... */
1702705Sjp161948 	if (langtag[2] == '\0')
1712705Sjp161948 		return (1);
1720Sstevel@tonic-gate 
1732705Sjp161948 	/* locale has no country code... */
1742705Sjp161948 	if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
1752705Sjp161948 		return (1);
1762705Sjp161948 
1772705Sjp161948 	/* langtag has more than one subtag and the locale has a country code */
1780Sstevel@tonic-gate 
1792705Sjp161948 	/* ignore second subtag if not two chars */
1802705Sjp161948 	if (strlen(langtag) < 5)
1812705Sjp161948 		return (1);
1820Sstevel@tonic-gate 
1832705Sjp161948 	if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
1842705Sjp161948 	    (langtag[5] != '\0' && langtag[5] != '-'))
1852705Sjp161948 		return (1);
1860Sstevel@tonic-gate 
1872705Sjp161948 	/* ignore rest of locale if there is no two-character country part */
1882705Sjp161948 	if (strlen(locale) < 5)
1892705Sjp161948 		return (1);
1900Sstevel@tonic-gate 
1912705Sjp161948 	if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
1922705Sjp161948 	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
1932705Sjp161948 		return (1);
1940Sstevel@tonic-gate 
1952705Sjp161948 	/* if the country part matches, return 2 */
1962705Sjp161948 	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
1972705Sjp161948 		return (2);
1980Sstevel@tonic-gate 
1992705Sjp161948 	return (1);
2000Sstevel@tonic-gate }
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate char *
g11n_getlocale()2030Sstevel@tonic-gate g11n_getlocale()
2040Sstevel@tonic-gate {
2052705Sjp161948 	/* we have one text domain - always set it */
2062705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2070Sstevel@tonic-gate 
2082705Sjp161948 	/* if the locale is not set, set it from the env vars */
2092705Sjp161948 	if (!setlocale(LC_MESSAGES, NULL))
2102705Sjp161948 		(void) setlocale(LC_MESSAGES, "");
2110Sstevel@tonic-gate 
2122705Sjp161948 	return (setlocale(LC_MESSAGES, NULL));
2130Sstevel@tonic-gate }
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate void
g11n_setlocale(int category,const char * locale)2160Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
2170Sstevel@tonic-gate {
2182705Sjp161948 	char *curr;
2190Sstevel@tonic-gate 
2209600SNobutomo.Nakano@Sun.COM 	if (native_codeset == NULL) {
2219600SNobutomo.Nakano@Sun.COM 		/* set default locale, and record current codeset */
2229600SNobutomo.Nakano@Sun.COM 		(void) setlocale(LC_ALL, "");
2239600SNobutomo.Nakano@Sun.COM 		curr = nl_langinfo(CODESET);
2249600SNobutomo.Nakano@Sun.COM 		native_codeset = xstrdup(curr);
2259600SNobutomo.Nakano@Sun.COM 	}
2269600SNobutomo.Nakano@Sun.COM 
2272705Sjp161948 	/* we have one text domain - always set it */
2282705Sjp161948 	(void) textdomain(TEXT_DOMAIN);
2290Sstevel@tonic-gate 
2302705Sjp161948 	if (!locale)
2312705Sjp161948 		return;
2320Sstevel@tonic-gate 
2332705Sjp161948 	if (*locale && ((curr = setlocale(category, NULL))) &&
2342705Sjp161948 	    strcmp(curr, locale) == 0)
2352705Sjp161948 		return;
2362628Sjp161948 
2372705Sjp161948 	/* if <category> is bogus, setlocale() will do nothing */
2382705Sjp161948 	(void) setlocale(category, locale);
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate char **
g11n_getlocales()2420Sstevel@tonic-gate g11n_getlocales()
2430Sstevel@tonic-gate {
2442705Sjp161948 	FILE *locale_out;
2452705Sjp161948 	uint_t n_elems, list_size, long_line = 0;
2462705Sjp161948 	char **list;
2472705Sjp161948 	char locale[64];	/* 64 bytes is plenty for locale names */
2482705Sjp161948 
2492705Sjp161948 	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
2502705Sjp161948 		return (NULL);
2510Sstevel@tonic-gate 
2522705Sjp161948 	/*
2532705Sjp161948 	 * start with enough room for 65 locales - that's a lot fewer than
2542705Sjp161948 	 * all the locales available for installation, but a lot more than
2552705Sjp161948 	 * what most users will need and install
2562705Sjp161948 	 */
2572705Sjp161948 	n_elems = 0;
2582705Sjp161948 	list_size = 192;
2592705Sjp161948 	list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
2602705Sjp161948 	memset(list, 0, sizeof (char *) * (list_size + 1));
2610Sstevel@tonic-gate 
2622705Sjp161948 	while (fgets(locale, sizeof (locale), locale_out)) {
2632705Sjp161948 		/* skip long locale names (if any) */
2642705Sjp161948 		if (!strchr(locale, '\n')) {
2652705Sjp161948 			long_line = 1;
2662705Sjp161948 			continue;
2672705Sjp161948 		} else if (long_line) {
2682705Sjp161948 			long_line = 0;
2692705Sjp161948 			continue;
2702705Sjp161948 		}
2710Sstevel@tonic-gate 
2722705Sjp161948 		if (strncmp(locale, "iso_8859", 8) == 0)
2732705Sjp161948 			/* ignore locale names like "iso_8859-1" */
2742705Sjp161948 			continue;
2750Sstevel@tonic-gate 
2762705Sjp161948 		if (n_elems == list_size) {
2772705Sjp161948 			list_size *= 2;
2782705Sjp161948 			list = (char **)xrealloc((void *) list,
2792705Sjp161948 			    (list_size + 1) * sizeof (char *));
2802705Sjp161948 			memset(&list[n_elems + 1], 0,
2812705Sjp161948 			    sizeof (char *) * (list_size - n_elems + 1));
2822705Sjp161948 		}
2832705Sjp161948 
2842705Sjp161948 		*(strchr(locale, '\n')) = '\0';	/* remove the trailing \n */
2852705Sjp161948 		list[n_elems++] = xstrdup(locale);
2860Sstevel@tonic-gate 	}
2870Sstevel@tonic-gate 
2886288Sjp161948 	(void) pclose(locale_out);
2896288Sjp161948 
2905562Sjp161948 	if (n_elems == 0) {
2915562Sjp161948 		xfree(list);
2923109Sjp161948 		return (NULL);
2935562Sjp161948 	}
2943109Sjp161948 
2952705Sjp161948 	list[n_elems] = NULL;
2960Sstevel@tonic-gate 
2972705Sjp161948 	qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
2982705Sjp161948 	return (list);
2990Sstevel@tonic-gate }
3000Sstevel@tonic-gate 
/*
 * Return an allocated language tag string for this process.
 *
 * The value of the SSH_LANGS environment variable is used if it is set.
 * Otherwise the result is derived from the locale returned by
 * g11n_getlocale(): "i-default" if that locale is NULL or empty, else
 * whatever g11n_locale2langtag() makes of it (which may be NULL).
 */
char *
g11n_getlangs()
{
	char *from_env, *current;

	from_env = getenv("SSH_LANGS");
	if (from_env != NULL)
		return (xstrdup(from_env));

	current = g11n_getlocale();
	if (current == NULL || *current == '\0')
		return (xstrdup("i-default"));

	return (g11n_locale2langtag(current));
}
3160Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated list of locale names into a comma-separated
 * string of language tags.
 *
 * Each locale is converted with g11n_locale2langtag(); locales without a
 * language tag are skipped and every distinct tag is kept once, in the
 * order of its first appearance.  The string is built by xjoin() and the
 * temporary tag list is released with g11n_freelist().
 */
char *
g11n_locales2langs(char **locale_set)
{
	char **src, **tags, **slot;
	char *tag, *joined;
	int n_locales, duplicate;

	n_locales = 0;
	for (src = locale_set; src != NULL && *src != NULL; src++)
		n_locales++;

	/* one slot per locale plus the terminator, all initially NULL */
	tags = (char **)xmalloc((n_locales + 1) * sizeof (char *));
	(void) memset(tags, 0, (n_locales + 1) * sizeof (char *));

	for (src = locale_set; src != NULL && *src != NULL; src++) {
		tag = g11n_locale2langtag(*src);
		if (tag == NULL)
			continue;

		/* walk the tags collected so far, stopping at the free slot */
		duplicate = 0;
		for (slot = tags; *slot != NULL; slot++) {
			if (strcmp(*slot, tag) == 0) {
				duplicate = 1;
				break;
			}
		}

		if (duplicate) {
			xfree(tag);
			continue;
		}

		/* slot is the first free entry; keep the list terminated */
		slot[0] = tag;
		slot[1] = NULL;
	}

	joined = xjoin(tags, ',');
	g11n_freelist(tags);

	return (joined);
}
3520Sstevel@tonic-gate 
/*
 * qsort() comparison function for arrays of C strings: d1 and d2 point to
 * array elements (string pointers) and the strings are ordered with
 * strcmp().  The elements are only read, so constness is preserved.
 */
static int
sortcmp(const void *d1, const void *d2)
{
	const char *s1 = *(const char *const *)d1;
	const char *s2 = *(const char *const *)d2;

	return (strcmp(s1, s2));
}
3610Sstevel@tonic-gate 
/*
 * Compare two language tags by their first two sub-tags.
 *
 * Returns:
 *	0 - the primary (language) sub-tags differ
 *	1 - the primary sub-tags are equal, but only one tag has a second
 *	    sub-tag or the second sub-tags differ
 *	2 - the primary sub-tags are equal and either neither tag has a
 *	    second sub-tag or the second sub-tags are equal as well
 *
 * Sub-tags are delimited by '-', compared case-sensitively, and anything
 * after the second sub-tag is ignored.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
	size_t len1, len2;

	/* length of the primary sub-tag: up to the first '-' or the end */
	len1 = strcspn(langtag1, "-");
	len2 = strcspn(langtag2, "-");

	/* no match */
	if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
		return (0);

	/*
	 * By construction langtag[len] is either '\0' or '-', so a tag has
	 * a second sub-tag exactly when that character is not '\0'.
	 */

	/* no country sub-tags - exact match */
	if (langtag1[len1] == '\0' && langtag2[len2] == '\0')
		return (2);

	/* one langtag has a country sub-tag, the other doesn't */
	if (langtag1[len1] == '\0' || langtag2[len2] == '\0')
		return (1);

	/* compare country subtags */
	langtag1 += len1 + 1;
	langtag2 += len2 + 1;

	len1 = strcspn(langtag1, "-");
	len2 = strcspn(langtag2, "-");

	if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
		return (1);

	/* country tags matched - exact match */
	return (2);
}
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate char *
g11n_langtag_set_intersect(char * set1,char * set2)4130Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
4140Sstevel@tonic-gate {
4152705Sjp161948 	char **list1, **list2, **list3, **p, **q, **r;
4162705Sjp161948 	char *set3, *lang_subtag;
4172705Sjp161948 	uint_t n1, n2, n3;
4182705Sjp161948 	uint_t do_append;
4192705Sjp161948 
4202705Sjp161948 	list1 = xsplit(set1, ',');
4212705Sjp161948 	list2 = xsplit(set2, ',');
4220Sstevel@tonic-gate 
4232705Sjp161948 	for (n1 = 0, p = list1; p && *p; p++, n1++)
4242705Sjp161948 		;
4252705Sjp161948 	for (n2 = 0, p = list2; p && *p; p++, n2++)
4262705Sjp161948 		;
4270Sstevel@tonic-gate 
4282705Sjp161948 	list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
4292705Sjp161948 	*list3 = NULL;
4300Sstevel@tonic-gate 
4312705Sjp161948 	/*
4322705Sjp161948 	 * we must not sort the user langtags - sorting or not the server's
4332705Sjp161948 	 * should not affect the outcome
4342705Sjp161948 	 */
4352705Sjp161948 	qsort(list2, n2, sizeof (char *), sortcmp);
4360Sstevel@tonic-gate 
4372705Sjp161948 	for (n3 = 0, p = list1; p && *p; p++) {
4382705Sjp161948 		do_append = 0;
4392705Sjp161948 		for (q = list2; q && *q; q++) {
4402705Sjp161948 			if (g11n_langtag_match(*p, *q) != 2) continue;
4412705Sjp161948 			/* append element */
4422705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4432705Sjp161948 				do_append = 1;
4442705Sjp161948 				if (!*r)
4452705Sjp161948 					break;
4462705Sjp161948 				if (strcmp(*p, *r) == 0) {
4472705Sjp161948 					do_append = 0;
4482705Sjp161948 					break;
4492705Sjp161948 				}
4502705Sjp161948 			}
4512705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4522705Sjp161948 				list3[n3++] = xstrdup(*p);
4532705Sjp161948 				list3[n3] = NULL;
4542705Sjp161948 			}
4550Sstevel@tonic-gate 		}
4560Sstevel@tonic-gate 	}
4572705Sjp161948 
4582705Sjp161948 	for (p = list1; p && *p; p++) {
4592705Sjp161948 		do_append = 0;
4602705Sjp161948 		for (q = list2; q && *q; q++) {
4612705Sjp161948 			if (g11n_langtag_match(*p, *q) != 1)
4622705Sjp161948 				continue;
4630Sstevel@tonic-gate 
4642705Sjp161948 			/* append element */
4652705Sjp161948 			lang_subtag = xstrdup(*p);
4662705Sjp161948 			if (strchr(lang_subtag, '-'))
4672705Sjp161948 				*(strchr(lang_subtag, '-')) = '\0';
4682705Sjp161948 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4692705Sjp161948 				do_append = 1;
4702705Sjp161948 				if (!*r)
4712705Sjp161948 					break;
4722705Sjp161948 				if (strcmp(lang_subtag, *r) == 0) {
4732705Sjp161948 					do_append = 0;
4742705Sjp161948 					break;
4752705Sjp161948 				}
4762705Sjp161948 			}
4772705Sjp161948 			if (do_append && n3 <= (n1 + n2)) {
4782705Sjp161948 				list3[n3++] = lang_subtag;
4792705Sjp161948 				list3[n3] = NULL;
4802705Sjp161948 			} else
4812705Sjp161948 				xfree(lang_subtag);
4820Sstevel@tonic-gate 		}
4830Sstevel@tonic-gate 	}
4840Sstevel@tonic-gate 
4852705Sjp161948 	set3 = xjoin(list3, ',');
4862705Sjp161948 	xfree_split_list(list1);
4872705Sjp161948 	xfree_split_list(list2);
4882705Sjp161948 	xfree_split_list(list3);
4890Sstevel@tonic-gate 
4902705Sjp161948 	return (set3);
4910Sstevel@tonic-gate }
4920Sstevel@tonic-gate 
/*
 * Pick the client's preferred language tag among those the server
 * supports: the first element of g11n_langtag_set_intersect() applied to
 * the client and server tag sets.
 *
 * Returns an allocated copy of that tag, or NULL if there is none.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
	char *list, *result = NULL;
	char **xlist;

	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

	if (!list)
		return (NULL);

	xlist = xsplit(list, ',');

	xfree(list);

	if (xlist == NULL)
		return (NULL);

	if (*xlist != NULL)
		result = xstrdup(*xlist);

	/* release the split list even when it turned out to be empty */
	xfree_split_list(xlist);

	return (result);
}
5170Sstevel@tonic-gate 
/*
 * Return 1 if the codeset part of the locale name (the text after the
 * first '.') is exactly "UTF-8", optionally followed by an '@' modifier,
 * and 0 otherwise.
 */
static int
locale_has_utf8_codeset(const char *locale)
{
	const char *dot = strchr(locale, '.');

	/* dot[6] is only read once the five bytes after the dot matched */
	return (dot != NULL && strncmp(dot + 1, "UTF-8", 5) == 0 &&
	    (dot[6] == '\0' || dot[6] == '@'));
}

/* Return 1 for the default locale names "C", "POSIX" and "common". */
static int
locale_is_default_name(const char *locale)
{
	return (strcmp(locale, "C") == 0 || strcmp(locale, "POSIX") == 0 ||
	    strcmp(locale, "common") == 0);
}

/*
 * qsort() comparison function for arrays of locale names.
 *
 * UTF-8 locales sort before all others; within each of those two groups
 * the default locales ("C", "POSIX", "common") sort after any other
 * locale; remaining ties are resolved with a straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
	const char *s1 = *(const char *const *)d1;
	const char *s2 = *(const char *const *)d2;
	int s1_is_utf8 = locale_has_utf8_codeset(s1);
	int s2_is_utf8 = locale_has_utf8_codeset(s2);
	int s1_is_default, s2_is_default;

	/* prefer UTF-8 locales */
	if (s1_is_utf8 != s2_is_utf8)
		return (s1_is_utf8 ? -1 : 1);

	/* prefer any locale over the default locales */
	s1_is_default = locale_is_default_name(s1);
	s2_is_default = locale_is_default_name(s2);
	if (s1_is_default != s2_is_default)
		return (s1_is_default ? 1 : -1);

	return (strcmp(s1, s2));
}
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 
5720Sstevel@tonic-gate char **
g11n_langtag_set_locale_set_intersect(char * langtag_set,char ** locale_set)5732705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
5740Sstevel@tonic-gate {
5752705Sjp161948 	char **langtag_list, **result, **p, **q, **r;
5762705Sjp161948 	char *s;
5772705Sjp161948 	uint_t do_append, n_langtags, n_locales, n_results, max_results;
5782705Sjp161948 
579*9845SJan.Pechanec@Sun.COM 	if (locale_set == NULL)
580*9845SJan.Pechanec@Sun.COM 		return (NULL);
581*9845SJan.Pechanec@Sun.COM 
5822705Sjp161948 	/* count lang tags and locales */
5832705Sjp161948 	for (n_locales = 0, p = locale_set; p && *p; p++)
5842705Sjp161948 		n_locales++;
5850Sstevel@tonic-gate 
5862705Sjp161948 	n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
5872705Sjp161948 	/* count the number of langtags */
5882705Sjp161948 	for (; s = strchr(s, ','); s++, n_langtags++)
5892705Sjp161948 		;
5902705Sjp161948 
5912705Sjp161948 	qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
5920Sstevel@tonic-gate 
5932705Sjp161948 	langtag_list = xsplit(langtag_set, ',');
5942705Sjp161948 	for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
5952705Sjp161948 		;
5960Sstevel@tonic-gate 
5972705Sjp161948 	max_results = MIN(n_locales, n_langtags) * 2;
5982705Sjp161948 	result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
5992705Sjp161948 	*result = NULL;
6002705Sjp161948 	n_results = 0;
6010Sstevel@tonic-gate 
6022705Sjp161948 	/* more specific matches first */
6032705Sjp161948 	for (p = langtag_list; p && *p; p++) {
6042705Sjp161948 		do_append = 0;
6052705Sjp161948 		for (q = locale_set; q && *q; q++) {
6062705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 2) {
6072705Sjp161948 				do_append = 1;
6082705Sjp161948 				for (r = result; (r - result) <=
6092705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
6102705Sjp161948 					if (!*r)
6112705Sjp161948 						break;
6122705Sjp161948 					if (strcmp(*q, *r) == 0) {
6132705Sjp161948 						do_append = 0;
6142705Sjp161948 						break;
6152705Sjp161948 					}
6162705Sjp161948 				}
6172705Sjp161948 				if (do_append && n_results < max_results) {
6182705Sjp161948 					result[n_results++] = xstrdup(*q);
6192705Sjp161948 					result[n_results] = NULL;
6202705Sjp161948 				}
6212705Sjp161948 				break;
6222705Sjp161948 			}
6230Sstevel@tonic-gate 		}
6240Sstevel@tonic-gate 	}
6250Sstevel@tonic-gate 
6262705Sjp161948 	for (p = langtag_list; p && *p; p++) {
6272705Sjp161948 		do_append = 0;
6282705Sjp161948 		for (q = locale_set; q && *q; q++) {
6292705Sjp161948 			if (g11n_langtag_matches_locale(*p, *q) == 1) {
6302705Sjp161948 				do_append = 1;
6312705Sjp161948 				for (r = result; (r - result) <=
6322705Sjp161948 				    MIN(n_locales, n_langtags); r++) {
6332705Sjp161948 					if (!*r)
6342705Sjp161948 						break;
6352705Sjp161948 					if (strcmp(*q, *r) == 0) {
6362705Sjp161948 						do_append = 0;
6372705Sjp161948 						break;
6382705Sjp161948 					}
6392705Sjp161948 				}
6402705Sjp161948 				if (do_append && n_results < max_results) {
6412705Sjp161948 					result[n_results++] = xstrdup(*q);
6422705Sjp161948 					result[n_results] = NULL;
6432705Sjp161948 				}
6442705Sjp161948 				break;
6452705Sjp161948 			}
6460Sstevel@tonic-gate 		}
6470Sstevel@tonic-gate 	}
6480Sstevel@tonic-gate 
6492705Sjp161948 	xfree_split_list(langtag_list);
6502705Sjp161948 
6512705Sjp161948 	return (result);
6520Sstevel@tonic-gate }
6530Sstevel@tonic-gate 
/*
 * Pick the server locale that best matches the client's language tags.
 *
 * The candidates are srvr_locales or, if that is NULL, the list returned
 * by g11n_getlocales().  The answer is the first entry produced by
 * g11n_langtag_set_locale_set_intersect(), returned as an allocated copy,
 * or NULL if there is no such entry.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
	char **candidates, **matches;
	char *best = NULL;

	candidates = (srvr_locales != NULL) ? srvr_locales : g11n_getlocales();

	matches = g11n_langtag_set_locale_set_intersect(clnt_langtags,
	    candidates);
	if (matches != NULL) {
		if (*matches != NULL)
			best = xstrdup(*matches);
		xfree_split_list(matches);
	}

	/* release the candidate list only if it was obtained here */
	if (candidates != NULL && candidates != srvr_locales)
		g11n_freelist(candidates);

	return (best);
}
6780Sstevel@tonic-gate 
/*
 * Functions for converting to UTF-8 from the local codeset and
 * converting from UTF-8 to the local codeset.
 *
 * The error_str parameter is a pointer to a char pointer in which to
 * store a string suitable for use with error() or fatal() or friends.
 * It also serves as the error indicator when NULL is returned.
 *
 * If conversion isn't necessary, *error_str is set to NULL, and
 * NULL is returned.
 * If a conversion error occurred, *error_str points to an error message,
 * and NULL is returned.
 */
6929600SNobutomo.Nakano@Sun.COM char *
g11n_convert_from_utf8(const char * str,uint_t * lenp,char ** error_str)6939600SNobutomo.Nakano@Sun.COM g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
6940Sstevel@tonic-gate {
6959600SNobutomo.Nakano@Sun.COM 	static char *last_codeset;
6969600SNobutomo.Nakano@Sun.COM 	static iconv_t cd = (iconv_t)-1;
6979600SNobutomo.Nakano@Sun.COM 	char	*codeset;
6982705Sjp161948 
6999600SNobutomo.Nakano@Sun.COM 	*error_str = NULL;
7000Sstevel@tonic-gate 
7019600SNobutomo.Nakano@Sun.COM 	codeset = nl_langinfo(CODESET);
7020Sstevel@tonic-gate 
7039600SNobutomo.Nakano@Sun.COM 	if (strcmp(codeset, "UTF-8") == 0)
7049600SNobutomo.Nakano@Sun.COM 		return (NULL);
7050Sstevel@tonic-gate 
7069600SNobutomo.Nakano@Sun.COM 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
7079600SNobutomo.Nakano@Sun.COM 		if (last_codeset != NULL) {
7089600SNobutomo.Nakano@Sun.COM 			xfree(last_codeset);
7099600SNobutomo.Nakano@Sun.COM 			last_codeset = NULL;
7102705Sjp161948 		}
7119600SNobutomo.Nakano@Sun.COM 		if (cd != (iconv_t)-1)
7129600SNobutomo.Nakano@Sun.COM 			(void) iconv_close(cd);
7130Sstevel@tonic-gate 
7149600SNobutomo.Nakano@Sun.COM 		if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
7159600SNobutomo.Nakano@Sun.COM 			*error_str = gettext("Cannot convert UTF-8 "
7169600SNobutomo.Nakano@Sun.COM 			    "strings to the local codeset");
7179600SNobutomo.Nakano@Sun.COM 			return (NULL);
7182705Sjp161948 		}
7199600SNobutomo.Nakano@Sun.COM 		last_codeset = xstrdup(codeset);
7200Sstevel@tonic-gate 	}
7219600SNobutomo.Nakano@Sun.COM 	return (do_iconv(cd, str, lenp, error_str));
7220Sstevel@tonic-gate }
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate char *
g11n_convert_to_utf8(const char * str,uint_t * lenp,int native,char ** error_str)7259600SNobutomo.Nakano@Sun.COM g11n_convert_to_utf8(const char *str, uint_t *lenp,
7269600SNobutomo.Nakano@Sun.COM     int native, char **error_str)
7270Sstevel@tonic-gate {
7289600SNobutomo.Nakano@Sun.COM 	static char *last_codeset;
7299600SNobutomo.Nakano@Sun.COM 	static iconv_t cd = (iconv_t)-1;
7309600SNobutomo.Nakano@Sun.COM 	char	*codeset;
7310Sstevel@tonic-gate 
7329600SNobutomo.Nakano@Sun.COM 	*error_str = NULL;
7330Sstevel@tonic-gate 
7349600SNobutomo.Nakano@Sun.COM 	if (native)
7359600SNobutomo.Nakano@Sun.COM 		codeset = native_codeset;
7369600SNobutomo.Nakano@Sun.COM 	else
7379600SNobutomo.Nakano@Sun.COM 		codeset = nl_langinfo(CODESET);
7382705Sjp161948 
7399600SNobutomo.Nakano@Sun.COM 	if (strcmp(codeset, "UTF-8") == 0)
7409600SNobutomo.Nakano@Sun.COM 		return (NULL);
7410Sstevel@tonic-gate 
7429600SNobutomo.Nakano@Sun.COM 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
7439600SNobutomo.Nakano@Sun.COM 		if (last_codeset != NULL) {
7449600SNobutomo.Nakano@Sun.COM 			xfree(last_codeset);
7459600SNobutomo.Nakano@Sun.COM 			last_codeset = NULL;
7469600SNobutomo.Nakano@Sun.COM 		}
7479600SNobutomo.Nakano@Sun.COM 		if (cd != (iconv_t)-1)
7489600SNobutomo.Nakano@Sun.COM 			(void) iconv_close(cd);
7490Sstevel@tonic-gate 
7509600SNobutomo.Nakano@Sun.COM 		if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
7519600SNobutomo.Nakano@Sun.COM 			*error_str = gettext("Cannot convert the "
7529600SNobutomo.Nakano@Sun.COM 			    "local codeset strings to UTF-8");
7539600SNobutomo.Nakano@Sun.COM 			return (NULL);
7542705Sjp161948 		}
7559600SNobutomo.Nakano@Sun.COM 		last_codeset = xstrdup(codeset);
7560Sstevel@tonic-gate 	}
7579600SNobutomo.Nakano@Sun.COM 	return (do_iconv(cd, str, lenp, error_str));
7580Sstevel@tonic-gate }
7590Sstevel@tonic-gate 
7600Sstevel@tonic-gate /*
7610Sstevel@tonic-gate  * Wrapper around iconv()
7620Sstevel@tonic-gate  *
7639600SNobutomo.Nakano@Sun.COM  * The caller is responsible for freeing the result. NULL is returned when
7640Sstevel@tonic-gate  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
7659600SNobutomo.Nakano@Sun.COM  * The caller must ensure that the input string isn't NULL pointer.
7660Sstevel@tonic-gate  */
7679600SNobutomo.Nakano@Sun.COM static char *
do_iconv(iconv_t cd,const char * str,uint_t * lenp,char ** err_str)7689600SNobutomo.Nakano@Sun.COM do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
7692705Sjp161948 {
7709600SNobutomo.Nakano@Sun.COM 	int	ilen, olen;
7719600SNobutomo.Nakano@Sun.COM 	size_t	ileft, oleft;
7729600SNobutomo.Nakano@Sun.COM 	char	*ostr, *optr;
7739600SNobutomo.Nakano@Sun.COM 	const char *istr;
7742705Sjp161948 
7759600SNobutomo.Nakano@Sun.COM 	ilen = *lenp;
7769600SNobutomo.Nakano@Sun.COM 	olen = ilen + 1;
7772705Sjp161948 
7789600SNobutomo.Nakano@Sun.COM 	ostr = NULL;
7799600SNobutomo.Nakano@Sun.COM 	for (;;) {
7809600SNobutomo.Nakano@Sun.COM 		olen *= 2;
7819600SNobutomo.Nakano@Sun.COM 		oleft = olen;
7829600SNobutomo.Nakano@Sun.COM 		ostr = optr = xrealloc(ostr, olen);
7839600SNobutomo.Nakano@Sun.COM 		istr = (const char *)str;
7849600SNobutomo.Nakano@Sun.COM 		if ((ileft = ilen) == 0)
7859600SNobutomo.Nakano@Sun.COM 			break;
7860Sstevel@tonic-gate 
7879600SNobutomo.Nakano@Sun.COM 		if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
7889600SNobutomo.Nakano@Sun.COM 			/* success: generate reset sequence */
7899600SNobutomo.Nakano@Sun.COM 			if (iconv(cd, NULL, NULL,
7909600SNobutomo.Nakano@Sun.COM 			    &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
7919600SNobutomo.Nakano@Sun.COM 				continue;
7929600SNobutomo.Nakano@Sun.COM 			}
7939600SNobutomo.Nakano@Sun.COM 			break;
7949600SNobutomo.Nakano@Sun.COM 		}
7959600SNobutomo.Nakano@Sun.COM 		/* failed */
7969600SNobutomo.Nakano@Sun.COM 		if (errno != E2BIG) {
7979600SNobutomo.Nakano@Sun.COM 			oleft = olen;
7989600SNobutomo.Nakano@Sun.COM 			(void) iconv(cd, NULL, NULL, &ostr, &oleft);
7999600SNobutomo.Nakano@Sun.COM 			xfree(ostr);
8009600SNobutomo.Nakano@Sun.COM 			*err_str = gettext("Codeset conversion failed");
8019600SNobutomo.Nakano@Sun.COM 			return (NULL);
8029600SNobutomo.Nakano@Sun.COM 		}
8039600SNobutomo.Nakano@Sun.COM 	}
8049600SNobutomo.Nakano@Sun.COM 	olen = optr - ostr;
8059600SNobutomo.Nakano@Sun.COM 	optr = xmalloc(olen + 1);
8069600SNobutomo.Nakano@Sun.COM 	(void) memcpy(optr, ostr, olen);
8079600SNobutomo.Nakano@Sun.COM 	xfree(ostr);
8089600SNobutomo.Nakano@Sun.COM 
8099600SNobutomo.Nakano@Sun.COM 	optr[olen] = '\0';
8109600SNobutomo.Nakano@Sun.COM 	*lenp = olen;
8119600SNobutomo.Nakano@Sun.COM 
8129600SNobutomo.Nakano@Sun.COM 	return (optr);
8139600SNobutomo.Nakano@Sun.COM }
8140Sstevel@tonic-gate 
/*
 * A filter for output string. Control and unprintable characters
 * are converted into visible form (eg "\ooo"), one escape per byte.
 * Printable characters, newline, carriage return and tab are copied
 * unchanged. Bytes that cannot be decoded as a character of the current
 * locale are never passed through raw; they are escaped as well.
 *
 * s must be a NUL-terminated, heap-allocated string. It is released by
 * this function; the caller continues with the returned string and is
 * responsible for freeing it.
 */
char *
g11n_filter_string(char *s)
{
	size_t	mb_cur_max = MB_CUR_MAX;
	char	*os = s;
	char	*obuf, *op;
	wchar_t	wc;
	int	nbytes, unsafe;

	/* all character may be converted into the form of \ooo */
	obuf = op = xmalloc(strlen(s) * 4 + 1);

	/* decode the string starting from the initial shift state */
	(void) mbtowc(NULL, NULL, 0);

	while (*s != '\0') {
		nbytes = mbtowc(&wc, s, mb_cur_max);
		if (nbytes <= 0) {
			/*
			 * Not a valid character in this locale. Escape the
			 * single offending byte instead of guessing at its
			 * printability, and reset the decoder so that the
			 * failure doesn't affect the following bytes.
			 */
			(void) mbtowc(NULL, NULL, 0);
			nbytes = 1;
			unsafe = 1;
		} else {
			unsafe = !iswprint(wc) &&
			    wc != L'\n' && wc != L'\r' && wc != L'\t';
		}

		if (unsafe) {
			/*
			 * control chars which need to be replaced
			 * with safe character sequence.
			 */
			while (nbytes > 0) {
				op += sprintf(op, "\\%03o",
				    (unsigned char)*s++);
				nbytes--;
			}
		} else {
			(void) memcpy(op, s, nbytes);
			op += nbytes;
			s += nbytes;
		}
	}
	*op = '\0';

	/* the input is consumed; hand back the filtered copy, trimmed */
	xfree(os);
	return (xrealloc(obuf, op - obuf + 1));
}
8622705Sjp161948 
/*
 * After a langtag has been negotiated, the server has to map it to a
 * system locale, and it does so based on the locales it supports. For the
 * locales supported on Solaris we know what a langtag is mapped to, but
 * the client has no way to know exactly which locale (and therefore which
 * encoding) will be used.
 *
 * With the fix for SSH_BUG_STRING_ENCODING, UTF-8 characters are
 * guaranteed to come over the wire, so this is no longer a problem as long
 * as both sides have the fix. If the server side doesn't have it, the
 * client can't safely perform code conversion because the encoding of the
 * incoming characters is unknown.
 *
 * To alleviate this situation, the encoding is guessed empirically from
 * the langtag.
 *
 * A langtag with a subtag can be mapped directly to a UTF-8 locale
 * (eg en-US can be mapped to en_US.UTF-8), with a few exceptions. Certain
 * xx_YY locales don't support UTF-8 encoding (probably due to lack of
 * L10N support). Those are:
 *
 * 	no_NO, no_NY, sr_SP, sr_YU
 *
 * They all use ISO8859-X encoding.
 *
 * Of the plain "xx" langtags, some can be mapped to "xx.UTF-8" but others
 * cannot, and for the latter "xx" itself has to be used as the locale
 * name. Those locales are:
 *
 * 	ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
 *
 * Their encodings vary; they could be ISO8859-X or EUC or something else.
 * So no code conversion is performed for these langtags.
 */
static const char *non_utf8_langtag[] = {
	/* langtags with a subtag whose locale has no UTF-8 variant */
	"no-NO", "no-NY", "sr-SP", "sr-YU",
	/* plain langtags that map to a locale named just "xx" */
	"ar", "ca", "cs", "da", "et", "fi",
	"he", "hu", "ja", "lt", "lv", "nl",
	"no", "pt", "sh", "th", "tr",
	/* end of list */
	NULL
};
9012705Sjp161948 
9029600SNobutomo.Nakano@Sun.COM void
g11n_test_langtag(const char * lang,int server)9039600SNobutomo.Nakano@Sun.COM g11n_test_langtag(const char *lang, int server)
9049600SNobutomo.Nakano@Sun.COM {
9059600SNobutomo.Nakano@Sun.COM 	const char	**lp;
9069600SNobutomo.Nakano@Sun.COM 
9079600SNobutomo.Nakano@Sun.COM 	if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
9089600SNobutomo.Nakano@Sun.COM 		/*
9099600SNobutomo.Nakano@Sun.COM 		 * We negotiated with real locale name (not lang tag).
9109600SNobutomo.Nakano@Sun.COM 		 * We shouldn't expect UTF-8, thus shouldn't do code
9119600SNobutomo.Nakano@Sun.COM 		 * conversion.
9129600SNobutomo.Nakano@Sun.COM 		 */
9139600SNobutomo.Nakano@Sun.COM 		datafellows |= SSH_BUG_STRING_ENCODING;
9149600SNobutomo.Nakano@Sun.COM 		return;
9159600SNobutomo.Nakano@Sun.COM 	}
9169600SNobutomo.Nakano@Sun.COM 
9179600SNobutomo.Nakano@Sun.COM 	if (datafellows & SSH_BUG_STRING_ENCODING) {
9189600SNobutomo.Nakano@Sun.COM 		if (server) {
9199600SNobutomo.Nakano@Sun.COM 			/*
9209600SNobutomo.Nakano@Sun.COM 			 * Whatever bug exists in the client side, server
9219600SNobutomo.Nakano@Sun.COM 			 * side has nothing to do, since server has no way
9229600SNobutomo.Nakano@Sun.COM 			 * to know what actual encoding is used on the client
9239600SNobutomo.Nakano@Sun.COM 			 * side. For example, even if we negotiated with
9249600SNobutomo.Nakano@Sun.COM 			 * en_US, client locale could be en_US.ISO8859-X or
9259600SNobutomo.Nakano@Sun.COM 			 * en_US.UTF-8.
9269600SNobutomo.Nakano@Sun.COM 			 */
9279600SNobutomo.Nakano@Sun.COM 			return;
9289600SNobutomo.Nakano@Sun.COM 		}
9299600SNobutomo.Nakano@Sun.COM 		/*
9309600SNobutomo.Nakano@Sun.COM 		 * We are on the client side. We'll check with known
9319600SNobutomo.Nakano@Sun.COM 		 * locales to see if non-UTF8 characters could come in.
9329600SNobutomo.Nakano@Sun.COM 		 */
9339600SNobutomo.Nakano@Sun.COM 		for (lp = non_utf8_langtag; *lp != NULL; lp++) {
9349600SNobutomo.Nakano@Sun.COM 			if (strcmp(lang, *lp) == 0)
9359600SNobutomo.Nakano@Sun.COM 				break;
9369600SNobutomo.Nakano@Sun.COM 		}
9379600SNobutomo.Nakano@Sun.COM 		if (*lp == NULL) {
9389600SNobutomo.Nakano@Sun.COM 			debug2("Server is expected to use UTF-8 locale");
9399600SNobutomo.Nakano@Sun.COM 			datafellows &= ~SSH_BUG_STRING_ENCODING;
9409600SNobutomo.Nakano@Sun.COM 		} else {
9419600SNobutomo.Nakano@Sun.COM 			/*
9429600SNobutomo.Nakano@Sun.COM 			 * Server is expected to use non-UTF8 encoding.
9439600SNobutomo.Nakano@Sun.COM 			 */
9449600SNobutomo.Nakano@Sun.COM 			debug2("Enforcing no code conversion: %s", lang);
9459600SNobutomo.Nakano@Sun.COM 		}
9469600SNobutomo.Nakano@Sun.COM 	}
9470Sstevel@tonic-gate }
9485562Sjp161948 
/*
 * Free all strings in the list and then free the list itself. We know that the
 * list ends with a NULL pointer. Like free(), a NULL list is accepted and
 * ignored.
 */
void
g11n_freelist(char **list)
{
	char	**entry;

	if (list == NULL)
		return;

	for (entry = list; *entry != NULL; entry++)
		xfree(*entry);

	xfree(list);
}
965