xref: /onnv-gate/usr/src/cmd/ssh/libssh/common/g11n.c (revision 2628:a16af3d2d7bd)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*2628Sjp161948  * Common Development and Distribution License (the "License").
6*2628Sjp161948  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  *
21*2628Sjp161948  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
220Sstevel@tonic-gate  * Use is subject to license terms.
230Sstevel@tonic-gate  */
240Sstevel@tonic-gate 
250Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <errno.h>
280Sstevel@tonic-gate #include <locale.h>
290Sstevel@tonic-gate #include <langinfo.h>
300Sstevel@tonic-gate #include <iconv.h>
310Sstevel@tonic-gate #include <ctype.h>
320Sstevel@tonic-gate #include <strings.h>
330Sstevel@tonic-gate #include <string.h>
340Sstevel@tonic-gate #include <stdio.h>
350Sstevel@tonic-gate #include <stdlib.h>
360Sstevel@tonic-gate #include "includes.h"
370Sstevel@tonic-gate #include "xmalloc.h"
380Sstevel@tonic-gate #include "xlist.h"
390Sstevel@tonic-gate 
400Sstevel@tonic-gate #ifdef MIN
410Sstevel@tonic-gate #undef MIN
420Sstevel@tonic-gate #endif /* MIN */
430Sstevel@tonic-gate 
440Sstevel@tonic-gate #define MIN(x, y)		    ((x) < (y) ? (x) : (y))
450Sstevel@tonic-gate 
460Sstevel@tonic-gate #define LOCALE_PATH "/usr/bin/locale"
470Sstevel@tonic-gate 
480Sstevel@tonic-gate #define LANGTAG_MAX 5 /* two-char country code, '-' and two-char region code */
490Sstevel@tonic-gate 
500Sstevel@tonic-gate static u_char * do_iconv(iconv_t cd, u_int *mul_ptr,
510Sstevel@tonic-gate 		       const void *buf, u_int len,
520Sstevel@tonic-gate 		       u_int *outlen, int *err,
530Sstevel@tonic-gate 		       u_char **err_str);
540Sstevel@tonic-gate 
550Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2);
560Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale);
570Sstevel@tonic-gate 
580Sstevel@tonic-gate u_int
590Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str);
600Sstevel@tonic-gate 
610Sstevel@tonic-gate u_int
620Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str);
630Sstevel@tonic-gate 
640Sstevel@tonic-gate static
650Sstevel@tonic-gate char *
660Sstevel@tonic-gate g11n_locale2langtag(char *locale)
670Sstevel@tonic-gate {
680Sstevel@tonic-gate     char *langtag;
690Sstevel@tonic-gate 
700Sstevel@tonic-gate     /* base cases */
710Sstevel@tonic-gate     if (!locale || !*locale) return NULL;
720Sstevel@tonic-gate 
730Sstevel@tonic-gate     if (strcmp(locale, "POSIX") == 0 ||
740Sstevel@tonic-gate 	strcmp(locale, "C") == 0) return "i-default";
750Sstevel@tonic-gate 
760Sstevel@tonic-gate     /* Punt for language codes which are not exactly 2 letters */
770Sstevel@tonic-gate     if (strlen(locale) < 2 ||
780Sstevel@tonic-gate 	!isalpha(locale[0]) ||
790Sstevel@tonic-gate 	!isalpha(locale[1]) ||
800Sstevel@tonic-gate 	(locale[2] != '\0' &&
810Sstevel@tonic-gate 	locale[2] != '_' &&
820Sstevel@tonic-gate 	locale[2] != '.' &&
830Sstevel@tonic-gate 	locale[2] != '@'))
840Sstevel@tonic-gate 	return NULL;
850Sstevel@tonic-gate 
860Sstevel@tonic-gate 
870Sstevel@tonic-gate     /* We have a primary language sub-tag */
880Sstevel@tonic-gate     langtag = (char *) xmalloc(LANGTAG_MAX + 1);
890Sstevel@tonic-gate 
900Sstevel@tonic-gate     strncpy(langtag, locale, 2);
910Sstevel@tonic-gate     langtag[2] = '\0';
920Sstevel@tonic-gate 
930Sstevel@tonic-gate     /* Do we have country sub-tag? */
940Sstevel@tonic-gate     if (locale[2] == '_') {
950Sstevel@tonic-gate 	if (strlen(locale) < 5 ||
960Sstevel@tonic-gate 	    !isalpha(locale[3]) ||
970Sstevel@tonic-gate 	    !isalpha(locale[4]) ||
980Sstevel@tonic-gate 	    (locale[5] != '\0' && (locale[5] != '.' && locale[5] != '@'))) {
990Sstevel@tonic-gate 	    return langtag;
1000Sstevel@tonic-gate 	}
1010Sstevel@tonic-gate 
1020Sstevel@tonic-gate 	/* yes, we do */
1030Sstevel@tonic-gate 	/* if (snprintf(langtag, 6, "%s-%s,%s", lang_subtag,
1040Sstevel@tonic-gate 		     country_subtag, langtag) == 8) */
1050Sstevel@tonic-gate 	if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale,
1060Sstevel@tonic-gate 		     2, locale+3) == 5)
1070Sstevel@tonic-gate 	    return langtag;
1080Sstevel@tonic-gate     }
1090Sstevel@tonic-gate 
1100Sstevel@tonic-gate     /* In all other cases we just use the primary language sub-tag */
1110Sstevel@tonic-gate     return langtag;
1120Sstevel@tonic-gate }
1130Sstevel@tonic-gate 
1140Sstevel@tonic-gate u_int
1150Sstevel@tonic-gate g11n_langtag_is_default(char *langtag)
1160Sstevel@tonic-gate {
1170Sstevel@tonic-gate     return (strcmp(langtag, "i-default") == 0);
1180Sstevel@tonic-gate }
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate /*
1210Sstevel@tonic-gate  * This lang tag / locale matching function works only for two-character
1220Sstevel@tonic-gate  * language primary sub-tags and two-character country sub-tags.
1230Sstevel@tonic-gate  */
1240Sstevel@tonic-gate u_int
1250Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale)
1260Sstevel@tonic-gate {
1270Sstevel@tonic-gate     /* Match "i-default" to the process' current locale if possible */
1280Sstevel@tonic-gate     if (g11n_langtag_is_default(langtag)) {
1290Sstevel@tonic-gate 	if (strcasecmp(locale, "POSIX") == 0 ||
1300Sstevel@tonic-gate 	    strcasecmp(locale, "C") == 0)
1310Sstevel@tonic-gate 	    return 1;
1320Sstevel@tonic-gate 	else
1330Sstevel@tonic-gate 	    return 0;
1340Sstevel@tonic-gate     }
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate     /* locale must be at least 2 chars long and the lang part must be
1370Sstevel@tonic-gate      * exactly two characters */
1380Sstevel@tonic-gate     if (strlen(locale) < 2 ||
1390Sstevel@tonic-gate 	(!isalpha(locale[0]) || !isalpha(locale[1]) ||
1400Sstevel@tonic-gate 	(locale[2] != '\0' && locale[2] != '_' && locale[2] != '.' && locale[2] != '@')))
1410Sstevel@tonic-gate 	return 0;
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate     /* same thing with the langtag */
1440Sstevel@tonic-gate     if (strlen(langtag) < 2 ||
1450Sstevel@tonic-gate 	(!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
1460Sstevel@tonic-gate 	(langtag[2] != '\0' && langtag[2] != '-')))
1470Sstevel@tonic-gate 	return 0;
1480Sstevel@tonic-gate 
1490Sstevel@tonic-gate     /* primary language sub-tag and the locale's language part must match */
1500Sstevel@tonic-gate     if (strncasecmp(langtag, locale, 2) != 0)
1510Sstevel@tonic-gate 	return 0;
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate     /* primary language sub-tag and the locale's language match, now
1540Sstevel@tonic-gate      * fuzzy check country part */
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate     /* neither langtag nor locale have more than one component */
1570Sstevel@tonic-gate     if (langtag[2] == '\0' &&
1580Sstevel@tonic-gate         (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
1590Sstevel@tonic-gate 	return 2;
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate     /* langtag has only one sub-tag... */
1620Sstevel@tonic-gate     if (langtag[2] == '\0')
1630Sstevel@tonic-gate 	return 1;
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate     /* locale has no country code... */
1660Sstevel@tonic-gate     if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
1670Sstevel@tonic-gate 	return 1;
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate     /* langtag has more than one subtag and the locale has a country code */
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate     /* ignore second subtag if not two chars */
1720Sstevel@tonic-gate     if (strlen(langtag) < 5)
1730Sstevel@tonic-gate 	return 1;
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate     if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
1760Sstevel@tonic-gate 	(langtag[5] != '\0' && langtag[5] != '-'))
1770Sstevel@tonic-gate 	return 1;
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate     /* ignore rest of locale if there is no two-character country part */
1800Sstevel@tonic-gate     if (strlen(locale) < 5)
1810Sstevel@tonic-gate 	return 1;
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate     if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
1840Sstevel@tonic-gate 	(locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
1850Sstevel@tonic-gate 	return 1;
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate     /* if the country part matches, return 2 */
1880Sstevel@tonic-gate     if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
1890Sstevel@tonic-gate 	return 2;
1900Sstevel@tonic-gate 
1910Sstevel@tonic-gate     return 1;
1920Sstevel@tonic-gate }
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate char *
1950Sstevel@tonic-gate g11n_getlocale()
1960Sstevel@tonic-gate {
1970Sstevel@tonic-gate     /* We have one text domain - always set it */
1980Sstevel@tonic-gate     (void) textdomain(TEXT_DOMAIN);
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate     /* If the locale is not set, set it from the env vars */
201*2628Sjp161948     if (!setlocale(LC_MESSAGES, NULL))
202*2628Sjp161948 	(void) setlocale(LC_MESSAGES, "");
2030Sstevel@tonic-gate 
204*2628Sjp161948     return setlocale(LC_MESSAGES, NULL);
2050Sstevel@tonic-gate }
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate void
2080Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
2090Sstevel@tonic-gate {
2100Sstevel@tonic-gate     char *curr;
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate     /* We have one text domain - always set it */
2130Sstevel@tonic-gate     (void) textdomain(TEXT_DOMAIN);
2140Sstevel@tonic-gate 
2150Sstevel@tonic-gate     if (!locale)
2160Sstevel@tonic-gate 	return;
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate     if (*locale && ((curr = setlocale(category, NULL))) &&
2190Sstevel@tonic-gate 	strcmp(curr, locale) == 0)
2200Sstevel@tonic-gate 	return;
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate     /*
223*2628Sjp161948      * If <category> is bogus, setlocale() will do nothing.
2240Sstevel@tonic-gate      */
225*2628Sjp161948     (void) setlocale(category, locale);
226*2628Sjp161948 
2270Sstevel@tonic-gate     return;
2280Sstevel@tonic-gate }
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate char **
2310Sstevel@tonic-gate g11n_getlocales()
2320Sstevel@tonic-gate {
2330Sstevel@tonic-gate     FILE *locale_out;
2340Sstevel@tonic-gate     u_int n_elems, list_size, long_line = 0;
2350Sstevel@tonic-gate     char **list;
2360Sstevel@tonic-gate     char locale[64];	/* 64 bytes is plenty for locale names */
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate     if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) {
2390Sstevel@tonic-gate 	return NULL;
2400Sstevel@tonic-gate     }
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate     /*
2430Sstevel@tonic-gate      * Start with enough room for 65 locales - that's a lot fewer than
2440Sstevel@tonic-gate      * all the locales available for installation, but a lot more than
2450Sstevel@tonic-gate      * what most users will need and install
2460Sstevel@tonic-gate      */
2470Sstevel@tonic-gate     n_elems=0;
2480Sstevel@tonic-gate     list_size=192;
2490Sstevel@tonic-gate     list = (char **) xmalloc(sizeof(char *) * (list_size + 1));
2500Sstevel@tonic-gate     memset(list, 0, sizeof(char *) * (list_size + 1));
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate     while (fgets(locale, sizeof(locale), locale_out)) {
2530Sstevel@tonic-gate 	/* skip long locale names (if any) */
2540Sstevel@tonic-gate 	if (!strchr(locale, '\n')) {
2550Sstevel@tonic-gate 	    long_line = 1;
2560Sstevel@tonic-gate 	    continue;
2570Sstevel@tonic-gate 	}
2580Sstevel@tonic-gate 	else if (long_line) {
2590Sstevel@tonic-gate 	    long_line = 0;
2600Sstevel@tonic-gate 	    continue;
2610Sstevel@tonic-gate 	}
2620Sstevel@tonic-gate 	if (strncmp(locale, "iso_8859", 8) == 0)
2630Sstevel@tonic-gate 	    continue;		    /* ignore locale names like "iso_8859-1" */
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate 	if (n_elems == list_size) {
2660Sstevel@tonic-gate 	    list_size *= 2;
2670Sstevel@tonic-gate 	    list = (char **) xrealloc((void *) list, (list_size + 1) * sizeof(char *));
2680Sstevel@tonic-gate 	    memset(&list[n_elems+1], 0, sizeof(char *) * (list_size - n_elems + 1));
2690Sstevel@tonic-gate 	}
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate 	*(strchr(locale, '\n')) = '\0';      /* remove the trailing \n */
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 	list[n_elems++] = xstrdup(locale);
2740Sstevel@tonic-gate     }
2750Sstevel@tonic-gate     list[n_elems] = NULL;
2760Sstevel@tonic-gate     (void) pclose(locale_out);
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate     qsort(list, n_elems - 1, sizeof(char *), locale_cmp);
2790Sstevel@tonic-gate     return list;
2800Sstevel@tonic-gate }
2810Sstevel@tonic-gate 
/*
 * Return the language tag list for this process.
 *
 * The SSH_LANGS environment variable, when set, is returned as a copy.
 * Otherwise the result is derived from the locale reported by
 * g11n_getlocale(): "i-default" when that is NULL or empty, else whatever
 * g11n_locale2langtag() produces for it (which may be NULL).
 */
char *
g11n_getlangs()
{
    char *env_langs;
    char *locale;

    env_langs = getenv("SSH_LANGS");
    if (env_langs != NULL)
        return xstrdup(env_langs);

    locale = g11n_getlocale();
    if (locale == NULL || *locale == '\0')
        return xstrdup("i-default");

    return g11n_locale2langtag(locale);
}
2970Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated array of locale names into a comma-separated
 * string of distinct language tags, in order of first appearance.
 *
 * Locales for which g11n_locale2langtag() returns NULL are ignored.
 * The result is whatever xjoin() returns for the collected tags.
 */
char *
g11n_locales2langs(char **locale_set)
{
    char **p, **r, **q;
    char *langtag, *joined;
    int locales, skip;

    for (locales = 0, p = locale_set; p && *p; p++)
        locales++;

    /* one slot per locale plus the terminator; kept NULL-terminated */
    r = (char **) xmalloc((locales + 1) * sizeof(char *));
    memset(r, 0, (locales + 1) * sizeof(char *));

    for (p = locale_set; p && *p; p++) {
        if ((langtag = g11n_locale2langtag(*p)) == NULL)
            continue;

        /* look for the tag among those already collected */
        skip = 0;
        for (q = r; *q != NULL; q++) {
            if (strcmp(*q, langtag) == 0) {
                skip = 1;
                break;
            }
        }

        /* q points at the terminator when the tag is new */
        if (!skip) {
            *(q++) = langtag;
            *q = NULL;
        }
        /*
         * NOTE(review): a duplicate langtag is dropped here without
         * being freed, and the collected tags are not freed below,
         * because g11n_locale2langtag() may hand back a string literal
         * - confirm ownership before adding xfree() calls.
         */
    }

    joined = xjoin(r, ',');

    /*
     * Release the scratch array.  This relies on xjoin() returning storage
     * independent of its input list, as g11n_langtag_set_intersect() also
     * assumes when it frees its list right after xjoin().
     */
    xfree(r);

    return joined;
}
3260Sstevel@tonic-gate 
/*
 * qsort() comparison callback for arrays of C strings: each argument points
 * at a string pointer, and the strings are ordered with strcmp().
 */
static int
sortcmp(const void *d1, const void *d2)
{
    const char *const *lhs = (const char *const *)d1;
    const char *const *rhs = (const char *const *)d2;

    return strcmp(*lhs, *rhs);
}
3360Sstevel@tonic-gate 
/*
 * Compare two language tags sub-tag by sub-tag (case-sensitively).
 *
 * Returns:
 *   0 - the first sub-tags differ
 *   1 - the first sub-tags are equal, but the second sub-tag is missing
 *       from one of the tags or differs between them
 *   2 - the first sub-tags are equal and either neither tag has a second
 *       sub-tag or the second sub-tags are equal too
 *
 * Anything after the second sub-tag is not examined.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
    size_t n1, n2;
    const char *rest1, *rest2;

    /* length of the first sub-tag: up to the first '-' or the end */
    n1 = strcspn(langtag1, "-");
    n2 = strcspn(langtag2, "-");

    /* no match */
    if (n1 != n2 || strncmp(langtag1, langtag2, n1) != 0)
        return 0;

    /* each of these points at either '-' or the terminating NUL */
    rest1 = langtag1 + n1;
    rest2 = langtag2 + n2;

    /* no second sub-tags - exact match */
    if (*rest1 == '\0' && *rest2 == '\0')
        return 2;

    /* one langtag has a second sub-tag, the other doesn't */
    if (*rest1 == '\0' || *rest2 == '\0')
        return 1;

    /* both have a second sub-tag: step over the '-' and compare them */
    rest1++;
    rest2++;

    n1 = strcspn(rest1, "-");
    n2 = strcspn(rest2, "-");

    if (n1 != n2 || strncmp(rest1, rest2, n1) != 0)
        return 1;

    /* second sub-tags matched - exact match */
    return 2;
}
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate char *
3920Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
3930Sstevel@tonic-gate {
3940Sstevel@tonic-gate     char **list1, **list2, **list3, **p, **q, **r;
3950Sstevel@tonic-gate     char *set3, *lang_subtag;
3960Sstevel@tonic-gate     u_int n1, n2, n3;
3970Sstevel@tonic-gate     u_int do_append;
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate     list1 = xsplit(set1, ',');
4000Sstevel@tonic-gate     list2 = xsplit(set2, ',');
4010Sstevel@tonic-gate     for (n1 = 0, p = list1 ; p && *p ; p++, n1++) ;
4020Sstevel@tonic-gate     for (n2 = 0, p = list2 ; p && *p ; p++, n2++) ;
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate     list3 = (char **) xmalloc(sizeof(char *) * (n1 + n2 + 1));
4050Sstevel@tonic-gate     *list3 = NULL;
4060Sstevel@tonic-gate 
4070Sstevel@tonic-gate     /* we must not sort the user langtags - sorting or not the server's
4080Sstevel@tonic-gate      * should not affect the outcome
4090Sstevel@tonic-gate      */
4100Sstevel@tonic-gate     qsort(list2, n2, sizeof(char *), sortcmp);
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate     for (n3 = 0, p = list1 ; p && *p ; p++) {
4130Sstevel@tonic-gate 	do_append = 0;
4140Sstevel@tonic-gate 	for (q = list2 ; q && *q ; q++) {
4150Sstevel@tonic-gate 	    if (g11n_langtag_match(*p, *q) != 2) continue;
4160Sstevel@tonic-gate 	    /* append element */
4170Sstevel@tonic-gate 	    for (r = list3; (r - list3) <= (n1 + n2) ; r++) {
4180Sstevel@tonic-gate 		do_append = 1;
4190Sstevel@tonic-gate 		if (!*r) break;
4200Sstevel@tonic-gate 		if (strcmp(*p, *r) == 0) {
4210Sstevel@tonic-gate 		    do_append = 0;
4220Sstevel@tonic-gate 		    break;
4230Sstevel@tonic-gate 		}
4240Sstevel@tonic-gate 	    }
4250Sstevel@tonic-gate 	    if (do_append && n3 <= (n1 + n2)) {
4260Sstevel@tonic-gate 		list3[n3++] = xstrdup(*p);
4270Sstevel@tonic-gate 		list3[n3] = NULL;
4280Sstevel@tonic-gate 	    }
4290Sstevel@tonic-gate 	}
4300Sstevel@tonic-gate     }
4310Sstevel@tonic-gate 
4320Sstevel@tonic-gate     for (p = list1 ; p && *p ; p++) {
4330Sstevel@tonic-gate 	do_append = 0;
4340Sstevel@tonic-gate 	for (q = list2 ; q && *q ; q++) {
4350Sstevel@tonic-gate 	    if (g11n_langtag_match(*p, *q) != 1) continue;
4360Sstevel@tonic-gate 	    /* append element */
4370Sstevel@tonic-gate 	    lang_subtag = xstrdup(*p);
4380Sstevel@tonic-gate 	    if (strchr(lang_subtag, '-'))
4390Sstevel@tonic-gate 		*(strchr(lang_subtag, '-')) = '\0';
4400Sstevel@tonic-gate 	    for (r = list3; (r - list3) <= (n1 + n2) ; r++) {
4410Sstevel@tonic-gate 		do_append = 1;
4420Sstevel@tonic-gate 		if (!*r) break;
4430Sstevel@tonic-gate 		if (strcmp(lang_subtag, *r) == 0) {
4440Sstevel@tonic-gate 		    do_append = 0;
4450Sstevel@tonic-gate 		    break;
4460Sstevel@tonic-gate 		}
4470Sstevel@tonic-gate 	    }
4480Sstevel@tonic-gate 	    if (do_append && n3 <= (n1 + n2)) {
4490Sstevel@tonic-gate 		list3[n3++] = lang_subtag;
4500Sstevel@tonic-gate 		list3[n3] = NULL;
4510Sstevel@tonic-gate 	    }
4520Sstevel@tonic-gate 	    else
4530Sstevel@tonic-gate 		xfree(lang_subtag);
4540Sstevel@tonic-gate 	}
4550Sstevel@tonic-gate     }
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate     set3 = xjoin(list3, ',');
4580Sstevel@tonic-gate     xfree_split_list(list1);
4590Sstevel@tonic-gate     xfree_split_list(list2);
4600Sstevel@tonic-gate     xfree_split_list(list3);
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate     return set3;
4630Sstevel@tonic-gate }
4640Sstevel@tonic-gate 
4650Sstevel@tonic-gate char *
4660Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
4670Sstevel@tonic-gate {
4680Sstevel@tonic-gate     char *list, *result;
4690Sstevel@tonic-gate     char **xlist;
4700Sstevel@tonic-gate 
4710Sstevel@tonic-gate     /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
4720Sstevel@tonic-gate     list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate     if (!list)
4750Sstevel@tonic-gate 	    return NULL;
4760Sstevel@tonic-gate 
4770Sstevel@tonic-gate     xlist = xsplit(list, ',');
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate     xfree(list);
4800Sstevel@tonic-gate 
4810Sstevel@tonic-gate     if (!xlist || !*xlist)
4820Sstevel@tonic-gate 	    return NULL;
4830Sstevel@tonic-gate 
4840Sstevel@tonic-gate     result = xstrdup(*xlist);
4850Sstevel@tonic-gate 
4860Sstevel@tonic-gate     xfree_split_list(xlist);
4870Sstevel@tonic-gate 
4880Sstevel@tonic-gate     return result;
4890Sstevel@tonic-gate }
4900Sstevel@tonic-gate 
/*
 * Return non-zero if name has a codeset part that is exactly "UTF-8", i.e.
 * ".UTF-8" (after the first '.') followed by the end of the name or '@'.
 */
static int
locale_has_utf8_codeset(const char *name)
{
    const char *dot = strchr(name, '.');

    if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
        return 0;

    return (dot[6] == '\0' || dot[6] == '@');
}

/* Return non-zero if name is one of "C", "POSIX" or "common". */
static int
locale_is_default_name(const char *name)
{
    return (strcmp(name, "C") == 0 ||
        strcmp(name, "POSIX") == 0 ||
        strcmp(name, "common") == 0);
}

/*
 * Compare locales for qsort(): UTF-8 codesets sort before all others, the
 * default locales ("C", "POSIX", "common") sort after all others, and ties
 * are broken with a straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
    const char *s1 = *(char *const *)d1;
    const char *s2 = *(char *const *)d2;
    int utf8_1 = locale_has_utf8_codeset(s1);
    int utf8_2 = locale_has_utf8_codeset(s2);
    int default_1, default_2;

    /* prefer UTF-8 locales */
    if (utf8_1 != utf8_2)
        return utf8_1 ? -1 : 1;

    /* prefer any locale over the default locales */
    default_1 = locale_is_default_name(s1);
    default_2 = locale_is_default_name(s2);
    if (default_1 != default_2)
        return default_1 ? 1 : -1;

    return strcmp(s1, s2);
}
5440Sstevel@tonic-gate 
5450Sstevel@tonic-gate 
5460Sstevel@tonic-gate char **
5470Sstevel@tonic-gate g11n_langtag_set_locale_set_intersect(char *langtag_set,
5480Sstevel@tonic-gate 				      char **locale_set)
5490Sstevel@tonic-gate {
5500Sstevel@tonic-gate     char **langtag_list, **result, **p, **q, **r;
5510Sstevel@tonic-gate     char *s;
5520Sstevel@tonic-gate     u_int do_append, n_langtags, n_locales, n_results, max_results;
5530Sstevel@tonic-gate 
5540Sstevel@tonic-gate     /* Count lang tags and locales */
5550Sstevel@tonic-gate     for (n_locales = 0, p = locale_set ; p && *p ; p++) n_locales++;
5560Sstevel@tonic-gate     n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
5570Sstevel@tonic-gate     for ( ; s = strchr(s, ',') ; s++, n_langtags++) ;
5580Sstevel@tonic-gate     /*
5590Sstevel@tonic-gate     while ((s = strchr(s, ','))) {
5600Sstevel@tonic-gate 	n_langtags++;
5610Sstevel@tonic-gate 	s++;
5620Sstevel@tonic-gate     }
5630Sstevel@tonic-gate      */
5640Sstevel@tonic-gate 
5650Sstevel@tonic-gate     qsort(locale_set, n_locales, sizeof(char *), locale_cmp);
5660Sstevel@tonic-gate 
5670Sstevel@tonic-gate     langtag_list = xsplit(langtag_set, ',');
5680Sstevel@tonic-gate     for ( n_langtags = 0, p = langtag_list ; p && *p ; p++, n_langtags++);
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate     max_results = MIN(n_locales, n_langtags) * 2;
5710Sstevel@tonic-gate     result = (char **) xmalloc(sizeof(char *) * (max_results + 1));
5720Sstevel@tonic-gate     *result = NULL;
5730Sstevel@tonic-gate     n_results = 0;
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate     /* More specific matches first */
5760Sstevel@tonic-gate     for (p = langtag_list ; p && *p ; p++) {
5770Sstevel@tonic-gate 	do_append = 0;
5780Sstevel@tonic-gate 	for (q = locale_set ; q && *q ; q++) {
5790Sstevel@tonic-gate 	    if (g11n_langtag_matches_locale(*p, *q) == 2) {
5800Sstevel@tonic-gate 		do_append = 1;
5810Sstevel@tonic-gate 		for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) {
5820Sstevel@tonic-gate 		    if (!*r) break;
5830Sstevel@tonic-gate 		    if (strcmp(*q, *r) == 0) {
5840Sstevel@tonic-gate 			do_append = 0;
5850Sstevel@tonic-gate 			break;
5860Sstevel@tonic-gate 		    }
5870Sstevel@tonic-gate 		}
5880Sstevel@tonic-gate 		if (do_append && n_results < max_results) {
5890Sstevel@tonic-gate 		    result[n_results++] = xstrdup(*q);
5900Sstevel@tonic-gate 		    result[n_results] = NULL;
5910Sstevel@tonic-gate 		}
5920Sstevel@tonic-gate 		break;
5930Sstevel@tonic-gate 	    }
5940Sstevel@tonic-gate 	}
5950Sstevel@tonic-gate     }
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate     for (p = langtag_list ; p && *p ; p++) {
5980Sstevel@tonic-gate 	do_append = 0;
5990Sstevel@tonic-gate 	for (q = locale_set ; q && *q ; q++) {
6000Sstevel@tonic-gate 	    if (g11n_langtag_matches_locale(*p, *q) == 1) {
6010Sstevel@tonic-gate 		do_append = 1;
6020Sstevel@tonic-gate 		for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) {
6030Sstevel@tonic-gate 		    if (!*r) break;
6040Sstevel@tonic-gate 		    if (strcmp(*q, *r) == 0) {
6050Sstevel@tonic-gate 			do_append = 0;
6060Sstevel@tonic-gate 			break;
6070Sstevel@tonic-gate 		    }
6080Sstevel@tonic-gate 		}
6090Sstevel@tonic-gate 		if (do_append && n_results < max_results) {
6100Sstevel@tonic-gate 		    result[n_results++] = xstrdup(*q);
6110Sstevel@tonic-gate 		    result[n_results] = NULL;
6120Sstevel@tonic-gate 		}
6130Sstevel@tonic-gate 		break;
6140Sstevel@tonic-gate 	    }
6150Sstevel@tonic-gate 	}
6160Sstevel@tonic-gate     }
6170Sstevel@tonic-gate     xfree_split_list(langtag_list);
6180Sstevel@tonic-gate 
6190Sstevel@tonic-gate     return result;
6200Sstevel@tonic-gate }
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate char *
6230Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
6240Sstevel@tonic-gate {
6250Sstevel@tonic-gate     char **results, *result = NULL;
6260Sstevel@tonic-gate 
6270Sstevel@tonic-gate     if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
6280Sstevel@tonic-gate 	    srvr_locales ?  srvr_locales : g11n_getlocales())) == NULL)
6290Sstevel@tonic-gate 	return NULL;
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate     if (*results != NULL)
6320Sstevel@tonic-gate 	    result = xstrdup(*results);
6330Sstevel@tonic-gate 
6340Sstevel@tonic-gate     xfree_split_list(results);
6350Sstevel@tonic-gate 
6360Sstevel@tonic-gate     return result;
6370Sstevel@tonic-gate }
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate 
6400Sstevel@tonic-gate /*
6410Sstevel@tonic-gate  * Functions for validating ASCII and UTF-8 strings
6420Sstevel@tonic-gate  *
6430Sstevel@tonic-gate  * The error_str parameter is an optional pointer to a char variable
6440Sstevel@tonic-gate  * where to store a string suitable for use with error() or fatal() or
6450Sstevel@tonic-gate  * friends.
6460Sstevel@tonic-gate  *
6470Sstevel@tonic-gate  * The return value is 0 if success, EILSEQ or EINVAL.
6480Sstevel@tonic-gate  *
6490Sstevel@tonic-gate  */
6500Sstevel@tonic-gate 
6510Sstevel@tonic-gate u_int
6520Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str)
6530Sstevel@tonic-gate {
6540Sstevel@tonic-gate     u_char *p;
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate     for (p = (u_char *) str ; p && *p && (!(*p & 0x80)) ; p++) ;
6570Sstevel@tonic-gate 
6580Sstevel@tonic-gate     if (len && ((p - (u_char *) str) != len)) {
6590Sstevel@tonic-gate 	return EILSEQ;
6600Sstevel@tonic-gate     }
6610Sstevel@tonic-gate     return 0;
6620Sstevel@tonic-gate }
6630Sstevel@tonic-gate 
/*
 * Check that str is well-formed UTF-8.
 *
 * str       - bytes to check; a NULL pointer is treated as an empty string
 * len       - number of bytes to check, or 0 to check up to the NUL
 *             terminator
 * error_str - optional; on failure receives a pointer to a static
 *             message (must not be freed)
 *
 * Scanning stops at the first NUL byte or after len bytes, whichever
 * comes first.  Returns 0 if everything scanned is valid, else EILSEQ.
 *
 * Rejected: stray continuation bytes, the lead bytes 0xfe/0xff,
 * sequences that run past the end of the input, bad continuation bytes,
 * overlong encodings, UTF-16 surrogates and U+FFFE/U+FFFF.
 *
 * NOTE(review): the legacy 5- and 6-byte forms are still accepted, as
 * before; RFC 3629 limits UTF-8 to 4 bytes (U+10FFFF) -- confirm whether
 * callers want the stricter rule.
 */
u_int
g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str)
{
    /* Smallest code point that genuinely needs an l-byte encoding. */
    static const u_int min_cp[] = {
	0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };
    const u_char *p, *end;
    u_int c, l, i;

    if (str == NULL)
	return 0;
    if (len == 0)
	len = strlen((const char *) str);
    end = str + len;

    for (p = str; p < end && *p; ) {
	/* 7-bit chars are fine */
	if (!(*p & 0x80)) {
	    p++;
	    continue;
	}

	/* 8-bit chars begin a UTF-8 sequence: get its length */
	if (*p < 0xc0)
	    goto illegal;	/* continuation byte without a lead byte */
	else if (*p < 0xe0)
	    l = 2;
	else if (*p < 0xf0)
	    l = 3;
	else if (*p < 0xf8)
	    l = 4;
	else if (*p < 0xfc)
	    l = 5;
	else if (*p < 0xfe)
	    l = 6;
	else
	    goto illegal;	/* 0xfe and 0xff never occur in UTF-8 */

	/* A sequence may end exactly at the end of the input, not beyond. */
	if ((size_t) (end - p) < l)
	    goto illegal;

	/*
	 * Build the code point.  The lead byte contributes 7 - l payload
	 * bits (5 for l == 2, 4 for l == 3, ...), each continuation byte 6.
	 */
	c = *p & (0x7f >> l);
	for (i = 1; i < l; i++) {
	    if ((p[i] & 0xc0) != 0x80)
		goto illegal;
	    c = (c << 6) | (p[i] & 0x3f);
	}

	/* Overlong detection: value would have fit in a shorter form. */
	if (c < min_cp[l])
	    goto illegal;

	/* UTF-16 surrogates and the noncharacters U+FFFE/U+FFFF */
	if ((c >= 0xd800 && c <= 0xdfff) || c == 0xfffe || c == 0xffff)
	    goto illegal;

	p += l;
    }
    return 0;

illegal:
    if (error_str)
	*error_str = (u_char *) "String is not valid UTF-8";
    return EILSEQ;
}
7510Sstevel@tonic-gate 
/*
 * Functions for converting to ASCII or UTF-8 from the local codeset
 * Functions for converting from ASCII or UTF-8 to the local codeset
 *
 * The error_str parameter is an optional pointer to a u_char * variable
 * in which to store a string suitable for use with error() or fatal()
 * or friends.
 *
 * The err_ptr parameter is an optional pointer to an integer in which
 * an error number such as EILSEQ or EINVAL is stored on failure.
 *
 * These functions return NULL if the conversion fails.
 *
 */
7660Sstevel@tonic-gate 
/*
 * Convert an ASCII string to the codeset of the current locale.
 *
 * str       - NUL-terminated ASCII input
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * on failure.  When the local codeset is itself ASCII the input is only
 * validated and duplicated.
 *
 * The conversion descriptor is opened on the first call and kept for
 * the life of the process, so it has to be static: a plain local would
 * be indeterminate on every call after the first.
 */
u_char *
g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd = (iconv_t) -1;
    static int open_errno = 0;
    int err;

    if (!initialized) {
	const char *codeset = nl_langinfo(CODESET);

	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	if (strcmp("646", codeset) == 0 ||
	    strcmp("ASCII", codeset) == 0 ||
	    strcmp("US-ASCII", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open(codeset, "646");
	    if (cd == (iconv_t) -1)
		open_errno = errno;	/* reported on every call below */
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert) {
	if ((err = g11n_validate_ascii(str, 0, error_str))) {
	    if (err_ptr) *err_ptr = err;
	    return NULL;
	}
	return (u_char *) xstrdup(str);
    }

    /* Never hand an invalid descriptor to iconv(). */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert ASCII strings to the local codeset";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
8080Sstevel@tonic-gate 
/*
 * Convert a UTF-8 string to the codeset of the current locale.
 *
 * str       - NUL-terminated UTF-8 input
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * on failure.  When the local codeset is itself UTF-8 the input is only
 * validated and duplicated.
 *
 * The conversion descriptor is opened on the first call and kept for
 * the life of the process, so it has to be static: a plain local would
 * be indeterminate on every call after the first.
 */
u_char *
g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd = (iconv_t) -1;
    static int open_errno = 0;
    int err;

    if (!initialized) {
	const char *codeset = nl_langinfo(CODESET);

	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	if (strcmp("UTF-8", codeset) == 0 ||
	    strcmp("UTF8", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open(codeset, "UTF-8");
	    if (cd == (iconv_t) -1)
		open_errno = errno;	/* reported on every call below */
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert) {
	if ((err = g11n_validate_utf8(str, 0, error_str))) {
	    if (err_ptr) *err_ptr = err;
	    return NULL;
	}
	return (u_char *) xstrdup((char *) str);
    }

    /* Never hand an invalid descriptor to iconv(). */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert UTF-8 strings to the local codeset";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
8490Sstevel@tonic-gate 
/*
 * Convert a string in the codeset of the current locale to ASCII.
 *
 * str       - NUL-terminated input in the local codeset
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * on failure.  When the local codeset is itself ASCII the input is
 * duplicated as is, without validation.
 *
 * The conversion descriptor is opened on the first call and kept for
 * the life of the process, so it has to be static: a plain local would
 * be indeterminate on every call after the first.
 */
char *
g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd = (iconv_t) -1;
    static int open_errno = 0;

    if (!initialized) {
	const char *codeset = nl_langinfo(CODESET);

	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	if (strcmp("646", codeset) == 0 ||
	    strcmp("ASCII", codeset) == 0 ||
	    strcmp("US-ASCII", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open("646", codeset);
	    if (cd == (iconv_t) -1)
		open_errno = errno;	/* reported on every call below */
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert)
	return xstrdup((char *) str);

    /* Never hand an invalid descriptor to iconv(). */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert strings from the local codeset to ASCII";
	return NULL;
    }
    return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
8840Sstevel@tonic-gate 
/*
 * Convert a string in the codeset of the current locale to UTF-8.
 *
 * str       - NUL-terminated input in the local codeset
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * on failure.  When the local codeset is itself UTF-8 the input is
 * duplicated as is, without validation.
 *
 * The conversion descriptor is opened on the first call and kept for
 * the life of the process, so it has to be static: a plain local would
 * be indeterminate on every call after the first.
 */
u_char *
g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd = (iconv_t) -1;
    static int open_errno = 0;

    if (!initialized) {
	const char *codeset = nl_langinfo(CODESET);

	/*
	 * iconv_open() fails if the to/from codesets are the
	 * same, and there are aliases of codesets to boot...
	 */
	if (strcmp("UTF-8", codeset) == 0 ||
	    strcmp("UTF8", codeset) == 0) {
	    do_convert = 0;
	}
	else {
	    cd = iconv_open("UTF-8", codeset);
	    if (cd == (iconv_t) -1)
		open_errno = errno;	/* reported on every call below */
	    do_convert = 1;
	}
	initialized = 1;
    }

    if (!do_convert)
	return (u_char *) xstrdup((char *) str);

    /* Never hand an invalid descriptor to iconv(). */
    if (cd == (iconv_t) -1) {
	if (err_ptr) *err_ptr = open_errno;
	if (error_str) *error_str = (u_char *)
	    "Cannot convert strings from the local codeset to UTF-8";
	return NULL;
    }
    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}
9180Sstevel@tonic-gate 
9190Sstevel@tonic-gate 
/*
 * Wrapper around iconv()
 *
 * cd      - open conversion descriptor
 * mul_ptr - optional; on entry the initial output buffer is
 *           (len << *mul_ptr) bytes, on success it is updated to the
 *           shift that was finally needed
 * buf     - input bytes
 * len     - input length, or 0 to use strlen(buf)
 * outlen  - optional; receives the number of bytes produced, not
 *           counting the terminating NUL
 * err     - optional; receives an errno value on failure
 * err_str - optional; receives a static message when the output will
 *           not fit even after the buffer has been grown to its limit
 *
 * Returns a newly allocated, NUL-terminated buffer, or NULL on failure.
 * NULL is also returned, with err untouched, when buf is NULL or starts
 * with a NUL byte.
 *
 * The caller is responsible for freeing the result and for handling
 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
 */

static
u_char *
do_iconv(iconv_t cd, u_int *mul_ptr,
	 const void *buf, u_int len,
	 u_int *outlen, int *err,
	 u_char **err_str)
{
    size_t inbytesleft, outbytesleft, converted_size, used;
    char *outbuf;
    u_char *converted;
    const char *inbuf;
    u_int mul = 0;

    if (!buf || !(*(const char *) buf)) return NULL;
    if (len == 0) len = strlen(buf);

    /*
     * Reset the conversion descriptor to its initial shift state.  All
     * NULL arguments: nothing to convert and no output buffer to write
     * a shift sequence into.
     */
    /* XXX Do we need initial shift sequences for UTF-8??? */
    (void) iconv(cd, NULL, NULL, NULL, NULL);

    inbuf = (const char *) buf;
    inbytesleft = len;
    if (mul_ptr) mul = *mul_ptr;
    converted_size = ((size_t) len << mul);
    converted = (u_char *) xmalloc(converted_size + 1); /* for null */
    outbuf = (char *) converted;
    outbytesleft = converted_size;

    do {
	if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) ==
		(size_t) -1) {
	    if (errno == E2BIG) {
		/* Give up once the output is 8 times the input size. */
		if (mul > 2) {
		    if (err_str)
			*err_str = (u_char *) "Conversion to UTF-8 failed due to "
				  "preposterous space requirements";
		    if (err)
			*err = EILSEQ;
		    xfree(converted);
		    return NULL;
		}

		/*
		 * Double the output buffer.  The bytes already produced
		 * must be carried over: outbuf continues right after
		 * them and outbytesleft is whatever the new size leaves.
		 */
		used = converted_size - outbytesleft;
		converted_size = ((size_t) len << ++mul);
		converted = xrealloc(converted, converted_size + 1);
		outbuf = (char *) converted + used;
		outbytesleft = converted_size - used;
	    }
	    else {
		/*
		 * Let the caller deal with iconv() errors, probably by
		 * calling fatal(); xfree() does not set errno.
		 */
		if (err) *err = errno;
		xfree(converted);
		return NULL;
	    }
	}
    } while (inbytesleft);

    *outbuf = '\0'; /* ensure null-termination */
    if (outlen) *outlen = converted_size - outbytesleft;
    if (mul_ptr) *mul_ptr = mul;
    return converted;
}
990