10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52628Sjp161948 * Common Development and Distribution License (the "License"). 62628Sjp161948 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate * 21*6288Sjp161948 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 220Sstevel@tonic-gate * Use is subject to license terms. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <errno.h> 280Sstevel@tonic-gate #include <locale.h> 290Sstevel@tonic-gate #include <langinfo.h> 300Sstevel@tonic-gate #include <iconv.h> 310Sstevel@tonic-gate #include <ctype.h> 320Sstevel@tonic-gate #include <strings.h> 330Sstevel@tonic-gate #include <string.h> 340Sstevel@tonic-gate #include <stdio.h> 350Sstevel@tonic-gate #include <stdlib.h> 360Sstevel@tonic-gate #include "includes.h" 370Sstevel@tonic-gate #include "xmalloc.h" 380Sstevel@tonic-gate #include "xlist.h" 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifdef MIN 410Sstevel@tonic-gate #undef MIN 420Sstevel@tonic-gate #endif /* MIN */ 430Sstevel@tonic-gate 442705Sjp161948 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 450Sstevel@tonic-gate 462705Sjp161948 #define LOCALE_PATH "/usr/bin/locale" 470Sstevel@tonic-gate 482705Sjp161948 /* two-char country code, '-' and two-char region code */ 492705Sjp161948 #define LANGTAG_MAX 5 500Sstevel@tonic-gate 512705Sjp161948 static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, 522705Sjp161948 uint_t len, uint_t *outlen, int *err, uchar_t **err_str); 530Sstevel@tonic-gate 540Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 550Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 560Sstevel@tonic-gate 572705Sjp161948 uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str); 582705Sjp161948 uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str); 590Sstevel@tonic-gate 605562Sjp161948 /* 615562Sjp161948 * Convert locale string name into a language tag. The caller is responsible for 625562Sjp161948 * freeing the memory allocated for the result. 635562Sjp161948 */ 642705Sjp161948 static char * 650Sstevel@tonic-gate g11n_locale2langtag(char *locale) 660Sstevel@tonic-gate { 672705Sjp161948 char *langtag; 680Sstevel@tonic-gate 692705Sjp161948 /* base cases */ 702705Sjp161948 if (!locale || !*locale) 712705Sjp161948 return (NULL); 720Sstevel@tonic-gate 732705Sjp161948 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 745562Sjp161948 return (xstrdup("i-default")); 750Sstevel@tonic-gate 762705Sjp161948 /* punt for language codes which are not exactly 2 letters */ 772705Sjp161948 if (strlen(locale) < 2 || 782705Sjp161948 !isalpha(locale[0]) || 792705Sjp161948 !isalpha(locale[1]) || 802705Sjp161948 (locale[2] != '\0' && 812705Sjp161948 locale[2] != '_' && 822705Sjp161948 locale[2] != '.' && 832705Sjp161948 locale[2] != '@')) 842705Sjp161948 return (NULL); 850Sstevel@tonic-gate 860Sstevel@tonic-gate 872705Sjp161948 /* we have a primary language sub-tag */ 882705Sjp161948 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 890Sstevel@tonic-gate 902705Sjp161948 strncpy(langtag, locale, 2); 912705Sjp161948 langtag[2] = '\0'; 920Sstevel@tonic-gate 932705Sjp161948 /* do we have country sub-tag? For example: cs_CZ */ 942705Sjp161948 if (locale[2] == '_') { 952705Sjp161948 if (strlen(locale) < 5 || 962705Sjp161948 !isalpha(locale[3]) || 972705Sjp161948 !isalpha(locale[4]) || 982705Sjp161948 (locale[5] != '\0' && (locale[5] != '.' && 992705Sjp161948 locale[5] != '@'))) { 1002705Sjp161948 return (langtag); 1012705Sjp161948 } 1022705Sjp161948 1032705Sjp161948 /* example: create cs-CZ from cs_CZ */ 1042705Sjp161948 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 1052705Sjp161948 locale + 3) == 5) 1062705Sjp161948 return (langtag); 1070Sstevel@tonic-gate } 1080Sstevel@tonic-gate 1092705Sjp161948 /* in all other cases we just use the primary language sub-tag */ 1102705Sjp161948 return (langtag); 1110Sstevel@tonic-gate } 1120Sstevel@tonic-gate 1132705Sjp161948 uint_t 1140Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 1150Sstevel@tonic-gate { 1162705Sjp161948 return (strcmp(langtag, "i-default") == 0); 1170Sstevel@tonic-gate } 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate /* 1200Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 1210Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 1220Sstevel@tonic-gate */ 1232705Sjp161948 uint_t 1240Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 1250Sstevel@tonic-gate { 1262705Sjp161948 /* match "i-default" to the process' current locale if possible */ 1272705Sjp161948 if (g11n_langtag_is_default(langtag)) { 1282705Sjp161948 if (strcasecmp(locale, "POSIX") == 0 || 1292705Sjp161948 strcasecmp(locale, "C") == 0) 1302705Sjp161948 return (1); 1312705Sjp161948 else 1322705Sjp161948 return (0); 1332705Sjp161948 } 1340Sstevel@tonic-gate 1352705Sjp161948 /* 1362705Sjp161948 * locale must be at least 2 chars long and the lang part must be 1372705Sjp161948 * exactly two characters 1382705Sjp161948 */ 1392705Sjp161948 if (strlen(locale) < 2 || 1402705Sjp161948 (!isalpha(locale[0]) || !isalpha(locale[1]) || 1412705Sjp161948 (locale[2] != '\0' && locale[2] != '_' && 1422705Sjp161948 locale[2] != '.' && locale[2] != '@'))) 1432705Sjp161948 return (0); 1440Sstevel@tonic-gate 1452705Sjp161948 /* same thing with the langtag */ 1462705Sjp161948 if (strlen(langtag) < 2 || 1472705Sjp161948 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 1482705Sjp161948 (langtag[2] != '\0' && langtag[2] != '-'))) 1492705Sjp161948 return (0); 1500Sstevel@tonic-gate 1512705Sjp161948 /* primary language sub-tag and the locale's language part must match */ 1522705Sjp161948 if (strncasecmp(langtag, locale, 2) != 0) 1532705Sjp161948 return (0); 1540Sstevel@tonic-gate 1552705Sjp161948 /* 1562705Sjp161948 * primary language sub-tag and the locale's language match, now 1572705Sjp161948 * fuzzy check country part 1582705Sjp161948 */ 1590Sstevel@tonic-gate 1602705Sjp161948 /* neither langtag nor locale have more than one component */ 1612705Sjp161948 if (langtag[2] == '\0' && 1622705Sjp161948 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 1632705Sjp161948 return (2); 1640Sstevel@tonic-gate 1652705Sjp161948 /* langtag has only one sub-tag... */ 1662705Sjp161948 if (langtag[2] == '\0') 1672705Sjp161948 return (1); 1680Sstevel@tonic-gate 1692705Sjp161948 /* locale has no country code... */ 1702705Sjp161948 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 1712705Sjp161948 return (1); 1722705Sjp161948 1732705Sjp161948 /* langtag has more than one subtag and the locale has a country code */ 1740Sstevel@tonic-gate 1752705Sjp161948 /* ignore second subtag if not two chars */ 1762705Sjp161948 if (strlen(langtag) < 5) 1772705Sjp161948 return (1); 1780Sstevel@tonic-gate 1792705Sjp161948 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 1802705Sjp161948 (langtag[5] != '\0' && langtag[5] != '-')) 1812705Sjp161948 return (1); 1820Sstevel@tonic-gate 1832705Sjp161948 /* ignore rest of locale if there is no two-character country part */ 1842705Sjp161948 if (strlen(locale) < 5) 1852705Sjp161948 return (1); 1860Sstevel@tonic-gate 1872705Sjp161948 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 1882705Sjp161948 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 1892705Sjp161948 return (1); 1900Sstevel@tonic-gate 1912705Sjp161948 /* if the country part matches, return 2 */ 1922705Sjp161948 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 1932705Sjp161948 return (2); 1940Sstevel@tonic-gate 1952705Sjp161948 return (1); 1960Sstevel@tonic-gate } 1970Sstevel@tonic-gate 1980Sstevel@tonic-gate char * 1990Sstevel@tonic-gate g11n_getlocale() 2000Sstevel@tonic-gate { 2012705Sjp161948 /* we have one text domain - always set it */ 2022705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2030Sstevel@tonic-gate 2042705Sjp161948 /* if the locale is not set, set it from the env vars */ 2052705Sjp161948 if (!setlocale(LC_MESSAGES, NULL)) 2062705Sjp161948 (void) setlocale(LC_MESSAGES, ""); 2070Sstevel@tonic-gate 2082705Sjp161948 return (setlocale(LC_MESSAGES, NULL)); 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate 2110Sstevel@tonic-gate void 2120Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 2130Sstevel@tonic-gate { 2142705Sjp161948 char *curr; 2150Sstevel@tonic-gate 2162705Sjp161948 /* we have one text domain - always set it */ 2172705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2180Sstevel@tonic-gate 2192705Sjp161948 if (!locale) 2202705Sjp161948 return; 2210Sstevel@tonic-gate 2222705Sjp161948 if (*locale && ((curr = setlocale(category, NULL))) && 2232705Sjp161948 strcmp(curr, locale) == 0) 2242705Sjp161948 return; 2252628Sjp161948 2262705Sjp161948 /* if <category> is bogus, setlocale() will do nothing */ 2272705Sjp161948 (void) setlocale(category, locale); 2280Sstevel@tonic-gate } 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate char ** 2310Sstevel@tonic-gate g11n_getlocales() 2320Sstevel@tonic-gate { 2332705Sjp161948 FILE *locale_out; 2342705Sjp161948 uint_t n_elems, list_size, long_line = 0; 2352705Sjp161948 char **list; 2362705Sjp161948 char locale[64]; /* 64 bytes is plenty for locale names */ 2372705Sjp161948 2382705Sjp161948 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 2392705Sjp161948 return (NULL); 2400Sstevel@tonic-gate 2412705Sjp161948 /* 2422705Sjp161948 * start with enough room for 65 locales - that's a lot fewer than 2432705Sjp161948 * all the locales available for installation, but a lot more than 2442705Sjp161948 * what most users will need and install 2452705Sjp161948 */ 2462705Sjp161948 n_elems = 0; 2472705Sjp161948 list_size = 192; 2482705Sjp161948 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 2492705Sjp161948 memset(list, 0, sizeof (char *) * (list_size + 1)); 2500Sstevel@tonic-gate 2512705Sjp161948 while (fgets(locale, sizeof (locale), locale_out)) { 2522705Sjp161948 /* skip long locale names (if any) */ 2532705Sjp161948 if (!strchr(locale, '\n')) { 2542705Sjp161948 long_line = 1; 2552705Sjp161948 continue; 2562705Sjp161948 } else if (long_line) { 2572705Sjp161948 long_line = 0; 2582705Sjp161948 continue; 2592705Sjp161948 } 2600Sstevel@tonic-gate 2612705Sjp161948 if (strncmp(locale, "iso_8859", 8) == 0) 2622705Sjp161948 /* ignore locale names like "iso_8859-1" */ 2632705Sjp161948 continue; 2640Sstevel@tonic-gate 2652705Sjp161948 if (n_elems == list_size) { 2662705Sjp161948 list_size *= 2; 2672705Sjp161948 list = (char **)xrealloc((void *) list, 2682705Sjp161948 (list_size + 1) * sizeof (char *)); 2692705Sjp161948 memset(&list[n_elems + 1], 0, 2702705Sjp161948 sizeof (char *) * (list_size - n_elems + 1)); 2712705Sjp161948 } 2722705Sjp161948 2732705Sjp161948 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 2742705Sjp161948 list[n_elems++] = xstrdup(locale); 2750Sstevel@tonic-gate } 2760Sstevel@tonic-gate 277*6288Sjp161948 (void) pclose(locale_out); 278*6288Sjp161948 2795562Sjp161948 if (n_elems == 0) { 2805562Sjp161948 xfree(list); 2813109Sjp161948 return (NULL); 2825562Sjp161948 } 2833109Sjp161948 2842705Sjp161948 list[n_elems] = NULL; 2850Sstevel@tonic-gate 2862705Sjp161948 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 2872705Sjp161948 return (list); 2880Sstevel@tonic-gate } 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate char * 2910Sstevel@tonic-gate g11n_getlangs() 2920Sstevel@tonic-gate { 2932705Sjp161948 char *locale; 2940Sstevel@tonic-gate 2952705Sjp161948 if (getenv("SSH_LANGS")) 2962705Sjp161948 return (xstrdup(getenv("SSH_LANGS"))); 2970Sstevel@tonic-gate 2982705Sjp161948 locale = g11n_getlocale(); 2990Sstevel@tonic-gate 3002705Sjp161948 if (!locale || !*locale) 3012705Sjp161948 return (xstrdup("i-default")); 3020Sstevel@tonic-gate 3032705Sjp161948 return (g11n_locale2langtag(locale)); 3040Sstevel@tonic-gate } 3050Sstevel@tonic-gate 3060Sstevel@tonic-gate char * 3070Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 3080Sstevel@tonic-gate { 3092705Sjp161948 char **p, **r, **q; 3105562Sjp161948 char *langtag, *langs; 3112705Sjp161948 int locales, skip; 3120Sstevel@tonic-gate 3132705Sjp161948 for (locales = 0, p = locale_set; p && *p; p++) 3142705Sjp161948 locales++; 3150Sstevel@tonic-gate 3162705Sjp161948 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 3172705Sjp161948 memset(r, 0, (locales + 1) * sizeof (char *)); 3180Sstevel@tonic-gate 3192705Sjp161948 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 3202705Sjp161948 skip = 0; 3212705Sjp161948 if ((langtag = g11n_locale2langtag(*p)) == NULL) 3222705Sjp161948 continue; 3232705Sjp161948 for (q = r; (q - r) < locales; q++) { 3242705Sjp161948 if (!*q) 3252705Sjp161948 break; 3262705Sjp161948 if (*q && strcmp(*q, langtag) == 0) 3272705Sjp161948 skip = 1; 3282705Sjp161948 } 3292705Sjp161948 if (!skip) 3302705Sjp161948 *(q++) = langtag; 3315562Sjp161948 else 3325562Sjp161948 xfree(langtag); 3332705Sjp161948 *q = NULL; 3340Sstevel@tonic-gate } 3352705Sjp161948 3365562Sjp161948 langs = xjoin(r, ','); 3375562Sjp161948 g11n_freelist(r); 3385562Sjp161948 3395562Sjp161948 return (langs); 3400Sstevel@tonic-gate } 3410Sstevel@tonic-gate 3422705Sjp161948 static int 3430Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 3440Sstevel@tonic-gate { 3452705Sjp161948 char *s1 = *(char **)d1; 3462705Sjp161948 char *s2 = *(char **)d2; 3470Sstevel@tonic-gate 3482705Sjp161948 return (strcmp(s1, s2)); 3490Sstevel@tonic-gate } 3500Sstevel@tonic-gate 3510Sstevel@tonic-gate int 3520Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 3530Sstevel@tonic-gate { 3542705Sjp161948 int len1, len2; 3552705Sjp161948 char c1, c2; 3560Sstevel@tonic-gate 3572705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3585562Sjp161948 (strchr(langtag1, '-') - langtag1) 3595562Sjp161948 : strlen(langtag1); 3600Sstevel@tonic-gate 3612705Sjp161948 len2 = (strchr(langtag2, '-')) ? 3625562Sjp161948 (strchr(langtag2, '-') - langtag2) 3635562Sjp161948 : strlen(langtag2); 3640Sstevel@tonic-gate 3652705Sjp161948 /* no match */ 3662705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3672705Sjp161948 return (0); 3680Sstevel@tonic-gate 3692705Sjp161948 c1 = *(langtag1 + len1); 3702705Sjp161948 c2 = *(langtag2 + len2); 3710Sstevel@tonic-gate 3722705Sjp161948 /* no country sub-tags - exact match */ 3732705Sjp161948 if (c1 == '\0' && c2 == '\0') 3742705Sjp161948 return (2); 3750Sstevel@tonic-gate 3762705Sjp161948 /* one langtag has a country sub-tag, the other doesn't */ 3772705Sjp161948 if (c1 == '\0' || c2 == '\0') 3782705Sjp161948 return (1); 3790Sstevel@tonic-gate 3802705Sjp161948 /* can't happen - both langtags have a country sub-tag */ 3812705Sjp161948 if (c1 != '-' || c2 != '-') 3822705Sjp161948 return (1); 3830Sstevel@tonic-gate 3842705Sjp161948 /* compare country subtags */ 3852705Sjp161948 langtag1 = langtag1 + len1 + 1; 3862705Sjp161948 langtag2 = langtag2 + len2 + 1; 3870Sstevel@tonic-gate 3882705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3892705Sjp161948 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 3902705Sjp161948 3912705Sjp161948 len2 = (strchr(langtag2, '-')) ? 3922705Sjp161948 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 3930Sstevel@tonic-gate 3942705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3952705Sjp161948 return (1); 3960Sstevel@tonic-gate 3972705Sjp161948 /* country tags matched - exact match */ 3982705Sjp161948 return (2); 3990Sstevel@tonic-gate } 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate char * 4020Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 4030Sstevel@tonic-gate { 4042705Sjp161948 char **list1, **list2, **list3, **p, **q, **r; 4052705Sjp161948 char *set3, *lang_subtag; 4062705Sjp161948 uint_t n1, n2, n3; 4072705Sjp161948 uint_t do_append; 4082705Sjp161948 4092705Sjp161948 list1 = xsplit(set1, ','); 4102705Sjp161948 list2 = xsplit(set2, ','); 4110Sstevel@tonic-gate 4122705Sjp161948 for (n1 = 0, p = list1; p && *p; p++, n1++) 4132705Sjp161948 ; 4142705Sjp161948 for (n2 = 0, p = list2; p && *p; p++, n2++) 4152705Sjp161948 ; 4160Sstevel@tonic-gate 4172705Sjp161948 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 4182705Sjp161948 *list3 = NULL; 4190Sstevel@tonic-gate 4202705Sjp161948 /* 4212705Sjp161948 * we must not sort the user langtags - sorting or not the server's 4222705Sjp161948 * should not affect the outcome 4232705Sjp161948 */ 4242705Sjp161948 qsort(list2, n2, sizeof (char *), sortcmp); 4250Sstevel@tonic-gate 4262705Sjp161948 for (n3 = 0, p = list1; p && *p; p++) { 4272705Sjp161948 do_append = 0; 4282705Sjp161948 for (q = list2; q && *q; q++) { 4292705Sjp161948 if (g11n_langtag_match(*p, *q) != 2) continue; 4302705Sjp161948 /* append element */ 4312705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4322705Sjp161948 do_append = 1; 4332705Sjp161948 if (!*r) 4342705Sjp161948 break; 4352705Sjp161948 if (strcmp(*p, *r) == 0) { 4362705Sjp161948 do_append = 0; 4372705Sjp161948 break; 4382705Sjp161948 } 4392705Sjp161948 } 4402705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4412705Sjp161948 list3[n3++] = xstrdup(*p); 4422705Sjp161948 list3[n3] = NULL; 4432705Sjp161948 } 4440Sstevel@tonic-gate } 4450Sstevel@tonic-gate } 4462705Sjp161948 4472705Sjp161948 for (p = list1; p && *p; p++) { 4482705Sjp161948 do_append = 0; 4492705Sjp161948 for (q = list2; q && *q; q++) { 4502705Sjp161948 if (g11n_langtag_match(*p, *q) != 1) 4512705Sjp161948 continue; 4520Sstevel@tonic-gate 4532705Sjp161948 /* append element */ 4542705Sjp161948 lang_subtag = xstrdup(*p); 4552705Sjp161948 if (strchr(lang_subtag, '-')) 4562705Sjp161948 *(strchr(lang_subtag, '-')) = '\0'; 4572705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4582705Sjp161948 do_append = 1; 4592705Sjp161948 if (!*r) 4602705Sjp161948 break; 4612705Sjp161948 if (strcmp(lang_subtag, *r) == 0) { 4622705Sjp161948 do_append = 0; 4632705Sjp161948 break; 4642705Sjp161948 } 4652705Sjp161948 } 4662705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4672705Sjp161948 list3[n3++] = lang_subtag; 4682705Sjp161948 list3[n3] = NULL; 4692705Sjp161948 } else 4702705Sjp161948 xfree(lang_subtag); 4710Sstevel@tonic-gate } 4720Sstevel@tonic-gate } 4730Sstevel@tonic-gate 4742705Sjp161948 set3 = xjoin(list3, ','); 4752705Sjp161948 xfree_split_list(list1); 4762705Sjp161948 xfree_split_list(list2); 4772705Sjp161948 xfree_split_list(list3); 4780Sstevel@tonic-gate 4792705Sjp161948 return (set3); 4800Sstevel@tonic-gate } 4810Sstevel@tonic-gate 4820Sstevel@tonic-gate char * 4830Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 4840Sstevel@tonic-gate { 4852705Sjp161948 char *list, *result; 4862705Sjp161948 char **xlist; 4870Sstevel@tonic-gate 4882705Sjp161948 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 4892705Sjp161948 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 4900Sstevel@tonic-gate 4912705Sjp161948 if (!list) 4922705Sjp161948 return (NULL); 4930Sstevel@tonic-gate 4942705Sjp161948 xlist = xsplit(list, ','); 4950Sstevel@tonic-gate 4962705Sjp161948 xfree(list); 4970Sstevel@tonic-gate 4982705Sjp161948 if (!xlist || !*xlist) 4992705Sjp161948 return (NULL); 5000Sstevel@tonic-gate 5012705Sjp161948 result = xstrdup(*xlist); 5022705Sjp161948 xfree_split_list(xlist); 5030Sstevel@tonic-gate 5042705Sjp161948 return (result); 5050Sstevel@tonic-gate } 5060Sstevel@tonic-gate 5070Sstevel@tonic-gate /* 5080Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 5090Sstevel@tonic-gate * a stright strcmp() 5100Sstevel@tonic-gate */ 5112705Sjp161948 static int 5120Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 5130Sstevel@tonic-gate { 5142705Sjp161948 char *dot_ptr; 5152705Sjp161948 char *s1 = *(char **)d1; 5162705Sjp161948 char *s2 = *(char **)d2; 5172705Sjp161948 int s1_is_utf8 = 0; 5182705Sjp161948 int s2_is_utf8 = 0; 5190Sstevel@tonic-gate 5202705Sjp161948 /* check if s1 is a UTF-8 locale */ 5212705Sjp161948 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 5222705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5232705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5242705Sjp161948 s1_is_utf8++; 5252705Sjp161948 } 5262705Sjp161948 5272705Sjp161948 /* check if s2 is a UTF-8 locale */ 5282705Sjp161948 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 5292705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5302705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5312705Sjp161948 s2_is_utf8++; 5322705Sjp161948 } 5330Sstevel@tonic-gate 5342705Sjp161948 /* prefer UTF-8 locales */ 5352705Sjp161948 if (s1_is_utf8 && !s2_is_utf8) 5362705Sjp161948 return (-1); 5370Sstevel@tonic-gate 5382705Sjp161948 if (s2_is_utf8 && !s1_is_utf8) 5392705Sjp161948 return (1); 5400Sstevel@tonic-gate 5412705Sjp161948 /* prefer any locale over the default locales */ 5422705Sjp161948 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 5432705Sjp161948 strcmp(s1, "common") == 0) { 5442705Sjp161948 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 && 5452705Sjp161948 strcmp(s2, "common") != 0) 5462705Sjp161948 return (1); 5472705Sjp161948 } 5480Sstevel@tonic-gate 5492705Sjp161948 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 5502705Sjp161948 strcmp(s2, "common") == 0) { 5512705Sjp161948 if (strcmp(s1, "C") != 0 && 5522705Sjp161948 strcmp(s1, "POSIX") != 0 && 5532705Sjp161948 strcmp(s1, "common") != 0) 5542705Sjp161948 return (-1); 5552705Sjp161948 } 5560Sstevel@tonic-gate 5572705Sjp161948 return (strcmp(s1, s2)); 5580Sstevel@tonic-gate } 5590Sstevel@tonic-gate 5600Sstevel@tonic-gate 5610Sstevel@tonic-gate char ** 5622705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 5630Sstevel@tonic-gate { 5642705Sjp161948 char **langtag_list, **result, **p, **q, **r; 5652705Sjp161948 char *s; 5662705Sjp161948 uint_t do_append, n_langtags, n_locales, n_results, max_results; 5672705Sjp161948 5682705Sjp161948 /* count lang tags and locales */ 5692705Sjp161948 for (n_locales = 0, p = locale_set; p && *p; p++) 5702705Sjp161948 n_locales++; 5710Sstevel@tonic-gate 5722705Sjp161948 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 5732705Sjp161948 /* count the number of langtags */ 5742705Sjp161948 for (; s = strchr(s, ','); s++, n_langtags++) 5752705Sjp161948 ; 5762705Sjp161948 5772705Sjp161948 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 5780Sstevel@tonic-gate 5792705Sjp161948 langtag_list = xsplit(langtag_set, ','); 5802705Sjp161948 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 5812705Sjp161948 ; 5820Sstevel@tonic-gate 5832705Sjp161948 max_results = MIN(n_locales, n_langtags) * 2; 5842705Sjp161948 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 5852705Sjp161948 *result = NULL; 5862705Sjp161948 n_results = 0; 5870Sstevel@tonic-gate 5882705Sjp161948 /* more specific matches first */ 5892705Sjp161948 for (p = langtag_list; p && *p; p++) { 5902705Sjp161948 do_append = 0; 5912705Sjp161948 for (q = locale_set; q && *q; q++) { 5922705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 2) { 5932705Sjp161948 do_append = 1; 5942705Sjp161948 for (r = result; (r - result) <= 5952705Sjp161948 MIN(n_locales, n_langtags); r++) { 5962705Sjp161948 if (!*r) 5972705Sjp161948 break; 5982705Sjp161948 if (strcmp(*q, *r) == 0) { 5992705Sjp161948 do_append = 0; 6002705Sjp161948 break; 6012705Sjp161948 } 6022705Sjp161948 } 6032705Sjp161948 if (do_append && n_results < max_results) { 6042705Sjp161948 result[n_results++] = xstrdup(*q); 6052705Sjp161948 result[n_results] = NULL; 6062705Sjp161948 } 6072705Sjp161948 break; 6082705Sjp161948 } 6090Sstevel@tonic-gate } 6100Sstevel@tonic-gate } 6110Sstevel@tonic-gate 6122705Sjp161948 for (p = langtag_list; p && *p; p++) { 6132705Sjp161948 do_append = 0; 6142705Sjp161948 for (q = locale_set; q && *q; q++) { 6152705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 1) { 6162705Sjp161948 do_append = 1; 6172705Sjp161948 for (r = result; (r - result) <= 6182705Sjp161948 MIN(n_locales, n_langtags); r++) { 6192705Sjp161948 if (!*r) 6202705Sjp161948 break; 6212705Sjp161948 if (strcmp(*q, *r) == 0) { 6222705Sjp161948 do_append = 0; 6232705Sjp161948 break; 6242705Sjp161948 } 6252705Sjp161948 } 6262705Sjp161948 if (do_append && n_results < max_results) { 6272705Sjp161948 result[n_results++] = xstrdup(*q); 6282705Sjp161948 result[n_results] = NULL; 6292705Sjp161948 } 6302705Sjp161948 break; 6312705Sjp161948 } 6320Sstevel@tonic-gate } 6330Sstevel@tonic-gate } 6340Sstevel@tonic-gate 6352705Sjp161948 xfree_split_list(langtag_list); 6362705Sjp161948 6372705Sjp161948 return (result); 6380Sstevel@tonic-gate } 6390Sstevel@tonic-gate 6400Sstevel@tonic-gate char * 6410Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 6420Sstevel@tonic-gate { 6435562Sjp161948 char **results, **locales, *result = NULL; 6445562Sjp161948 6455562Sjp161948 if (srvr_locales == NULL) 6465562Sjp161948 locales = g11n_getlocales(); 6475562Sjp161948 else 6485562Sjp161948 locales = srvr_locales; 6490Sstevel@tonic-gate 6502705Sjp161948 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 6515562Sjp161948 locales)) == NULL) 6525562Sjp161948 goto err; 6530Sstevel@tonic-gate 6542705Sjp161948 if (*results != NULL) 6552705Sjp161948 result = xstrdup(*results); 6560Sstevel@tonic-gate 6572705Sjp161948 xfree_split_list(results); 6580Sstevel@tonic-gate 6595562Sjp161948 err: 6605562Sjp161948 if (locales != srvr_locales) 6615562Sjp161948 g11n_freelist(locales); 6622705Sjp161948 return (result); 6630Sstevel@tonic-gate } 6640Sstevel@tonic-gate 6650Sstevel@tonic-gate 6660Sstevel@tonic-gate /* 6670Sstevel@tonic-gate * Functions for validating ASCII and UTF-8 strings 6680Sstevel@tonic-gate * 6690Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 6700Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 6710Sstevel@tonic-gate * friends. 6720Sstevel@tonic-gate * 6730Sstevel@tonic-gate * The return value is 0 if success, EILSEQ or EINVAL. 6740Sstevel@tonic-gate * 6750Sstevel@tonic-gate */ 6762705Sjp161948 uint_t 6772705Sjp161948 g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str) 6780Sstevel@tonic-gate { 6792705Sjp161948 uchar_t *p; 6800Sstevel@tonic-gate 6812705Sjp161948 for (p = (uchar_t *)str; p && *p && (!(*p & 0x80)); p++) 6822705Sjp161948 ; 6830Sstevel@tonic-gate 6842705Sjp161948 if (len && ((p - (uchar_t *)str) != len)) 6852705Sjp161948 return (EILSEQ); 6862705Sjp161948 6872705Sjp161948 return (0); 6880Sstevel@tonic-gate } 6890Sstevel@tonic-gate 6902705Sjp161948 uint_t 6912705Sjp161948 g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str) 6920Sstevel@tonic-gate { 6932705Sjp161948 uchar_t *p; 6942705Sjp161948 uint_t c, l; 6952705Sjp161948 6962705Sjp161948 if (len == 0) 6972705Sjp161948 len = strlen((const char *)str); 6980Sstevel@tonic-gate 6992705Sjp161948 for (p = (uchar_t *)str; p && (p - str < len) && *p; ) { 7002705Sjp161948 /* 8-bit chars begin a UTF-8 sequence */ 7012705Sjp161948 if (*p & 0x80) { 7022705Sjp161948 /* get sequence length and sanity check first byte */ 7032705Sjp161948 if (*p < 0xc0) 7042705Sjp161948 return (EILSEQ); 7052705Sjp161948 else if (*p < 0xe0) 7062705Sjp161948 l = 2; 7072705Sjp161948 else if (*p < 0xf0) 7082705Sjp161948 l = 3; 7092705Sjp161948 else if (*p < 0xf8) 7102705Sjp161948 l = 4; 7112705Sjp161948 else if (*p < 0xfc) 7122705Sjp161948 l = 5; 7132705Sjp161948 else if (*p < 0xfe) 7142705Sjp161948 l = 6; 7152705Sjp161948 else 7162705Sjp161948 return (EILSEQ); 7172705Sjp161948 7182705Sjp161948 if ((p + l - str) >= len) 7192705Sjp161948 return (EILSEQ); 7202705Sjp161948 7212705Sjp161948 /* overlong detection - build codepoint */ 7222705Sjp161948 c = *p & 0x3f; 7232705Sjp161948 /* shift c bits from first byte */ 7242705Sjp161948 c = c << (6 * (l - 1)); 7252705Sjp161948 7262705Sjp161948 if (l > 1) { 7272705Sjp161948 if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80)) 7282705Sjp161948 c = c | ((*(p + 1) & 0x3f) << 7292705Sjp161948 (6 * (l - 2))); 7302705Sjp161948 else 7312705Sjp161948 return (EILSEQ); 7322705Sjp161948 7332705Sjp161948 if (c < 0x80) 7342705Sjp161948 return (EILSEQ); 7352705Sjp161948 } 7360Sstevel@tonic-gate 7372705Sjp161948 if (l > 2) { 7382705Sjp161948 if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80)) 7392705Sjp161948 c = c | ((*(p + 2) & 0x3f) << 7402705Sjp161948 (6 * (l - 3))); 7412705Sjp161948 else 7422705Sjp161948 return (EILSEQ); 7432705Sjp161948 7442705Sjp161948 if (c < 0x800) 7452705Sjp161948 return (EILSEQ); 7462705Sjp161948 } 7472705Sjp161948 7482705Sjp161948 if (l > 3) { 7492705Sjp161948 if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80)) 7502705Sjp161948 c = c | ((*(p + 3) & 0x3f) << 7512705Sjp161948 (6 * (l - 4))); 7522705Sjp161948 else 7532705Sjp161948 return (EILSEQ); 7542705Sjp161948 7552705Sjp161948 if (c < 0x10000) 7562705Sjp161948 return (EILSEQ); 7572705Sjp161948 } 7580Sstevel@tonic-gate 7592705Sjp161948 if (l > 4) { 7602705Sjp161948 if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80)) 7612705Sjp161948 c = c | ((*(p + 4) & 0x3f) << 7622705Sjp161948 (6 * (l - 5))); 7632705Sjp161948 else 7642705Sjp161948 return (EILSEQ); 7652705Sjp161948 7662705Sjp161948 if (c < 0x200000) 7672705Sjp161948 return (EILSEQ); 7682705Sjp161948 } 7690Sstevel@tonic-gate 7702705Sjp161948 if (l > 5) { 7712705Sjp161948 if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80)) 7722705Sjp161948 c = c | (*(p + 5) & 0x3f); 7732705Sjp161948 else 7742705Sjp161948 return (EILSEQ); 7752705Sjp161948 7762705Sjp161948 if (c < 0x4000000) 7772705Sjp161948 return (EILSEQ); 7782705Sjp161948 } 7790Sstevel@tonic-gate 7802705Sjp161948 /* 7812705Sjp161948 * check for UTF-16 surrogates ifs other illegal 7822705Sjp161948 * UTF-8 * points 7832705Sjp161948 */ 7842705Sjp161948 if (((c <= 0xdfff) && (c >= 0xd800)) || 7852705Sjp161948 (c == 0xfffe) || (c == 0xffff)) 7862705Sjp161948 return (EILSEQ); 7872705Sjp161948 p += l; 7882705Sjp161948 } 7892705Sjp161948 /* 7-bit chars are fine */ 7900Sstevel@tonic-gate else 7912705Sjp161948 p++; 7920Sstevel@tonic-gate } 7932705Sjp161948 return (0); 7940Sstevel@tonic-gate } 7950Sstevel@tonic-gate 7960Sstevel@tonic-gate /* 7970Sstevel@tonic-gate * Functions for converting to ASCII or UTF-8 from the local codeset 7980Sstevel@tonic-gate * Functions for converting from ASCII or UTF-8 to the local codeset 7990Sstevel@tonic-gate * 8000Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 8010Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 8020Sstevel@tonic-gate * friends. 8030Sstevel@tonic-gate * 8040Sstevel@tonic-gate * The err parameter is an optional pointer to an integer where 0 8050Sstevel@tonic-gate * (success) or EILSEQ or EINVAL will be stored (failure). 8060Sstevel@tonic-gate * 8070Sstevel@tonic-gate * These functions return NULL if the conversion fails. 8080Sstevel@tonic-gate * 8090Sstevel@tonic-gate */ 8102705Sjp161948 uchar_t * 8112705Sjp161948 g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str) 8120Sstevel@tonic-gate { 8132705Sjp161948 static uint_t initialized = 0; 8142705Sjp161948 static uint_t do_convert = 0; 8152705Sjp161948 iconv_t cd; 8162705Sjp161948 int err; 8170Sstevel@tonic-gate 8182705Sjp161948 if (!initialized) { 8192705Sjp161948 /* 8202705Sjp161948 * iconv_open() fails if the to/from codesets are the 8212705Sjp161948 * same, and there are aliases of codesets to boot... 8222705Sjp161948 */ 8232705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 8245562Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 8255562Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 8262705Sjp161948 initialized = 1; 8272705Sjp161948 do_convert = 0; 8282705Sjp161948 } else { 8292705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "646"); 8302705Sjp161948 if (cd == (iconv_t)-1) { 8312705Sjp161948 if (err_ptr) 8322705Sjp161948 *err_ptr = errno; 8332705Sjp161948 if (error_str) 8342705Sjp161948 *error_str = (uchar_t *)"Cannot " 8352705Sjp161948 "convert ASCII strings to the local" 8362705Sjp161948 " codeset"; 8372705Sjp161948 } 8382705Sjp161948 initialized = 1; 8392705Sjp161948 do_convert = 1; 8402705Sjp161948 } 8410Sstevel@tonic-gate } 8422705Sjp161948 8432705Sjp161948 if (!do_convert) { 8442705Sjp161948 if ((err = g11n_validate_ascii(str, 0, error_str))) { 8452705Sjp161948 if (err_ptr) 8462705Sjp161948 *err_ptr = err; 8472705Sjp161948 return (NULL); 8482705Sjp161948 } else 8492705Sjp161948 return ((uchar_t *)xstrdup(str)); 8500Sstevel@tonic-gate } 8510Sstevel@tonic-gate 8522705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8530Sstevel@tonic-gate } 8540Sstevel@tonic-gate 8552705Sjp161948 uchar_t * 8562705Sjp161948 g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 8570Sstevel@tonic-gate { 8582705Sjp161948 static uint_t initialized = 0; 8592705Sjp161948 static uint_t do_convert = 0; 8602705Sjp161948 iconv_t cd; 8612705Sjp161948 int err; 8620Sstevel@tonic-gate 8632705Sjp161948 if (!initialized) { 8642705Sjp161948 /* 8652705Sjp161948 * iconv_open() fails if the to/from codesets are the 8662705Sjp161948 * same, and there are aliases of codesets to boot... 8672705Sjp161948 */ 8682705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 8692705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 8702705Sjp161948 initialized = 1; 8712705Sjp161948 do_convert = 0; 8722705Sjp161948 } else { 8732705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 8742705Sjp161948 if (cd == (iconv_t)-1) { 8752705Sjp161948 if (err_ptr) 8762705Sjp161948 *err_ptr = errno; 8772705Sjp161948 if (error_str) 8782705Sjp161948 *error_str = (uchar_t *)"Cannot " 8792705Sjp161948 "convert UTF-8 strings to the " 8802705Sjp161948 "local codeset"; 8812705Sjp161948 } 8822705Sjp161948 initialized = 1; 8832705Sjp161948 do_convert = 1; 8842705Sjp161948 } 8850Sstevel@tonic-gate } 8862705Sjp161948 8872705Sjp161948 if (!do_convert) { 8882705Sjp161948 if ((err = g11n_validate_utf8(str, 0, error_str))) { 8892705Sjp161948 if (err_ptr) 8902705Sjp161948 *err_ptr = err; 8912705Sjp161948 return (NULL); 8922705Sjp161948 } else 8932705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 8940Sstevel@tonic-gate } 8950Sstevel@tonic-gate 8962705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8970Sstevel@tonic-gate } 8980Sstevel@tonic-gate 8990Sstevel@tonic-gate char * 9002705Sjp161948 g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str) 9010Sstevel@tonic-gate { 9022705Sjp161948 static uint_t initialized = 0; 9032705Sjp161948 static uint_t do_convert = 0; 9042705Sjp161948 iconv_t cd; 9050Sstevel@tonic-gate 9062705Sjp161948 if (!initialized) { 9072705Sjp161948 /* 9082705Sjp161948 * iconv_open() fails if the to/from codesets are the 9092705Sjp161948 * same, and there are aliases of codesets to boot... 9102705Sjp161948 */ 9112705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 9122705Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 9132705Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 9142705Sjp161948 initialized = 1; 9152705Sjp161948 do_convert = 0; 9162705Sjp161948 } else { 9172705Sjp161948 cd = iconv_open("646", nl_langinfo(CODESET)); 9182705Sjp161948 if (cd == (iconv_t)-1) { 9192705Sjp161948 if (err_ptr) 9202705Sjp161948 *err_ptr = errno; 9212705Sjp161948 if (error_str) 9222705Sjp161948 *error_str = (uchar_t *)"Cannot " 9232705Sjp161948 "convert UTF-8 strings to the " 9242705Sjp161948 "local codeset"; 9252705Sjp161948 } 9262705Sjp161948 initialized = 1; 9272705Sjp161948 do_convert = 1; 9282705Sjp161948 } 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate 9312705Sjp161948 if (!do_convert) 9322705Sjp161948 return (xstrdup((char *)str)); 9332705Sjp161948 9342705Sjp161948 return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate 9372705Sjp161948 uchar_t * 9382705Sjp161948 g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 9390Sstevel@tonic-gate { 9402705Sjp161948 static uint_t initialized = 0; 9412705Sjp161948 static uint_t do_convert = 0; 9422705Sjp161948 iconv_t cd; 9430Sstevel@tonic-gate 9442705Sjp161948 if (!initialized) { 9452705Sjp161948 /* 9462705Sjp161948 * iconv_open() fails if the to/from codesets are the 9472705Sjp161948 * same, and there are aliases of codesets to boot... 9482705Sjp161948 */ 9492705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 9502705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 9512705Sjp161948 initialized = 1; 9522705Sjp161948 do_convert = 0; 9532705Sjp161948 } else { 9542705Sjp161948 cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 9552705Sjp161948 if (cd == (iconv_t)-1) { 9562705Sjp161948 if (err_ptr) 9572705Sjp161948 *err_ptr = errno; 9582705Sjp161948 if (error_str) 9592705Sjp161948 *error_str = (uchar_t *)"Cannot " 9602705Sjp161948 "convert UTF-8 strings to the " 9612705Sjp161948 "local codeset"; 9622705Sjp161948 } 9632705Sjp161948 initialized = 1; 9642705Sjp161948 do_convert = 1; 9652705Sjp161948 } 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate 9682705Sjp161948 if (!do_convert) 9692705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 9702705Sjp161948 9712705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9720Sstevel@tonic-gate } 9730Sstevel@tonic-gate 9740Sstevel@tonic-gate 9750Sstevel@tonic-gate /* 9760Sstevel@tonic-gate * Wrapper around iconv() 9770Sstevel@tonic-gate * 9780Sstevel@tonic-gate * The caller is responsible for freeing the result and for handling 9790Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 9800Sstevel@tonic-gate */ 9812705Sjp161948 static uchar_t * 9822705Sjp161948 do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len, 9832705Sjp161948 uint_t *outlen, int *err, uchar_t **err_str) 9842705Sjp161948 { 9852705Sjp161948 size_t inbytesleft, outbytesleft, converted_size; 9862705Sjp161948 char *outbuf; 9872705Sjp161948 uchar_t *converted; 9882705Sjp161948 const char *inbuf; 9892705Sjp161948 uint_t mul = 0; 9900Sstevel@tonic-gate 9912705Sjp161948 if (!buf || !(*(char *)buf)) 9922705Sjp161948 return (NULL); 9932705Sjp161948 9942705Sjp161948 if (len == 0) 9952705Sjp161948 len = strlen(buf); 9962705Sjp161948 9972705Sjp161948 /* reset conversion descriptor */ 9982705Sjp161948 /* XXX Do we need initial shift sequences for UTF-8??? */ 9992705Sjp161948 (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 10002705Sjp161948 inbuf = (const char *) buf; 10012705Sjp161948 10022705Sjp161948 if (mul_ptr) 10032705Sjp161948 mul = *mul_ptr; 10042705Sjp161948 10052705Sjp161948 converted_size = (len << mul); 10062705Sjp161948 outbuf = (char *)xmalloc(converted_size + 1); /* for null */ 10072705Sjp161948 converted = (uchar_t *)outbuf; 10082705Sjp161948 outbytesleft = len; 10090Sstevel@tonic-gate 10102705Sjp161948 do { 10112705Sjp161948 if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 10122705Sjp161948 (size_t)-1) { 10132705Sjp161948 if (errno == E2BIG) { 10142705Sjp161948 /* UTF-8 codepoints are at most 8 bytes long */ 10152705Sjp161948 if (mul > 2) { 10162705Sjp161948 if (err_str) 10172705Sjp161948 *err_str = (uchar_t *) 10182705Sjp161948 "Conversion to UTF-8 failed" 10192705Sjp161948 " due to preposterous space" 10202705Sjp161948 " requirements"; 10212705Sjp161948 if (err) 10222705Sjp161948 *err = EILSEQ; 10232705Sjp161948 return (NULL); 10242705Sjp161948 } 10250Sstevel@tonic-gate 10262705Sjp161948 /* 10272705Sjp161948 * re-alloc output and ensure that the outbuf 10282705Sjp161948 * and outbytesleft values are adjusted 10292705Sjp161948 */ 10302705Sjp161948 converted = xrealloc(converted, 10312705Sjp161948 converted_size << 1 + 1); 10322705Sjp161948 outbuf = (char *)converted + converted_size - 10332705Sjp161948 outbytesleft; 10342705Sjp161948 converted_size = (len << ++(mul)); 10352705Sjp161948 outbytesleft = converted_size - outbytesleft; 10362705Sjp161948 } else { 10372705Sjp161948 /* 10382705Sjp161948 * let the caller deal with iconv() errors, 10392705Sjp161948 * probably by calling fatal(); xfree() does 10402705Sjp161948 * not set errno 10412705Sjp161948 */ 10422705Sjp161948 if (err) 10432705Sjp161948 *err = errno; 10442705Sjp161948 xfree(converted); 10452705Sjp161948 return (NULL); 10462705Sjp161948 } 10472705Sjp161948 } 10482705Sjp161948 } while (inbytesleft); 10492705Sjp161948 10502705Sjp161948 *outbuf = '\0'; /* ensure null-termination */ 10512705Sjp161948 if (outlen) 10522705Sjp161948 *outlen = converted_size - outbytesleft; 10532705Sjp161948 if (mul_ptr) 10542705Sjp161948 *mul_ptr = mul; 10552705Sjp161948 10562705Sjp161948 return (converted); 10570Sstevel@tonic-gate } 10585562Sjp161948 10595562Sjp161948 /* 10605562Sjp161948 * Free all strings in the list and then free the list itself. We know that the 10615562Sjp161948 * list ends with a NULL pointer. 10625562Sjp161948 */ 10635562Sjp161948 void 10645562Sjp161948 g11n_freelist(char **list) 10655562Sjp161948 { 10665562Sjp161948 int i = 0; 10675562Sjp161948 10685562Sjp161948 while (list[i] != NULL) { 10695562Sjp161948 xfree(list[i]); 10705562Sjp161948 i++; 10715562Sjp161948 } 10725562Sjp161948 10735562Sjp161948 xfree(list); 10745562Sjp161948 } 1075