10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52628Sjp161948 * Common Development and Distribution License (the "License"). 62628Sjp161948 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate * 21*5562Sjp161948 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 220Sstevel@tonic-gate * Use is subject to license terms. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <errno.h> 280Sstevel@tonic-gate #include <locale.h> 290Sstevel@tonic-gate #include <langinfo.h> 300Sstevel@tonic-gate #include <iconv.h> 310Sstevel@tonic-gate #include <ctype.h> 320Sstevel@tonic-gate #include <strings.h> 330Sstevel@tonic-gate #include <string.h> 340Sstevel@tonic-gate #include <stdio.h> 350Sstevel@tonic-gate #include <stdlib.h> 360Sstevel@tonic-gate #include "includes.h" 370Sstevel@tonic-gate #include "xmalloc.h" 380Sstevel@tonic-gate #include "xlist.h" 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifdef MIN 410Sstevel@tonic-gate #undef MIN 420Sstevel@tonic-gate #endif /* MIN */ 430Sstevel@tonic-gate 442705Sjp161948 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 450Sstevel@tonic-gate 462705Sjp161948 #define LOCALE_PATH "/usr/bin/locale" 470Sstevel@tonic-gate 482705Sjp161948 /* two-char country code, '-' and two-char region code */ 492705Sjp161948 #define LANGTAG_MAX 5 500Sstevel@tonic-gate 512705Sjp161948 static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, 522705Sjp161948 uint_t len, uint_t *outlen, int *err, uchar_t **err_str); 530Sstevel@tonic-gate 540Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 550Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 560Sstevel@tonic-gate 572705Sjp161948 uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str); 582705Sjp161948 uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str); 590Sstevel@tonic-gate 60*5562Sjp161948 /* 61*5562Sjp161948 * Convert locale string name into a language tag. The caller is responsible for 62*5562Sjp161948 * freeing the memory allocated for the result. 63*5562Sjp161948 */ 642705Sjp161948 static char * 650Sstevel@tonic-gate g11n_locale2langtag(char *locale) 660Sstevel@tonic-gate { 672705Sjp161948 char *langtag; 680Sstevel@tonic-gate 692705Sjp161948 /* base cases */ 702705Sjp161948 if (!locale || !*locale) 712705Sjp161948 return (NULL); 720Sstevel@tonic-gate 732705Sjp161948 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 74*5562Sjp161948 return (xstrdup("i-default")); 750Sstevel@tonic-gate 762705Sjp161948 /* punt for language codes which are not exactly 2 letters */ 772705Sjp161948 if (strlen(locale) < 2 || 782705Sjp161948 !isalpha(locale[0]) || 792705Sjp161948 !isalpha(locale[1]) || 802705Sjp161948 (locale[2] != '\0' && 812705Sjp161948 locale[2] != '_' && 822705Sjp161948 locale[2] != '.' && 832705Sjp161948 locale[2] != '@')) 842705Sjp161948 return (NULL); 850Sstevel@tonic-gate 860Sstevel@tonic-gate 872705Sjp161948 /* we have a primary language sub-tag */ 882705Sjp161948 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 890Sstevel@tonic-gate 902705Sjp161948 strncpy(langtag, locale, 2); 912705Sjp161948 langtag[2] = '\0'; 920Sstevel@tonic-gate 932705Sjp161948 /* do we have country sub-tag? For example: cs_CZ */ 942705Sjp161948 if (locale[2] == '_') { 952705Sjp161948 if (strlen(locale) < 5 || 962705Sjp161948 !isalpha(locale[3]) || 972705Sjp161948 !isalpha(locale[4]) || 982705Sjp161948 (locale[5] != '\0' && (locale[5] != '.' && 992705Sjp161948 locale[5] != '@'))) { 1002705Sjp161948 return (langtag); 1012705Sjp161948 } 1022705Sjp161948 1032705Sjp161948 /* example: create cs-CZ from cs_CZ */ 1042705Sjp161948 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 1052705Sjp161948 locale + 3) == 5) 1062705Sjp161948 return (langtag); 1070Sstevel@tonic-gate } 1080Sstevel@tonic-gate 1092705Sjp161948 /* in all other cases we just use the primary language sub-tag */ 1102705Sjp161948 return (langtag); 1110Sstevel@tonic-gate } 1120Sstevel@tonic-gate 1132705Sjp161948 uint_t 1140Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 1150Sstevel@tonic-gate { 1162705Sjp161948 return (strcmp(langtag, "i-default") == 0); 1170Sstevel@tonic-gate } 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate /* 1200Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 1210Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 1220Sstevel@tonic-gate */ 1232705Sjp161948 uint_t 1240Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 1250Sstevel@tonic-gate { 1262705Sjp161948 /* match "i-default" to the process' current locale if possible */ 1272705Sjp161948 if (g11n_langtag_is_default(langtag)) { 1282705Sjp161948 if (strcasecmp(locale, "POSIX") == 0 || 1292705Sjp161948 strcasecmp(locale, "C") == 0) 1302705Sjp161948 return (1); 1312705Sjp161948 else 1322705Sjp161948 return (0); 1332705Sjp161948 } 1340Sstevel@tonic-gate 1352705Sjp161948 /* 1362705Sjp161948 * locale must be at least 2 chars long and the lang part must be 1372705Sjp161948 * exactly two characters 1382705Sjp161948 */ 1392705Sjp161948 if (strlen(locale) < 2 || 1402705Sjp161948 (!isalpha(locale[0]) || !isalpha(locale[1]) || 1412705Sjp161948 (locale[2] != '\0' && locale[2] != '_' && 1422705Sjp161948 locale[2] != '.' && locale[2] != '@'))) 1432705Sjp161948 return (0); 1440Sstevel@tonic-gate 1452705Sjp161948 /* same thing with the langtag */ 1462705Sjp161948 if (strlen(langtag) < 2 || 1472705Sjp161948 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 1482705Sjp161948 (langtag[2] != '\0' && langtag[2] != '-'))) 1492705Sjp161948 return (0); 1500Sstevel@tonic-gate 1512705Sjp161948 /* primary language sub-tag and the locale's language part must match */ 1522705Sjp161948 if (strncasecmp(langtag, locale, 2) != 0) 1532705Sjp161948 return (0); 1540Sstevel@tonic-gate 1552705Sjp161948 /* 1562705Sjp161948 * primary language sub-tag and the locale's language match, now 1572705Sjp161948 * fuzzy check country part 1582705Sjp161948 */ 1590Sstevel@tonic-gate 1602705Sjp161948 /* neither langtag nor locale have more than one component */ 1612705Sjp161948 if (langtag[2] == '\0' && 1622705Sjp161948 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 1632705Sjp161948 return (2); 1640Sstevel@tonic-gate 1652705Sjp161948 /* langtag has only one sub-tag... */ 1662705Sjp161948 if (langtag[2] == '\0') 1672705Sjp161948 return (1); 1680Sstevel@tonic-gate 1692705Sjp161948 /* locale has no country code... */ 1702705Sjp161948 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 1712705Sjp161948 return (1); 1722705Sjp161948 1732705Sjp161948 /* langtag has more than one subtag and the locale has a country code */ 1740Sstevel@tonic-gate 1752705Sjp161948 /* ignore second subtag if not two chars */ 1762705Sjp161948 if (strlen(langtag) < 5) 1772705Sjp161948 return (1); 1780Sstevel@tonic-gate 1792705Sjp161948 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 1802705Sjp161948 (langtag[5] != '\0' && langtag[5] != '-')) 1812705Sjp161948 return (1); 1820Sstevel@tonic-gate 1832705Sjp161948 /* ignore rest of locale if there is no two-character country part */ 1842705Sjp161948 if (strlen(locale) < 5) 1852705Sjp161948 return (1); 1860Sstevel@tonic-gate 1872705Sjp161948 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 1882705Sjp161948 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 1892705Sjp161948 return (1); 1900Sstevel@tonic-gate 1912705Sjp161948 /* if the country part matches, return 2 */ 1922705Sjp161948 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 1932705Sjp161948 return (2); 1940Sstevel@tonic-gate 1952705Sjp161948 return (1); 1960Sstevel@tonic-gate } 1970Sstevel@tonic-gate 1980Sstevel@tonic-gate char * 1990Sstevel@tonic-gate g11n_getlocale() 2000Sstevel@tonic-gate { 2012705Sjp161948 /* we have one text domain - always set it */ 2022705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2030Sstevel@tonic-gate 2042705Sjp161948 /* if the locale is not set, set it from the env vars */ 2052705Sjp161948 if (!setlocale(LC_MESSAGES, NULL)) 2062705Sjp161948 (void) setlocale(LC_MESSAGES, ""); 2070Sstevel@tonic-gate 2082705Sjp161948 return (setlocale(LC_MESSAGES, NULL)); 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate 2110Sstevel@tonic-gate void 2120Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 2130Sstevel@tonic-gate { 2142705Sjp161948 char *curr; 2150Sstevel@tonic-gate 2162705Sjp161948 /* we have one text domain - always set it */ 2172705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2180Sstevel@tonic-gate 2192705Sjp161948 if (!locale) 2202705Sjp161948 return; 2210Sstevel@tonic-gate 2222705Sjp161948 if (*locale && ((curr = setlocale(category, NULL))) && 2232705Sjp161948 strcmp(curr, locale) == 0) 2242705Sjp161948 return; 2252628Sjp161948 2262705Sjp161948 /* if <category> is bogus, setlocale() will do nothing */ 2272705Sjp161948 (void) setlocale(category, locale); 2280Sstevel@tonic-gate } 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate char ** 2310Sstevel@tonic-gate g11n_getlocales() 2320Sstevel@tonic-gate { 2332705Sjp161948 FILE *locale_out; 2342705Sjp161948 uint_t n_elems, list_size, long_line = 0; 2352705Sjp161948 char **list; 2362705Sjp161948 char locale[64]; /* 64 bytes is plenty for locale names */ 2372705Sjp161948 2382705Sjp161948 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 2392705Sjp161948 return (NULL); 2400Sstevel@tonic-gate 2412705Sjp161948 /* 2422705Sjp161948 * start with enough room for 65 locales - that's a lot fewer than 2432705Sjp161948 * all the locales available for installation, but a lot more than 2442705Sjp161948 * what most users will need and install 2452705Sjp161948 */ 2462705Sjp161948 n_elems = 0; 2472705Sjp161948 list_size = 192; 2482705Sjp161948 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 2492705Sjp161948 memset(list, 0, sizeof (char *) * (list_size + 1)); 2500Sstevel@tonic-gate 2512705Sjp161948 while (fgets(locale, sizeof (locale), locale_out)) { 2522705Sjp161948 /* skip long locale names (if any) */ 2532705Sjp161948 if (!strchr(locale, '\n')) { 2542705Sjp161948 long_line = 1; 2552705Sjp161948 continue; 2562705Sjp161948 } else if (long_line) { 2572705Sjp161948 long_line = 0; 2582705Sjp161948 continue; 2592705Sjp161948 } 2600Sstevel@tonic-gate 2612705Sjp161948 if (strncmp(locale, "iso_8859", 8) == 0) 2622705Sjp161948 /* ignore locale names like "iso_8859-1" */ 2632705Sjp161948 continue; 2640Sstevel@tonic-gate 2652705Sjp161948 if (n_elems == list_size) { 2662705Sjp161948 list_size *= 2; 2672705Sjp161948 list = (char **)xrealloc((void *) list, 2682705Sjp161948 (list_size + 1) * sizeof (char *)); 2692705Sjp161948 memset(&list[n_elems + 1], 0, 2702705Sjp161948 sizeof (char *) * (list_size - n_elems + 1)); 2712705Sjp161948 } 2722705Sjp161948 2732705Sjp161948 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 2742705Sjp161948 list[n_elems++] = xstrdup(locale); 2750Sstevel@tonic-gate } 2760Sstevel@tonic-gate 277*5562Sjp161948 if (n_elems == 0) { 278*5562Sjp161948 xfree(list); 2793109Sjp161948 return (NULL); 280*5562Sjp161948 } 2813109Sjp161948 2822705Sjp161948 list[n_elems] = NULL; 2832705Sjp161948 (void) pclose(locale_out); 2840Sstevel@tonic-gate 2852705Sjp161948 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 2862705Sjp161948 return (list); 2870Sstevel@tonic-gate } 2880Sstevel@tonic-gate 2890Sstevel@tonic-gate char * 2900Sstevel@tonic-gate g11n_getlangs() 2910Sstevel@tonic-gate { 2922705Sjp161948 char *locale; 2930Sstevel@tonic-gate 2942705Sjp161948 if (getenv("SSH_LANGS")) 2952705Sjp161948 return (xstrdup(getenv("SSH_LANGS"))); 2960Sstevel@tonic-gate 2972705Sjp161948 locale = g11n_getlocale(); 2980Sstevel@tonic-gate 2992705Sjp161948 if (!locale || !*locale) 3002705Sjp161948 return (xstrdup("i-default")); 3010Sstevel@tonic-gate 3022705Sjp161948 return (g11n_locale2langtag(locale)); 3030Sstevel@tonic-gate } 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate char * 3060Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 3070Sstevel@tonic-gate { 3082705Sjp161948 char **p, **r, **q; 309*5562Sjp161948 char *langtag, *langs; 3102705Sjp161948 int locales, skip; 3110Sstevel@tonic-gate 3122705Sjp161948 for (locales = 0, p = locale_set; p && *p; p++) 3132705Sjp161948 locales++; 3140Sstevel@tonic-gate 3152705Sjp161948 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 3162705Sjp161948 memset(r, 0, (locales + 1) * sizeof (char *)); 3170Sstevel@tonic-gate 3182705Sjp161948 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 3192705Sjp161948 skip = 0; 3202705Sjp161948 if ((langtag = g11n_locale2langtag(*p)) == NULL) 3212705Sjp161948 continue; 3222705Sjp161948 for (q = r; (q - r) < locales; q++) { 3232705Sjp161948 if (!*q) 3242705Sjp161948 break; 3252705Sjp161948 if (*q && strcmp(*q, langtag) == 0) 3262705Sjp161948 skip = 1; 3272705Sjp161948 } 3282705Sjp161948 if (!skip) 3292705Sjp161948 *(q++) = langtag; 330*5562Sjp161948 else 331*5562Sjp161948 xfree(langtag); 3322705Sjp161948 *q = NULL; 3330Sstevel@tonic-gate } 3342705Sjp161948 335*5562Sjp161948 langs = xjoin(r, ','); 336*5562Sjp161948 g11n_freelist(r); 337*5562Sjp161948 338*5562Sjp161948 return (langs); 3390Sstevel@tonic-gate } 3400Sstevel@tonic-gate 3412705Sjp161948 static int 3420Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 3430Sstevel@tonic-gate { 3442705Sjp161948 char *s1 = *(char **)d1; 3452705Sjp161948 char *s2 = *(char **)d2; 3460Sstevel@tonic-gate 3472705Sjp161948 return (strcmp(s1, s2)); 3480Sstevel@tonic-gate } 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate int 3510Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 3520Sstevel@tonic-gate { 3532705Sjp161948 int len1, len2; 3542705Sjp161948 char c1, c2; 3550Sstevel@tonic-gate 3562705Sjp161948 len1 = (strchr(langtag1, '-')) ? 357*5562Sjp161948 (strchr(langtag1, '-') - langtag1) 358*5562Sjp161948 : strlen(langtag1); 3590Sstevel@tonic-gate 3602705Sjp161948 len2 = (strchr(langtag2, '-')) ? 361*5562Sjp161948 (strchr(langtag2, '-') - langtag2) 362*5562Sjp161948 : strlen(langtag2); 3630Sstevel@tonic-gate 3642705Sjp161948 /* no match */ 3652705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3662705Sjp161948 return (0); 3670Sstevel@tonic-gate 3682705Sjp161948 c1 = *(langtag1 + len1); 3692705Sjp161948 c2 = *(langtag2 + len2); 3700Sstevel@tonic-gate 3712705Sjp161948 /* no country sub-tags - exact match */ 3722705Sjp161948 if (c1 == '\0' && c2 == '\0') 3732705Sjp161948 return (2); 3740Sstevel@tonic-gate 3752705Sjp161948 /* one langtag has a country sub-tag, the other doesn't */ 3762705Sjp161948 if (c1 == '\0' || c2 == '\0') 3772705Sjp161948 return (1); 3780Sstevel@tonic-gate 3792705Sjp161948 /* can't happen - both langtags have a country sub-tag */ 3802705Sjp161948 if (c1 != '-' || c2 != '-') 3812705Sjp161948 return (1); 3820Sstevel@tonic-gate 3832705Sjp161948 /* compare country subtags */ 3842705Sjp161948 langtag1 = langtag1 + len1 + 1; 3852705Sjp161948 langtag2 = langtag2 + len2 + 1; 3860Sstevel@tonic-gate 3872705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3882705Sjp161948 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 3892705Sjp161948 3902705Sjp161948 len2 = (strchr(langtag2, '-')) ? 3912705Sjp161948 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 3920Sstevel@tonic-gate 3932705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3942705Sjp161948 return (1); 3950Sstevel@tonic-gate 3962705Sjp161948 /* country tags matched - exact match */ 3972705Sjp161948 return (2); 3980Sstevel@tonic-gate } 3990Sstevel@tonic-gate 4000Sstevel@tonic-gate char * 4010Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 4020Sstevel@tonic-gate { 4032705Sjp161948 char **list1, **list2, **list3, **p, **q, **r; 4042705Sjp161948 char *set3, *lang_subtag; 4052705Sjp161948 uint_t n1, n2, n3; 4062705Sjp161948 uint_t do_append; 4072705Sjp161948 4082705Sjp161948 list1 = xsplit(set1, ','); 4092705Sjp161948 list2 = xsplit(set2, ','); 4100Sstevel@tonic-gate 4112705Sjp161948 for (n1 = 0, p = list1; p && *p; p++, n1++) 4122705Sjp161948 ; 4132705Sjp161948 for (n2 = 0, p = list2; p && *p; p++, n2++) 4142705Sjp161948 ; 4150Sstevel@tonic-gate 4162705Sjp161948 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 4172705Sjp161948 *list3 = NULL; 4180Sstevel@tonic-gate 4192705Sjp161948 /* 4202705Sjp161948 * we must not sort the user langtags - sorting or not the server's 4212705Sjp161948 * should not affect the outcome 4222705Sjp161948 */ 4232705Sjp161948 qsort(list2, n2, sizeof (char *), sortcmp); 4240Sstevel@tonic-gate 4252705Sjp161948 for (n3 = 0, p = list1; p && *p; p++) { 4262705Sjp161948 do_append = 0; 4272705Sjp161948 for (q = list2; q && *q; q++) { 4282705Sjp161948 if (g11n_langtag_match(*p, *q) != 2) continue; 4292705Sjp161948 /* append element */ 4302705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4312705Sjp161948 do_append = 1; 4322705Sjp161948 if (!*r) 4332705Sjp161948 break; 4342705Sjp161948 if (strcmp(*p, *r) == 0) { 4352705Sjp161948 do_append = 0; 4362705Sjp161948 break; 4372705Sjp161948 } 4382705Sjp161948 } 4392705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4402705Sjp161948 list3[n3++] = xstrdup(*p); 4412705Sjp161948 list3[n3] = NULL; 4422705Sjp161948 } 4430Sstevel@tonic-gate } 4440Sstevel@tonic-gate } 4452705Sjp161948 4462705Sjp161948 for (p = list1; p && *p; p++) { 4472705Sjp161948 do_append = 0; 4482705Sjp161948 for (q = list2; q && *q; q++) { 4492705Sjp161948 if (g11n_langtag_match(*p, *q) != 1) 4502705Sjp161948 continue; 4510Sstevel@tonic-gate 4522705Sjp161948 /* append element */ 4532705Sjp161948 lang_subtag = xstrdup(*p); 4542705Sjp161948 if (strchr(lang_subtag, '-')) 4552705Sjp161948 *(strchr(lang_subtag, '-')) = '\0'; 4562705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4572705Sjp161948 do_append = 1; 4582705Sjp161948 if (!*r) 4592705Sjp161948 break; 4602705Sjp161948 if (strcmp(lang_subtag, *r) == 0) { 4612705Sjp161948 do_append = 0; 4622705Sjp161948 break; 4632705Sjp161948 } 4642705Sjp161948 } 4652705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4662705Sjp161948 list3[n3++] = lang_subtag; 4672705Sjp161948 list3[n3] = NULL; 4682705Sjp161948 } else 4692705Sjp161948 xfree(lang_subtag); 4700Sstevel@tonic-gate } 4710Sstevel@tonic-gate } 4720Sstevel@tonic-gate 4732705Sjp161948 set3 = xjoin(list3, ','); 4742705Sjp161948 xfree_split_list(list1); 4752705Sjp161948 xfree_split_list(list2); 4762705Sjp161948 xfree_split_list(list3); 4770Sstevel@tonic-gate 4782705Sjp161948 return (set3); 4790Sstevel@tonic-gate } 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate char * 4820Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 4830Sstevel@tonic-gate { 4842705Sjp161948 char *list, *result; 4852705Sjp161948 char **xlist; 4860Sstevel@tonic-gate 4872705Sjp161948 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 4882705Sjp161948 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 4890Sstevel@tonic-gate 4902705Sjp161948 if (!list) 4912705Sjp161948 return (NULL); 4920Sstevel@tonic-gate 4932705Sjp161948 xlist = xsplit(list, ','); 4940Sstevel@tonic-gate 4952705Sjp161948 xfree(list); 4960Sstevel@tonic-gate 4972705Sjp161948 if (!xlist || !*xlist) 4982705Sjp161948 return (NULL); 4990Sstevel@tonic-gate 5002705Sjp161948 result = xstrdup(*xlist); 5012705Sjp161948 xfree_split_list(xlist); 5020Sstevel@tonic-gate 5032705Sjp161948 return (result); 5040Sstevel@tonic-gate } 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate /* 5070Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 5080Sstevel@tonic-gate * a stright strcmp() 5090Sstevel@tonic-gate */ 5102705Sjp161948 static int 5110Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 5120Sstevel@tonic-gate { 5132705Sjp161948 char *dot_ptr; 5142705Sjp161948 char *s1 = *(char **)d1; 5152705Sjp161948 char *s2 = *(char **)d2; 5162705Sjp161948 int s1_is_utf8 = 0; 5172705Sjp161948 int s2_is_utf8 = 0; 5180Sstevel@tonic-gate 5192705Sjp161948 /* check if s1 is a UTF-8 locale */ 5202705Sjp161948 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 5212705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5222705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5232705Sjp161948 s1_is_utf8++; 5242705Sjp161948 } 5252705Sjp161948 5262705Sjp161948 /* check if s2 is a UTF-8 locale */ 5272705Sjp161948 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 5282705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5292705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5302705Sjp161948 s2_is_utf8++; 5312705Sjp161948 } 5320Sstevel@tonic-gate 5332705Sjp161948 /* prefer UTF-8 locales */ 5342705Sjp161948 if (s1_is_utf8 && !s2_is_utf8) 5352705Sjp161948 return (-1); 5360Sstevel@tonic-gate 5372705Sjp161948 if (s2_is_utf8 && !s1_is_utf8) 5382705Sjp161948 return (1); 5390Sstevel@tonic-gate 5402705Sjp161948 /* prefer any locale over the default locales */ 5412705Sjp161948 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 5422705Sjp161948 strcmp(s1, "common") == 0) { 5432705Sjp161948 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 && 5442705Sjp161948 strcmp(s2, "common") != 0) 5452705Sjp161948 return (1); 5462705Sjp161948 } 5470Sstevel@tonic-gate 5482705Sjp161948 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 5492705Sjp161948 strcmp(s2, "common") == 0) { 5502705Sjp161948 if (strcmp(s1, "C") != 0 && 5512705Sjp161948 strcmp(s1, "POSIX") != 0 && 5522705Sjp161948 strcmp(s1, "common") != 0) 5532705Sjp161948 return (-1); 5542705Sjp161948 } 5550Sstevel@tonic-gate 5562705Sjp161948 return (strcmp(s1, s2)); 5570Sstevel@tonic-gate } 5580Sstevel@tonic-gate 5590Sstevel@tonic-gate 5600Sstevel@tonic-gate char ** 5612705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 5620Sstevel@tonic-gate { 5632705Sjp161948 char **langtag_list, **result, **p, **q, **r; 5642705Sjp161948 char *s; 5652705Sjp161948 uint_t do_append, n_langtags, n_locales, n_results, max_results; 5662705Sjp161948 5672705Sjp161948 /* count lang tags and locales */ 5682705Sjp161948 for (n_locales = 0, p = locale_set; p && *p; p++) 5692705Sjp161948 n_locales++; 5700Sstevel@tonic-gate 5712705Sjp161948 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 5722705Sjp161948 /* count the number of langtags */ 5732705Sjp161948 for (; s = strchr(s, ','); s++, n_langtags++) 5742705Sjp161948 ; 5752705Sjp161948 5762705Sjp161948 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 5770Sstevel@tonic-gate 5782705Sjp161948 langtag_list = xsplit(langtag_set, ','); 5792705Sjp161948 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 5802705Sjp161948 ; 5810Sstevel@tonic-gate 5822705Sjp161948 max_results = MIN(n_locales, n_langtags) * 2; 5832705Sjp161948 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 5842705Sjp161948 *result = NULL; 5852705Sjp161948 n_results = 0; 5860Sstevel@tonic-gate 5872705Sjp161948 /* more specific matches first */ 5882705Sjp161948 for (p = langtag_list; p && *p; p++) { 5892705Sjp161948 do_append = 0; 5902705Sjp161948 for (q = locale_set; q && *q; q++) { 5912705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 2) { 5922705Sjp161948 do_append = 1; 5932705Sjp161948 for (r = result; (r - result) <= 5942705Sjp161948 MIN(n_locales, n_langtags); r++) { 5952705Sjp161948 if (!*r) 5962705Sjp161948 break; 5972705Sjp161948 if (strcmp(*q, *r) == 0) { 5982705Sjp161948 do_append = 0; 5992705Sjp161948 break; 6002705Sjp161948 } 6012705Sjp161948 } 6022705Sjp161948 if (do_append && n_results < max_results) { 6032705Sjp161948 result[n_results++] = xstrdup(*q); 6042705Sjp161948 result[n_results] = NULL; 6052705Sjp161948 } 6062705Sjp161948 break; 6072705Sjp161948 } 6080Sstevel@tonic-gate } 6090Sstevel@tonic-gate } 6100Sstevel@tonic-gate 6112705Sjp161948 for (p = langtag_list; p && *p; p++) { 6122705Sjp161948 do_append = 0; 6132705Sjp161948 for (q = locale_set; q && *q; q++) { 6142705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 1) { 6152705Sjp161948 do_append = 1; 6162705Sjp161948 for (r = result; (r - result) <= 6172705Sjp161948 MIN(n_locales, n_langtags); r++) { 6182705Sjp161948 if (!*r) 6192705Sjp161948 break; 6202705Sjp161948 if (strcmp(*q, *r) == 0) { 6212705Sjp161948 do_append = 0; 6222705Sjp161948 break; 6232705Sjp161948 } 6242705Sjp161948 } 6252705Sjp161948 if (do_append && n_results < max_results) { 6262705Sjp161948 result[n_results++] = xstrdup(*q); 6272705Sjp161948 result[n_results] = NULL; 6282705Sjp161948 } 6292705Sjp161948 break; 6302705Sjp161948 } 6310Sstevel@tonic-gate } 6320Sstevel@tonic-gate } 6330Sstevel@tonic-gate 6342705Sjp161948 xfree_split_list(langtag_list); 6352705Sjp161948 6362705Sjp161948 return (result); 6370Sstevel@tonic-gate } 6380Sstevel@tonic-gate 6390Sstevel@tonic-gate char * 6400Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 6410Sstevel@tonic-gate { 642*5562Sjp161948 char **results, **locales, *result = NULL; 643*5562Sjp161948 644*5562Sjp161948 if (srvr_locales == NULL) 645*5562Sjp161948 locales = g11n_getlocales(); 646*5562Sjp161948 else 647*5562Sjp161948 locales = srvr_locales; 6480Sstevel@tonic-gate 6492705Sjp161948 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 650*5562Sjp161948 locales)) == NULL) 651*5562Sjp161948 goto err; 6520Sstevel@tonic-gate 6532705Sjp161948 if (*results != NULL) 6542705Sjp161948 result = xstrdup(*results); 6550Sstevel@tonic-gate 6562705Sjp161948 xfree_split_list(results); 6570Sstevel@tonic-gate 658*5562Sjp161948 err: 659*5562Sjp161948 if (locales != srvr_locales) 660*5562Sjp161948 g11n_freelist(locales); 6612705Sjp161948 return (result); 6620Sstevel@tonic-gate } 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate 6650Sstevel@tonic-gate /* 6660Sstevel@tonic-gate * Functions for validating ASCII and UTF-8 strings 6670Sstevel@tonic-gate * 6680Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 6690Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 6700Sstevel@tonic-gate * friends. 6710Sstevel@tonic-gate * 6720Sstevel@tonic-gate * The return value is 0 if success, EILSEQ or EINVAL. 6730Sstevel@tonic-gate * 6740Sstevel@tonic-gate */ 6752705Sjp161948 uint_t 6762705Sjp161948 g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str) 6770Sstevel@tonic-gate { 6782705Sjp161948 uchar_t *p; 6790Sstevel@tonic-gate 6802705Sjp161948 for (p = (uchar_t *)str; p && *p && (!(*p & 0x80)); p++) 6812705Sjp161948 ; 6820Sstevel@tonic-gate 6832705Sjp161948 if (len && ((p - (uchar_t *)str) != len)) 6842705Sjp161948 return (EILSEQ); 6852705Sjp161948 6862705Sjp161948 return (0); 6870Sstevel@tonic-gate } 6880Sstevel@tonic-gate 6892705Sjp161948 uint_t 6902705Sjp161948 g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str) 6910Sstevel@tonic-gate { 6922705Sjp161948 uchar_t *p; 6932705Sjp161948 uint_t c, l; 6942705Sjp161948 6952705Sjp161948 if (len == 0) 6962705Sjp161948 len = strlen((const char *)str); 6970Sstevel@tonic-gate 6982705Sjp161948 for (p = (uchar_t *)str; p && (p - str < len) && *p; ) { 6992705Sjp161948 /* 8-bit chars begin a UTF-8 sequence */ 7002705Sjp161948 if (*p & 0x80) { 7012705Sjp161948 /* get sequence length and sanity check first byte */ 7022705Sjp161948 if (*p < 0xc0) 7032705Sjp161948 return (EILSEQ); 7042705Sjp161948 else if (*p < 0xe0) 7052705Sjp161948 l = 2; 7062705Sjp161948 else if (*p < 0xf0) 7072705Sjp161948 l = 3; 7082705Sjp161948 else if (*p < 0xf8) 7092705Sjp161948 l = 4; 7102705Sjp161948 else if (*p < 0xfc) 7112705Sjp161948 l = 5; 7122705Sjp161948 else if (*p < 0xfe) 7132705Sjp161948 l = 6; 7142705Sjp161948 else 7152705Sjp161948 return (EILSEQ); 7162705Sjp161948 7172705Sjp161948 if ((p + l - str) >= len) 7182705Sjp161948 return (EILSEQ); 7192705Sjp161948 7202705Sjp161948 /* overlong detection - build codepoint */ 7212705Sjp161948 c = *p & 0x3f; 7222705Sjp161948 /* shift c bits from first byte */ 7232705Sjp161948 c = c << (6 * (l - 1)); 7242705Sjp161948 7252705Sjp161948 if (l > 1) { 7262705Sjp161948 if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80)) 7272705Sjp161948 c = c | ((*(p + 1) & 0x3f) << 7282705Sjp161948 (6 * (l - 2))); 7292705Sjp161948 else 7302705Sjp161948 return (EILSEQ); 7312705Sjp161948 7322705Sjp161948 if (c < 0x80) 7332705Sjp161948 return (EILSEQ); 7342705Sjp161948 } 7350Sstevel@tonic-gate 7362705Sjp161948 if (l > 2) { 7372705Sjp161948 if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80)) 7382705Sjp161948 c = c | ((*(p + 2) & 0x3f) << 7392705Sjp161948 (6 * (l - 3))); 7402705Sjp161948 else 7412705Sjp161948 return (EILSEQ); 7422705Sjp161948 7432705Sjp161948 if (c < 0x800) 7442705Sjp161948 return (EILSEQ); 7452705Sjp161948 } 7462705Sjp161948 7472705Sjp161948 if (l > 3) { 7482705Sjp161948 if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80)) 7492705Sjp161948 c = c | ((*(p + 3) & 0x3f) << 7502705Sjp161948 (6 * (l - 4))); 7512705Sjp161948 else 7522705Sjp161948 return (EILSEQ); 7532705Sjp161948 7542705Sjp161948 if (c < 0x10000) 7552705Sjp161948 return (EILSEQ); 7562705Sjp161948 } 7570Sstevel@tonic-gate 7582705Sjp161948 if (l > 4) { 7592705Sjp161948 if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80)) 7602705Sjp161948 c = c | ((*(p + 4) & 0x3f) << 7612705Sjp161948 (6 * (l - 5))); 7622705Sjp161948 else 7632705Sjp161948 return (EILSEQ); 7642705Sjp161948 7652705Sjp161948 if (c < 0x200000) 7662705Sjp161948 return (EILSEQ); 7672705Sjp161948 } 7680Sstevel@tonic-gate 7692705Sjp161948 if (l > 5) { 7702705Sjp161948 if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80)) 7712705Sjp161948 c = c | (*(p + 5) & 0x3f); 7722705Sjp161948 else 7732705Sjp161948 return (EILSEQ); 7742705Sjp161948 7752705Sjp161948 if (c < 0x4000000) 7762705Sjp161948 return (EILSEQ); 7772705Sjp161948 } 7780Sstevel@tonic-gate 7792705Sjp161948 /* 7802705Sjp161948 * check for UTF-16 surrogates ifs other illegal 7812705Sjp161948 * UTF-8 * points 7822705Sjp161948 */ 7832705Sjp161948 if (((c <= 0xdfff) && (c >= 0xd800)) || 7842705Sjp161948 (c == 0xfffe) || (c == 0xffff)) 7852705Sjp161948 return (EILSEQ); 7862705Sjp161948 p += l; 7872705Sjp161948 } 7882705Sjp161948 /* 7-bit chars are fine */ 7890Sstevel@tonic-gate else 7902705Sjp161948 p++; 7910Sstevel@tonic-gate } 7922705Sjp161948 return (0); 7930Sstevel@tonic-gate } 7940Sstevel@tonic-gate 7950Sstevel@tonic-gate /* 7960Sstevel@tonic-gate * Functions for converting to ASCII or UTF-8 from the local codeset 7970Sstevel@tonic-gate * Functions for converting from ASCII or UTF-8 to the local codeset 7980Sstevel@tonic-gate * 7990Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 8000Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 8010Sstevel@tonic-gate * friends. 8020Sstevel@tonic-gate * 8030Sstevel@tonic-gate * The err parameter is an optional pointer to an integer where 0 8040Sstevel@tonic-gate * (success) or EILSEQ or EINVAL will be stored (failure). 8050Sstevel@tonic-gate * 8060Sstevel@tonic-gate * These functions return NULL if the conversion fails. 8070Sstevel@tonic-gate * 8080Sstevel@tonic-gate */ 8092705Sjp161948 uchar_t * 8102705Sjp161948 g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str) 8110Sstevel@tonic-gate { 8122705Sjp161948 static uint_t initialized = 0; 8132705Sjp161948 static uint_t do_convert = 0; 8142705Sjp161948 iconv_t cd; 8152705Sjp161948 int err; 8160Sstevel@tonic-gate 8172705Sjp161948 if (!initialized) { 8182705Sjp161948 /* 8192705Sjp161948 * iconv_open() fails if the to/from codesets are the 8202705Sjp161948 * same, and there are aliases of codesets to boot... 8212705Sjp161948 */ 8222705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 823*5562Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 824*5562Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 8252705Sjp161948 initialized = 1; 8262705Sjp161948 do_convert = 0; 8272705Sjp161948 } else { 8282705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "646"); 8292705Sjp161948 if (cd == (iconv_t)-1) { 8302705Sjp161948 if (err_ptr) 8312705Sjp161948 *err_ptr = errno; 8322705Sjp161948 if (error_str) 8332705Sjp161948 *error_str = (uchar_t *)"Cannot " 8342705Sjp161948 "convert ASCII strings to the local" 8352705Sjp161948 " codeset"; 8362705Sjp161948 } 8372705Sjp161948 initialized = 1; 8382705Sjp161948 do_convert = 1; 8392705Sjp161948 } 8400Sstevel@tonic-gate } 8412705Sjp161948 8422705Sjp161948 if (!do_convert) { 8432705Sjp161948 if ((err = g11n_validate_ascii(str, 0, error_str))) { 8442705Sjp161948 if (err_ptr) 8452705Sjp161948 *err_ptr = err; 8462705Sjp161948 return (NULL); 8472705Sjp161948 } else 8482705Sjp161948 return ((uchar_t *)xstrdup(str)); 8490Sstevel@tonic-gate } 8500Sstevel@tonic-gate 8512705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8520Sstevel@tonic-gate } 8530Sstevel@tonic-gate 8542705Sjp161948 uchar_t * 8552705Sjp161948 g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 8560Sstevel@tonic-gate { 8572705Sjp161948 static uint_t initialized = 0; 8582705Sjp161948 static uint_t do_convert = 0; 8592705Sjp161948 iconv_t cd; 8602705Sjp161948 int err; 8610Sstevel@tonic-gate 8622705Sjp161948 if (!initialized) { 8632705Sjp161948 /* 8642705Sjp161948 * iconv_open() fails if the to/from codesets are the 8652705Sjp161948 * same, and there are aliases of codesets to boot... 8662705Sjp161948 */ 8672705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 8682705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 8692705Sjp161948 initialized = 1; 8702705Sjp161948 do_convert = 0; 8712705Sjp161948 } else { 8722705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 8732705Sjp161948 if (cd == (iconv_t)-1) { 8742705Sjp161948 if (err_ptr) 8752705Sjp161948 *err_ptr = errno; 8762705Sjp161948 if (error_str) 8772705Sjp161948 *error_str = (uchar_t *)"Cannot " 8782705Sjp161948 "convert UTF-8 strings to the " 8792705Sjp161948 "local codeset"; 8802705Sjp161948 } 8812705Sjp161948 initialized = 1; 8822705Sjp161948 do_convert = 1; 8832705Sjp161948 } 8840Sstevel@tonic-gate } 8852705Sjp161948 8862705Sjp161948 if (!do_convert) { 8872705Sjp161948 if ((err = g11n_validate_utf8(str, 0, error_str))) { 8882705Sjp161948 if (err_ptr) 8892705Sjp161948 *err_ptr = err; 8902705Sjp161948 return (NULL); 8912705Sjp161948 } else 8922705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate 8952705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8960Sstevel@tonic-gate } 8970Sstevel@tonic-gate 8980Sstevel@tonic-gate char * 8992705Sjp161948 g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str) 9000Sstevel@tonic-gate { 9012705Sjp161948 static uint_t initialized = 0; 9022705Sjp161948 static uint_t do_convert = 0; 9032705Sjp161948 iconv_t cd; 9040Sstevel@tonic-gate 9052705Sjp161948 if (!initialized) { 9062705Sjp161948 /* 9072705Sjp161948 * iconv_open() fails if the to/from codesets are the 9082705Sjp161948 * same, and there are aliases of codesets to boot... 9092705Sjp161948 */ 9102705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 9112705Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 9122705Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 9132705Sjp161948 initialized = 1; 9142705Sjp161948 do_convert = 0; 9152705Sjp161948 } else { 9162705Sjp161948 cd = iconv_open("646", nl_langinfo(CODESET)); 9172705Sjp161948 if (cd == (iconv_t)-1) { 9182705Sjp161948 if (err_ptr) 9192705Sjp161948 *err_ptr = errno; 9202705Sjp161948 if (error_str) 9212705Sjp161948 *error_str = (uchar_t *)"Cannot " 9222705Sjp161948 "convert UTF-8 strings to the " 9232705Sjp161948 "local codeset"; 9242705Sjp161948 } 9252705Sjp161948 initialized = 1; 9262705Sjp161948 do_convert = 1; 9272705Sjp161948 } 9280Sstevel@tonic-gate } 9290Sstevel@tonic-gate 9302705Sjp161948 if (!do_convert) 9312705Sjp161948 return (xstrdup((char *)str)); 9322705Sjp161948 9332705Sjp161948 return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9340Sstevel@tonic-gate } 9350Sstevel@tonic-gate 9362705Sjp161948 uchar_t * 9372705Sjp161948 g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 9380Sstevel@tonic-gate { 9392705Sjp161948 static uint_t initialized = 0; 9402705Sjp161948 static uint_t do_convert = 0; 9412705Sjp161948 iconv_t cd; 9420Sstevel@tonic-gate 9432705Sjp161948 if (!initialized) { 9442705Sjp161948 /* 9452705Sjp161948 * iconv_open() fails if the to/from codesets are the 9462705Sjp161948 * same, and there are aliases of codesets to boot... 9472705Sjp161948 */ 9482705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 9492705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 9502705Sjp161948 initialized = 1; 9512705Sjp161948 do_convert = 0; 9522705Sjp161948 } else { 9532705Sjp161948 cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 9542705Sjp161948 if (cd == (iconv_t)-1) { 9552705Sjp161948 if (err_ptr) 9562705Sjp161948 *err_ptr = errno; 9572705Sjp161948 if (error_str) 9582705Sjp161948 *error_str = (uchar_t *)"Cannot " 9592705Sjp161948 "convert UTF-8 strings to the " 9602705Sjp161948 "local codeset"; 9612705Sjp161948 } 9622705Sjp161948 initialized = 1; 9632705Sjp161948 do_convert = 1; 9642705Sjp161948 } 9650Sstevel@tonic-gate } 9660Sstevel@tonic-gate 9672705Sjp161948 if (!do_convert) 9682705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 9692705Sjp161948 9702705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9710Sstevel@tonic-gate } 9720Sstevel@tonic-gate 9730Sstevel@tonic-gate 9740Sstevel@tonic-gate /* 9750Sstevel@tonic-gate * Wrapper around iconv() 9760Sstevel@tonic-gate * 9770Sstevel@tonic-gate * The caller is responsible for freeing the result and for handling 9780Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 9790Sstevel@tonic-gate */ 9802705Sjp161948 static uchar_t * 9812705Sjp161948 do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len, 9822705Sjp161948 uint_t *outlen, int *err, uchar_t **err_str) 9832705Sjp161948 { 9842705Sjp161948 size_t inbytesleft, outbytesleft, converted_size; 9852705Sjp161948 char *outbuf; 9862705Sjp161948 uchar_t *converted; 9872705Sjp161948 const char *inbuf; 9882705Sjp161948 uint_t mul = 0; 9890Sstevel@tonic-gate 9902705Sjp161948 if (!buf || !(*(char *)buf)) 9912705Sjp161948 return (NULL); 9922705Sjp161948 9932705Sjp161948 if (len == 0) 9942705Sjp161948 len = strlen(buf); 9952705Sjp161948 9962705Sjp161948 /* reset conversion descriptor */ 9972705Sjp161948 /* XXX Do we need initial shift sequences for UTF-8??? */ 9982705Sjp161948 (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 9992705Sjp161948 inbuf = (const char *) buf; 10002705Sjp161948 10012705Sjp161948 if (mul_ptr) 10022705Sjp161948 mul = *mul_ptr; 10032705Sjp161948 10042705Sjp161948 converted_size = (len << mul); 10052705Sjp161948 outbuf = (char *)xmalloc(converted_size + 1); /* for null */ 10062705Sjp161948 converted = (uchar_t *)outbuf; 10072705Sjp161948 outbytesleft = len; 10080Sstevel@tonic-gate 10092705Sjp161948 do { 10102705Sjp161948 if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 10112705Sjp161948 (size_t)-1) { 10122705Sjp161948 if (errno == E2BIG) { 10132705Sjp161948 /* UTF-8 codepoints are at most 8 bytes long */ 10142705Sjp161948 if (mul > 2) { 10152705Sjp161948 if (err_str) 10162705Sjp161948 *err_str = (uchar_t *) 10172705Sjp161948 "Conversion to UTF-8 failed" 10182705Sjp161948 " due to preposterous space" 10192705Sjp161948 " requirements"; 10202705Sjp161948 if (err) 10212705Sjp161948 *err = EILSEQ; 10222705Sjp161948 return (NULL); 10232705Sjp161948 } 10240Sstevel@tonic-gate 10252705Sjp161948 /* 10262705Sjp161948 * re-alloc output and ensure that the outbuf 10272705Sjp161948 * and outbytesleft values are adjusted 10282705Sjp161948 */ 10292705Sjp161948 converted = xrealloc(converted, 10302705Sjp161948 converted_size << 1 + 1); 10312705Sjp161948 outbuf = (char *)converted + converted_size - 10322705Sjp161948 outbytesleft; 10332705Sjp161948 converted_size = (len << ++(mul)); 10342705Sjp161948 outbytesleft = converted_size - outbytesleft; 10352705Sjp161948 } else { 10362705Sjp161948 /* 10372705Sjp161948 * let the caller deal with iconv() errors, 10382705Sjp161948 * probably by calling fatal(); xfree() does 10392705Sjp161948 * not set errno 10402705Sjp161948 */ 10412705Sjp161948 if (err) 10422705Sjp161948 *err = errno; 10432705Sjp161948 xfree(converted); 10442705Sjp161948 return (NULL); 10452705Sjp161948 } 10462705Sjp161948 } 10472705Sjp161948 } while (inbytesleft); 10482705Sjp161948 10492705Sjp161948 *outbuf = '\0'; /* ensure null-termination */ 10502705Sjp161948 if (outlen) 10512705Sjp161948 *outlen = converted_size - outbytesleft; 10522705Sjp161948 if (mul_ptr) 10532705Sjp161948 *mul_ptr = mul; 10542705Sjp161948 10552705Sjp161948 return (converted); 10560Sstevel@tonic-gate } 1057*5562Sjp161948 1058*5562Sjp161948 /* 1059*5562Sjp161948 * Free all strings in the list and then free the list itself. We know that the 1060*5562Sjp161948 * list ends with a NULL pointer. 1061*5562Sjp161948 */ 1062*5562Sjp161948 void 1063*5562Sjp161948 g11n_freelist(char **list) 1064*5562Sjp161948 { 1065*5562Sjp161948 int i = 0; 1066*5562Sjp161948 1067*5562Sjp161948 while (list[i] != NULL) { 1068*5562Sjp161948 xfree(list[i]); 1069*5562Sjp161948 i++; 1070*5562Sjp161948 } 1071*5562Sjp161948 1072*5562Sjp161948 xfree(list); 1073*5562Sjp161948 } 1074