10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52628Sjp161948 * Common Development and Distribution License (the "License"). 62628Sjp161948 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate * 212628Sjp161948 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 220Sstevel@tonic-gate * Use is subject to license terms. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <errno.h> 280Sstevel@tonic-gate #include <locale.h> 290Sstevel@tonic-gate #include <langinfo.h> 300Sstevel@tonic-gate #include <iconv.h> 310Sstevel@tonic-gate #include <ctype.h> 320Sstevel@tonic-gate #include <strings.h> 330Sstevel@tonic-gate #include <string.h> 340Sstevel@tonic-gate #include <stdio.h> 350Sstevel@tonic-gate #include <stdlib.h> 360Sstevel@tonic-gate #include "includes.h" 370Sstevel@tonic-gate #include "xmalloc.h" 380Sstevel@tonic-gate #include "xlist.h" 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifdef MIN 410Sstevel@tonic-gate #undef MIN 420Sstevel@tonic-gate #endif /* MIN */ 430Sstevel@tonic-gate 442705Sjp161948 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 450Sstevel@tonic-gate 462705Sjp161948 #define LOCALE_PATH "/usr/bin/locale" 470Sstevel@tonic-gate 482705Sjp161948 /* two-char country code, '-' and two-char region code */ 492705Sjp161948 #define LANGTAG_MAX 5 500Sstevel@tonic-gate 512705Sjp161948 static uchar_t *do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, 522705Sjp161948 uint_t len, uint_t *outlen, int *err, uchar_t **err_str); 530Sstevel@tonic-gate 540Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 550Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 560Sstevel@tonic-gate 572705Sjp161948 uint_t g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str); 582705Sjp161948 uint_t g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str); 590Sstevel@tonic-gate 602705Sjp161948 static char * 610Sstevel@tonic-gate g11n_locale2langtag(char *locale) 620Sstevel@tonic-gate { 632705Sjp161948 char *langtag; 640Sstevel@tonic-gate 652705Sjp161948 /* base cases */ 662705Sjp161948 if (!locale || !*locale) 672705Sjp161948 return (NULL); 680Sstevel@tonic-gate 692705Sjp161948 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 702705Sjp161948 return ("i-default"); 710Sstevel@tonic-gate 722705Sjp161948 /* punt for language codes which are not exactly 2 letters */ 732705Sjp161948 if (strlen(locale) < 2 || 742705Sjp161948 !isalpha(locale[0]) || 752705Sjp161948 !isalpha(locale[1]) || 762705Sjp161948 (locale[2] != '\0' && 772705Sjp161948 locale[2] != '_' && 782705Sjp161948 locale[2] != '.' && 792705Sjp161948 locale[2] != '@')) 802705Sjp161948 return (NULL); 810Sstevel@tonic-gate 820Sstevel@tonic-gate 832705Sjp161948 /* we have a primary language sub-tag */ 842705Sjp161948 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 850Sstevel@tonic-gate 862705Sjp161948 strncpy(langtag, locale, 2); 872705Sjp161948 langtag[2] = '\0'; 880Sstevel@tonic-gate 892705Sjp161948 /* do we have country sub-tag? For example: cs_CZ */ 902705Sjp161948 if (locale[2] == '_') { 912705Sjp161948 if (strlen(locale) < 5 || 922705Sjp161948 !isalpha(locale[3]) || 932705Sjp161948 !isalpha(locale[4]) || 942705Sjp161948 (locale[5] != '\0' && (locale[5] != '.' && 952705Sjp161948 locale[5] != '@'))) { 962705Sjp161948 return (langtag); 972705Sjp161948 } 982705Sjp161948 992705Sjp161948 /* example: create cs-CZ from cs_CZ */ 1002705Sjp161948 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 1012705Sjp161948 locale + 3) == 5) 1022705Sjp161948 return (langtag); 1030Sstevel@tonic-gate } 1040Sstevel@tonic-gate 1052705Sjp161948 /* in all other cases we just use the primary language sub-tag */ 1062705Sjp161948 return (langtag); 1070Sstevel@tonic-gate } 1080Sstevel@tonic-gate 1092705Sjp161948 uint_t 1100Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 1110Sstevel@tonic-gate { 1122705Sjp161948 return (strcmp(langtag, "i-default") == 0); 1130Sstevel@tonic-gate } 1140Sstevel@tonic-gate 1150Sstevel@tonic-gate /* 1160Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 1170Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 1180Sstevel@tonic-gate */ 1192705Sjp161948 uint_t 1200Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 1210Sstevel@tonic-gate { 1222705Sjp161948 /* match "i-default" to the process' current locale if possible */ 1232705Sjp161948 if (g11n_langtag_is_default(langtag)) { 1242705Sjp161948 if (strcasecmp(locale, "POSIX") == 0 || 1252705Sjp161948 strcasecmp(locale, "C") == 0) 1262705Sjp161948 return (1); 1272705Sjp161948 else 1282705Sjp161948 return (0); 1292705Sjp161948 } 1300Sstevel@tonic-gate 1312705Sjp161948 /* 1322705Sjp161948 * locale must be at least 2 chars long and the lang part must be 1332705Sjp161948 * exactly two characters 1342705Sjp161948 */ 1352705Sjp161948 if (strlen(locale) < 2 || 1362705Sjp161948 (!isalpha(locale[0]) || !isalpha(locale[1]) || 1372705Sjp161948 (locale[2] != '\0' && locale[2] != '_' && 1382705Sjp161948 locale[2] != '.' && locale[2] != '@'))) 1392705Sjp161948 return (0); 1400Sstevel@tonic-gate 1412705Sjp161948 /* same thing with the langtag */ 1422705Sjp161948 if (strlen(langtag) < 2 || 1432705Sjp161948 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 1442705Sjp161948 (langtag[2] != '\0' && langtag[2] != '-'))) 1452705Sjp161948 return (0); 1460Sstevel@tonic-gate 1472705Sjp161948 /* primary language sub-tag and the locale's language part must match */ 1482705Sjp161948 if (strncasecmp(langtag, locale, 2) != 0) 1492705Sjp161948 return (0); 1500Sstevel@tonic-gate 1512705Sjp161948 /* 1522705Sjp161948 * primary language sub-tag and the locale's language match, now 1532705Sjp161948 * fuzzy check country part 1542705Sjp161948 */ 1550Sstevel@tonic-gate 1562705Sjp161948 /* neither langtag nor locale have more than one component */ 1572705Sjp161948 if (langtag[2] == '\0' && 1582705Sjp161948 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 1592705Sjp161948 return (2); 1600Sstevel@tonic-gate 1612705Sjp161948 /* langtag has only one sub-tag... */ 1622705Sjp161948 if (langtag[2] == '\0') 1632705Sjp161948 return (1); 1640Sstevel@tonic-gate 1652705Sjp161948 /* locale has no country code... */ 1662705Sjp161948 if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 1672705Sjp161948 return (1); 1682705Sjp161948 1692705Sjp161948 /* langtag has more than one subtag and the locale has a country code */ 1700Sstevel@tonic-gate 1712705Sjp161948 /* ignore second subtag if not two chars */ 1722705Sjp161948 if (strlen(langtag) < 5) 1732705Sjp161948 return (1); 1740Sstevel@tonic-gate 1752705Sjp161948 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 1762705Sjp161948 (langtag[5] != '\0' && langtag[5] != '-')) 1772705Sjp161948 return (1); 1780Sstevel@tonic-gate 1792705Sjp161948 /* ignore rest of locale if there is no two-character country part */ 1802705Sjp161948 if (strlen(locale) < 5) 1812705Sjp161948 return (1); 1820Sstevel@tonic-gate 1832705Sjp161948 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 1842705Sjp161948 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 1852705Sjp161948 return (1); 1860Sstevel@tonic-gate 1872705Sjp161948 /* if the country part matches, return 2 */ 1882705Sjp161948 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 1892705Sjp161948 return (2); 1900Sstevel@tonic-gate 1912705Sjp161948 return (1); 1920Sstevel@tonic-gate } 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate char * 1950Sstevel@tonic-gate g11n_getlocale() 1960Sstevel@tonic-gate { 1972705Sjp161948 /* we have one text domain - always set it */ 1982705Sjp161948 (void) textdomain(TEXT_DOMAIN); 1990Sstevel@tonic-gate 2002705Sjp161948 /* if the locale is not set, set it from the env vars */ 2012705Sjp161948 if (!setlocale(LC_MESSAGES, NULL)) 2022705Sjp161948 (void) setlocale(LC_MESSAGES, ""); 2030Sstevel@tonic-gate 2042705Sjp161948 return (setlocale(LC_MESSAGES, NULL)); 2050Sstevel@tonic-gate } 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate void 2080Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 2090Sstevel@tonic-gate { 2102705Sjp161948 char *curr; 2110Sstevel@tonic-gate 2122705Sjp161948 /* we have one text domain - always set it */ 2132705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2140Sstevel@tonic-gate 2152705Sjp161948 if (!locale) 2162705Sjp161948 return; 2170Sstevel@tonic-gate 2182705Sjp161948 if (*locale && ((curr = setlocale(category, NULL))) && 2192705Sjp161948 strcmp(curr, locale) == 0) 2202705Sjp161948 return; 2212628Sjp161948 2222705Sjp161948 /* if <category> is bogus, setlocale() will do nothing */ 2232705Sjp161948 (void) setlocale(category, locale); 2240Sstevel@tonic-gate } 2250Sstevel@tonic-gate 2260Sstevel@tonic-gate char ** 2270Sstevel@tonic-gate g11n_getlocales() 2280Sstevel@tonic-gate { 2292705Sjp161948 FILE *locale_out; 2302705Sjp161948 uint_t n_elems, list_size, long_line = 0; 2312705Sjp161948 char **list; 2322705Sjp161948 char locale[64]; /* 64 bytes is plenty for locale names */ 2332705Sjp161948 2342705Sjp161948 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 2352705Sjp161948 return (NULL); 2360Sstevel@tonic-gate 2372705Sjp161948 /* 2382705Sjp161948 * start with enough room for 65 locales - that's a lot fewer than 2392705Sjp161948 * all the locales available for installation, but a lot more than 2402705Sjp161948 * what most users will need and install 2412705Sjp161948 */ 2422705Sjp161948 n_elems = 0; 2432705Sjp161948 list_size = 192; 2442705Sjp161948 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 2452705Sjp161948 memset(list, 0, sizeof (char *) * (list_size + 1)); 2460Sstevel@tonic-gate 2472705Sjp161948 while (fgets(locale, sizeof (locale), locale_out)) { 2482705Sjp161948 /* skip long locale names (if any) */ 2492705Sjp161948 if (!strchr(locale, '\n')) { 2502705Sjp161948 long_line = 1; 2512705Sjp161948 continue; 2522705Sjp161948 } else if (long_line) { 2532705Sjp161948 long_line = 0; 2542705Sjp161948 continue; 2552705Sjp161948 } 2560Sstevel@tonic-gate 2572705Sjp161948 if (strncmp(locale, "iso_8859", 8) == 0) 2582705Sjp161948 /* ignore locale names like "iso_8859-1" */ 2592705Sjp161948 continue; 2600Sstevel@tonic-gate 2612705Sjp161948 if (n_elems == list_size) { 2622705Sjp161948 list_size *= 2; 2632705Sjp161948 list = (char **)xrealloc((void *) list, 2642705Sjp161948 (list_size + 1) * sizeof (char *)); 2652705Sjp161948 memset(&list[n_elems + 1], 0, 2662705Sjp161948 sizeof (char *) * (list_size - n_elems + 1)); 2672705Sjp161948 } 2682705Sjp161948 2692705Sjp161948 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 2702705Sjp161948 list[n_elems++] = xstrdup(locale); 2710Sstevel@tonic-gate } 2720Sstevel@tonic-gate 273*3109Sjp161948 if (n_elems == 0) 274*3109Sjp161948 return (NULL); 275*3109Sjp161948 2762705Sjp161948 list[n_elems] = NULL; 2772705Sjp161948 (void) pclose(locale_out); 2780Sstevel@tonic-gate 2792705Sjp161948 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 2802705Sjp161948 return (list); 2810Sstevel@tonic-gate } 2820Sstevel@tonic-gate 2830Sstevel@tonic-gate char * 2840Sstevel@tonic-gate g11n_getlangs() 2850Sstevel@tonic-gate { 2862705Sjp161948 char *locale; 2870Sstevel@tonic-gate 2882705Sjp161948 if (getenv("SSH_LANGS")) 2892705Sjp161948 return (xstrdup(getenv("SSH_LANGS"))); 2900Sstevel@tonic-gate 2912705Sjp161948 locale = g11n_getlocale(); 2920Sstevel@tonic-gate 2932705Sjp161948 if (!locale || !*locale) 2942705Sjp161948 return (xstrdup("i-default")); 2950Sstevel@tonic-gate 2962705Sjp161948 return (g11n_locale2langtag(locale)); 2970Sstevel@tonic-gate } 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate char * 3000Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 3010Sstevel@tonic-gate { 3022705Sjp161948 char **p, **r, **q; 3032705Sjp161948 char *langtag; 3042705Sjp161948 int locales, skip; 3050Sstevel@tonic-gate 3062705Sjp161948 for (locales = 0, p = locale_set; p && *p; p++) 3072705Sjp161948 locales++; 3080Sstevel@tonic-gate 3092705Sjp161948 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 3102705Sjp161948 memset(r, 0, (locales + 1) * sizeof (char *)); 3110Sstevel@tonic-gate 3122705Sjp161948 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 3132705Sjp161948 skip = 0; 3142705Sjp161948 if ((langtag = g11n_locale2langtag(*p)) == NULL) 3152705Sjp161948 continue; 3162705Sjp161948 for (q = r; (q - r) < locales; q++) { 3172705Sjp161948 if (!*q) 3182705Sjp161948 break; 3192705Sjp161948 if (*q && strcmp(*q, langtag) == 0) 3202705Sjp161948 skip = 1; 3212705Sjp161948 } 3222705Sjp161948 if (!skip) 3232705Sjp161948 *(q++) = langtag; 3242705Sjp161948 *q = NULL; 3250Sstevel@tonic-gate } 3262705Sjp161948 3272705Sjp161948 return (xjoin(r, ',')); 3280Sstevel@tonic-gate } 3290Sstevel@tonic-gate 3302705Sjp161948 static int 3310Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 3320Sstevel@tonic-gate { 3332705Sjp161948 char *s1 = *(char **)d1; 3342705Sjp161948 char *s2 = *(char **)d2; 3350Sstevel@tonic-gate 3362705Sjp161948 return (strcmp(s1, s2)); 3370Sstevel@tonic-gate } 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate int 3400Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 3410Sstevel@tonic-gate { 3422705Sjp161948 int len1, len2; 3432705Sjp161948 char c1, c2; 3440Sstevel@tonic-gate 3452705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3460Sstevel@tonic-gate (strchr(langtag1, '-') - langtag1) 3470Sstevel@tonic-gate : strlen(langtag1); 3480Sstevel@tonic-gate 3492705Sjp161948 len2 = (strchr(langtag2, '-')) ? 3500Sstevel@tonic-gate (strchr(langtag2, '-') - langtag2) 3510Sstevel@tonic-gate : strlen(langtag2); 3520Sstevel@tonic-gate 3532705Sjp161948 /* no match */ 3542705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3552705Sjp161948 return (0); 3560Sstevel@tonic-gate 3572705Sjp161948 c1 = *(langtag1 + len1); 3582705Sjp161948 c2 = *(langtag2 + len2); 3590Sstevel@tonic-gate 3602705Sjp161948 /* no country sub-tags - exact match */ 3612705Sjp161948 if (c1 == '\0' && c2 == '\0') 3622705Sjp161948 return (2); 3630Sstevel@tonic-gate 3642705Sjp161948 /* one langtag has a country sub-tag, the other doesn't */ 3652705Sjp161948 if (c1 == '\0' || c2 == '\0') 3662705Sjp161948 return (1); 3670Sstevel@tonic-gate 3682705Sjp161948 /* can't happen - both langtags have a country sub-tag */ 3692705Sjp161948 if (c1 != '-' || c2 != '-') 3702705Sjp161948 return (1); 3710Sstevel@tonic-gate 3722705Sjp161948 /* compare country subtags */ 3732705Sjp161948 langtag1 = langtag1 + len1 + 1; 3742705Sjp161948 langtag2 = langtag2 + len2 + 1; 3750Sstevel@tonic-gate 3762705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3772705Sjp161948 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 3782705Sjp161948 3792705Sjp161948 len2 = (strchr(langtag2, '-')) ? 3802705Sjp161948 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 3810Sstevel@tonic-gate 3822705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3832705Sjp161948 return (1); 3840Sstevel@tonic-gate 3852705Sjp161948 /* country tags matched - exact match */ 3862705Sjp161948 return (2); 3870Sstevel@tonic-gate } 3880Sstevel@tonic-gate 3890Sstevel@tonic-gate char * 3900Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 3910Sstevel@tonic-gate { 3922705Sjp161948 char **list1, **list2, **list3, **p, **q, **r; 3932705Sjp161948 char *set3, *lang_subtag; 3942705Sjp161948 uint_t n1, n2, n3; 3952705Sjp161948 uint_t do_append; 3962705Sjp161948 3972705Sjp161948 list1 = xsplit(set1, ','); 3982705Sjp161948 list2 = xsplit(set2, ','); 3990Sstevel@tonic-gate 4002705Sjp161948 for (n1 = 0, p = list1; p && *p; p++, n1++) 4012705Sjp161948 ; 4022705Sjp161948 for (n2 = 0, p = list2; p && *p; p++, n2++) 4032705Sjp161948 ; 4040Sstevel@tonic-gate 4052705Sjp161948 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 4062705Sjp161948 *list3 = NULL; 4070Sstevel@tonic-gate 4082705Sjp161948 /* 4092705Sjp161948 * we must not sort the user langtags - sorting or not the server's 4102705Sjp161948 * should not affect the outcome 4112705Sjp161948 */ 4122705Sjp161948 qsort(list2, n2, sizeof (char *), sortcmp); 4130Sstevel@tonic-gate 4142705Sjp161948 for (n3 = 0, p = list1; p && *p; p++) { 4152705Sjp161948 do_append = 0; 4162705Sjp161948 for (q = list2; q && *q; q++) { 4172705Sjp161948 if (g11n_langtag_match(*p, *q) != 2) continue; 4182705Sjp161948 /* append element */ 4192705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4202705Sjp161948 do_append = 1; 4212705Sjp161948 if (!*r) 4222705Sjp161948 break; 4232705Sjp161948 if (strcmp(*p, *r) == 0) { 4242705Sjp161948 do_append = 0; 4252705Sjp161948 break; 4262705Sjp161948 } 4272705Sjp161948 } 4282705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4292705Sjp161948 list3[n3++] = xstrdup(*p); 4302705Sjp161948 list3[n3] = NULL; 4312705Sjp161948 } 4320Sstevel@tonic-gate } 4330Sstevel@tonic-gate } 4342705Sjp161948 4352705Sjp161948 for (p = list1; p && *p; p++) { 4362705Sjp161948 do_append = 0; 4372705Sjp161948 for (q = list2; q && *q; q++) { 4382705Sjp161948 if (g11n_langtag_match(*p, *q) != 1) 4392705Sjp161948 continue; 4400Sstevel@tonic-gate 4412705Sjp161948 /* append element */ 4422705Sjp161948 lang_subtag = xstrdup(*p); 4432705Sjp161948 if (strchr(lang_subtag, '-')) 4442705Sjp161948 *(strchr(lang_subtag, '-')) = '\0'; 4452705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4462705Sjp161948 do_append = 1; 4472705Sjp161948 if (!*r) 4482705Sjp161948 break; 4492705Sjp161948 if (strcmp(lang_subtag, *r) == 0) { 4502705Sjp161948 do_append = 0; 4512705Sjp161948 break; 4522705Sjp161948 } 4532705Sjp161948 } 4542705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4552705Sjp161948 list3[n3++] = lang_subtag; 4562705Sjp161948 list3[n3] = NULL; 4572705Sjp161948 } else 4582705Sjp161948 xfree(lang_subtag); 4590Sstevel@tonic-gate } 4600Sstevel@tonic-gate } 4610Sstevel@tonic-gate 4622705Sjp161948 set3 = xjoin(list3, ','); 4632705Sjp161948 xfree_split_list(list1); 4642705Sjp161948 xfree_split_list(list2); 4652705Sjp161948 xfree_split_list(list3); 4660Sstevel@tonic-gate 4672705Sjp161948 return (set3); 4680Sstevel@tonic-gate } 4690Sstevel@tonic-gate 4700Sstevel@tonic-gate char * 4710Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 4720Sstevel@tonic-gate { 4732705Sjp161948 char *list, *result; 4742705Sjp161948 char **xlist; 4750Sstevel@tonic-gate 4762705Sjp161948 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 4772705Sjp161948 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 4780Sstevel@tonic-gate 4792705Sjp161948 if (!list) 4802705Sjp161948 return (NULL); 4810Sstevel@tonic-gate 4822705Sjp161948 xlist = xsplit(list, ','); 4830Sstevel@tonic-gate 4842705Sjp161948 xfree(list); 4850Sstevel@tonic-gate 4862705Sjp161948 if (!xlist || !*xlist) 4872705Sjp161948 return (NULL); 4880Sstevel@tonic-gate 4892705Sjp161948 result = xstrdup(*xlist); 4902705Sjp161948 xfree_split_list(xlist); 4910Sstevel@tonic-gate 4922705Sjp161948 return (result); 4930Sstevel@tonic-gate } 4940Sstevel@tonic-gate 4950Sstevel@tonic-gate /* 4960Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 4970Sstevel@tonic-gate * a stright strcmp() 4980Sstevel@tonic-gate */ 4992705Sjp161948 static int 5000Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 5010Sstevel@tonic-gate { 5022705Sjp161948 char *dot_ptr; 5032705Sjp161948 char *s1 = *(char **)d1; 5042705Sjp161948 char *s2 = *(char **)d2; 5052705Sjp161948 int s1_is_utf8 = 0; 5062705Sjp161948 int s2_is_utf8 = 0; 5070Sstevel@tonic-gate 5082705Sjp161948 /* check if s1 is a UTF-8 locale */ 5092705Sjp161948 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 5102705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5112705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5122705Sjp161948 s1_is_utf8++; 5132705Sjp161948 } 5142705Sjp161948 5152705Sjp161948 /* check if s2 is a UTF-8 locale */ 5162705Sjp161948 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 5172705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5182705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5192705Sjp161948 s2_is_utf8++; 5202705Sjp161948 } 5210Sstevel@tonic-gate 5222705Sjp161948 /* prefer UTF-8 locales */ 5232705Sjp161948 if (s1_is_utf8 && !s2_is_utf8) 5242705Sjp161948 return (-1); 5250Sstevel@tonic-gate 5262705Sjp161948 if (s2_is_utf8 && !s1_is_utf8) 5272705Sjp161948 return (1); 5280Sstevel@tonic-gate 5292705Sjp161948 /* prefer any locale over the default locales */ 5302705Sjp161948 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 5312705Sjp161948 strcmp(s1, "common") == 0) { 5322705Sjp161948 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 && 5332705Sjp161948 strcmp(s2, "common") != 0) 5342705Sjp161948 return (1); 5352705Sjp161948 } 5360Sstevel@tonic-gate 5372705Sjp161948 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 5382705Sjp161948 strcmp(s2, "common") == 0) { 5392705Sjp161948 if (strcmp(s1, "C") != 0 && 5402705Sjp161948 strcmp(s1, "POSIX") != 0 && 5412705Sjp161948 strcmp(s1, "common") != 0) 5422705Sjp161948 return (-1); 5432705Sjp161948 } 5440Sstevel@tonic-gate 5452705Sjp161948 return (strcmp(s1, s2)); 5460Sstevel@tonic-gate } 5470Sstevel@tonic-gate 5480Sstevel@tonic-gate 5490Sstevel@tonic-gate char ** 5502705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 5510Sstevel@tonic-gate { 5522705Sjp161948 char **langtag_list, **result, **p, **q, **r; 5532705Sjp161948 char *s; 5542705Sjp161948 uint_t do_append, n_langtags, n_locales, n_results, max_results; 5552705Sjp161948 5562705Sjp161948 /* count lang tags and locales */ 5572705Sjp161948 for (n_locales = 0, p = locale_set; p && *p; p++) 5582705Sjp161948 n_locales++; 5590Sstevel@tonic-gate 5602705Sjp161948 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 5612705Sjp161948 /* count the number of langtags */ 5622705Sjp161948 for (; s = strchr(s, ','); s++, n_langtags++) 5632705Sjp161948 ; 5642705Sjp161948 5652705Sjp161948 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 5660Sstevel@tonic-gate 5672705Sjp161948 langtag_list = xsplit(langtag_set, ','); 5682705Sjp161948 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 5692705Sjp161948 ; 5700Sstevel@tonic-gate 5712705Sjp161948 max_results = MIN(n_locales, n_langtags) * 2; 5722705Sjp161948 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 5732705Sjp161948 *result = NULL; 5742705Sjp161948 n_results = 0; 5750Sstevel@tonic-gate 5762705Sjp161948 /* more specific matches first */ 5772705Sjp161948 for (p = langtag_list; p && *p; p++) { 5782705Sjp161948 do_append = 0; 5792705Sjp161948 for (q = locale_set; q && *q; q++) { 5802705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 2) { 5812705Sjp161948 do_append = 1; 5822705Sjp161948 for (r = result; (r - result) <= 5832705Sjp161948 MIN(n_locales, n_langtags); r++) { 5842705Sjp161948 if (!*r) 5852705Sjp161948 break; 5862705Sjp161948 if (strcmp(*q, *r) == 0) { 5872705Sjp161948 do_append = 0; 5882705Sjp161948 break; 5892705Sjp161948 } 5902705Sjp161948 } 5912705Sjp161948 if (do_append && n_results < max_results) { 5922705Sjp161948 result[n_results++] = xstrdup(*q); 5932705Sjp161948 result[n_results] = NULL; 5942705Sjp161948 } 5952705Sjp161948 break; 5962705Sjp161948 } 5970Sstevel@tonic-gate } 5980Sstevel@tonic-gate } 5990Sstevel@tonic-gate 6002705Sjp161948 for (p = langtag_list; p && *p; p++) { 6012705Sjp161948 do_append = 0; 6022705Sjp161948 for (q = locale_set; q && *q; q++) { 6032705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 1) { 6042705Sjp161948 do_append = 1; 6052705Sjp161948 for (r = result; (r - result) <= 6062705Sjp161948 MIN(n_locales, n_langtags); r++) { 6072705Sjp161948 if (!*r) 6082705Sjp161948 break; 6092705Sjp161948 if (strcmp(*q, *r) == 0) { 6102705Sjp161948 do_append = 0; 6112705Sjp161948 break; 6122705Sjp161948 } 6132705Sjp161948 } 6142705Sjp161948 if (do_append && n_results < max_results) { 6152705Sjp161948 result[n_results++] = xstrdup(*q); 6162705Sjp161948 result[n_results] = NULL; 6172705Sjp161948 } 6182705Sjp161948 break; 6192705Sjp161948 } 6200Sstevel@tonic-gate } 6210Sstevel@tonic-gate } 6220Sstevel@tonic-gate 6232705Sjp161948 xfree_split_list(langtag_list); 6242705Sjp161948 6252705Sjp161948 return (result); 6260Sstevel@tonic-gate } 6270Sstevel@tonic-gate 6280Sstevel@tonic-gate char * 6290Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 6300Sstevel@tonic-gate { 6312705Sjp161948 char **results, *result = NULL; 6320Sstevel@tonic-gate 6332705Sjp161948 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 6340Sstevel@tonic-gate srvr_locales ? srvr_locales : g11n_getlocales())) == NULL) 6352705Sjp161948 return (NULL); 6360Sstevel@tonic-gate 6372705Sjp161948 if (*results != NULL) 6382705Sjp161948 result = xstrdup(*results); 6390Sstevel@tonic-gate 6402705Sjp161948 xfree_split_list(results); 6410Sstevel@tonic-gate 6422705Sjp161948 return (result); 6430Sstevel@tonic-gate } 6440Sstevel@tonic-gate 6450Sstevel@tonic-gate 6460Sstevel@tonic-gate /* 6470Sstevel@tonic-gate * Functions for validating ASCII and UTF-8 strings 6480Sstevel@tonic-gate * 6490Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 6500Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 6510Sstevel@tonic-gate * friends. 6520Sstevel@tonic-gate * 6530Sstevel@tonic-gate * The return value is 0 if success, EILSEQ or EINVAL. 6540Sstevel@tonic-gate * 6550Sstevel@tonic-gate */ 6562705Sjp161948 uint_t 6572705Sjp161948 g11n_validate_ascii(const char *str, uint_t len, uchar_t **error_str) 6580Sstevel@tonic-gate { 6592705Sjp161948 uchar_t *p; 6600Sstevel@tonic-gate 6612705Sjp161948 for (p = (uchar_t *)str; p && *p && (!(*p & 0x80)); p++) 6622705Sjp161948 ; 6630Sstevel@tonic-gate 6642705Sjp161948 if (len && ((p - (uchar_t *)str) != len)) 6652705Sjp161948 return (EILSEQ); 6662705Sjp161948 6672705Sjp161948 return (0); 6680Sstevel@tonic-gate } 6690Sstevel@tonic-gate 6702705Sjp161948 uint_t 6712705Sjp161948 g11n_validate_utf8(const uchar_t *str, uint_t len, uchar_t **error_str) 6720Sstevel@tonic-gate { 6732705Sjp161948 uchar_t *p; 6742705Sjp161948 uint_t c, l; 6752705Sjp161948 6762705Sjp161948 if (len == 0) 6772705Sjp161948 len = strlen((const char *)str); 6780Sstevel@tonic-gate 6792705Sjp161948 for (p = (uchar_t *)str; p && (p - str < len) && *p; ) { 6802705Sjp161948 /* 8-bit chars begin a UTF-8 sequence */ 6812705Sjp161948 if (*p & 0x80) { 6822705Sjp161948 /* get sequence length and sanity check first byte */ 6832705Sjp161948 if (*p < 0xc0) 6842705Sjp161948 return (EILSEQ); 6852705Sjp161948 else if (*p < 0xe0) 6862705Sjp161948 l = 2; 6872705Sjp161948 else if (*p < 0xf0) 6882705Sjp161948 l = 3; 6892705Sjp161948 else if (*p < 0xf8) 6902705Sjp161948 l = 4; 6912705Sjp161948 else if (*p < 0xfc) 6922705Sjp161948 l = 5; 6932705Sjp161948 else if (*p < 0xfe) 6942705Sjp161948 l = 6; 6952705Sjp161948 else 6962705Sjp161948 return (EILSEQ); 6972705Sjp161948 6982705Sjp161948 if ((p + l - str) >= len) 6992705Sjp161948 return (EILSEQ); 7002705Sjp161948 7012705Sjp161948 /* overlong detection - build codepoint */ 7022705Sjp161948 c = *p & 0x3f; 7032705Sjp161948 /* shift c bits from first byte */ 7042705Sjp161948 c = c << (6 * (l - 1)); 7052705Sjp161948 7062705Sjp161948 if (l > 1) { 7072705Sjp161948 if (*(p + 1) && ((*(p + 1) & 0xc0) == 0x80)) 7082705Sjp161948 c = c | ((*(p + 1) & 0x3f) << 7092705Sjp161948 (6 * (l - 2))); 7102705Sjp161948 else 7112705Sjp161948 return (EILSEQ); 7122705Sjp161948 7132705Sjp161948 if (c < 0x80) 7142705Sjp161948 return (EILSEQ); 7152705Sjp161948 } 7160Sstevel@tonic-gate 7172705Sjp161948 if (l > 2) { 7182705Sjp161948 if (*(p + 2) && ((*(p + 2) & 0xc0) == 0x80)) 7192705Sjp161948 c = c | ((*(p + 2) & 0x3f) << 7202705Sjp161948 (6 * (l - 3))); 7212705Sjp161948 else 7222705Sjp161948 return (EILSEQ); 7232705Sjp161948 7242705Sjp161948 if (c < 0x800) 7252705Sjp161948 return (EILSEQ); 7262705Sjp161948 } 7272705Sjp161948 7282705Sjp161948 if (l > 3) { 7292705Sjp161948 if (*(p + 3) && ((*(p + 3) & 0xc0) == 0x80)) 7302705Sjp161948 c = c | ((*(p + 3) & 0x3f) << 7312705Sjp161948 (6 * (l - 4))); 7322705Sjp161948 else 7332705Sjp161948 return (EILSEQ); 7342705Sjp161948 7352705Sjp161948 if (c < 0x10000) 7362705Sjp161948 return (EILSEQ); 7372705Sjp161948 } 7380Sstevel@tonic-gate 7392705Sjp161948 if (l > 4) { 7402705Sjp161948 if (*(p + 4) && ((*(p + 4) & 0xc0) == 0x80)) 7412705Sjp161948 c = c | ((*(p + 4) & 0x3f) << 7422705Sjp161948 (6 * (l - 5))); 7432705Sjp161948 else 7442705Sjp161948 return (EILSEQ); 7452705Sjp161948 7462705Sjp161948 if (c < 0x200000) 7472705Sjp161948 return (EILSEQ); 7482705Sjp161948 } 7490Sstevel@tonic-gate 7502705Sjp161948 if (l > 5) { 7512705Sjp161948 if (*(p + 5) && ((*(p + 5) & 0xc0) == 0x80)) 7522705Sjp161948 c = c | (*(p + 5) & 0x3f); 7532705Sjp161948 else 7542705Sjp161948 return (EILSEQ); 7552705Sjp161948 7562705Sjp161948 if (c < 0x4000000) 7572705Sjp161948 return (EILSEQ); 7582705Sjp161948 } 7590Sstevel@tonic-gate 7602705Sjp161948 /* 7612705Sjp161948 * check for UTF-16 surrogates ifs other illegal 7622705Sjp161948 * UTF-8 * points 7632705Sjp161948 */ 7642705Sjp161948 if (((c <= 0xdfff) && (c >= 0xd800)) || 7652705Sjp161948 (c == 0xfffe) || (c == 0xffff)) 7662705Sjp161948 return (EILSEQ); 7672705Sjp161948 p += l; 7682705Sjp161948 } 7692705Sjp161948 /* 7-bit chars are fine */ 7700Sstevel@tonic-gate else 7712705Sjp161948 p++; 7720Sstevel@tonic-gate } 7732705Sjp161948 return (0); 7740Sstevel@tonic-gate } 7750Sstevel@tonic-gate 7760Sstevel@tonic-gate /* 7770Sstevel@tonic-gate * Functions for converting to ASCII or UTF-8 from the local codeset 7780Sstevel@tonic-gate * Functions for converting from ASCII or UTF-8 to the local codeset 7790Sstevel@tonic-gate * 7800Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 7810Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 7820Sstevel@tonic-gate * friends. 7830Sstevel@tonic-gate * 7840Sstevel@tonic-gate * The err parameter is an optional pointer to an integer where 0 7850Sstevel@tonic-gate * (success) or EILSEQ or EINVAL will be stored (failure). 7860Sstevel@tonic-gate * 7870Sstevel@tonic-gate * These functions return NULL if the conversion fails. 7880Sstevel@tonic-gate * 7890Sstevel@tonic-gate */ 7902705Sjp161948 uchar_t * 7912705Sjp161948 g11n_convert_from_ascii(const char *str, int *err_ptr, uchar_t **error_str) 7920Sstevel@tonic-gate { 7932705Sjp161948 static uint_t initialized = 0; 7942705Sjp161948 static uint_t do_convert = 0; 7952705Sjp161948 iconv_t cd; 7962705Sjp161948 int err; 7970Sstevel@tonic-gate 7982705Sjp161948 if (!initialized) { 7992705Sjp161948 /* 8002705Sjp161948 * iconv_open() fails if the to/from codesets are the 8012705Sjp161948 * same, and there are aliases of codesets to boot... 8022705Sjp161948 */ 8032705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 8042705Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 8052705Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 8062705Sjp161948 initialized = 1; 8072705Sjp161948 do_convert = 0; 8082705Sjp161948 } else { 8092705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "646"); 8102705Sjp161948 if (cd == (iconv_t)-1) { 8112705Sjp161948 if (err_ptr) 8122705Sjp161948 *err_ptr = errno; 8132705Sjp161948 if (error_str) 8142705Sjp161948 *error_str = (uchar_t *)"Cannot " 8152705Sjp161948 "convert ASCII strings to the local" 8162705Sjp161948 " codeset"; 8172705Sjp161948 } 8182705Sjp161948 initialized = 1; 8192705Sjp161948 do_convert = 1; 8202705Sjp161948 } 8210Sstevel@tonic-gate } 8222705Sjp161948 8232705Sjp161948 if (!do_convert) { 8242705Sjp161948 if ((err = g11n_validate_ascii(str, 0, error_str))) { 8252705Sjp161948 if (err_ptr) 8262705Sjp161948 *err_ptr = err; 8272705Sjp161948 return (NULL); 8282705Sjp161948 } else 8292705Sjp161948 return ((uchar_t *)xstrdup(str)); 8300Sstevel@tonic-gate } 8310Sstevel@tonic-gate 8322705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8330Sstevel@tonic-gate } 8340Sstevel@tonic-gate 8352705Sjp161948 uchar_t * 8362705Sjp161948 g11n_convert_from_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 8370Sstevel@tonic-gate { 8382705Sjp161948 static uint_t initialized = 0; 8392705Sjp161948 static uint_t do_convert = 0; 8402705Sjp161948 iconv_t cd; 8412705Sjp161948 int err; 8420Sstevel@tonic-gate 8432705Sjp161948 if (!initialized) { 8442705Sjp161948 /* 8452705Sjp161948 * iconv_open() fails if the to/from codesets are the 8462705Sjp161948 * same, and there are aliases of codesets to boot... 8472705Sjp161948 */ 8482705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 8492705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 8502705Sjp161948 initialized = 1; 8512705Sjp161948 do_convert = 0; 8522705Sjp161948 } else { 8532705Sjp161948 cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 8542705Sjp161948 if (cd == (iconv_t)-1) { 8552705Sjp161948 if (err_ptr) 8562705Sjp161948 *err_ptr = errno; 8572705Sjp161948 if (error_str) 8582705Sjp161948 *error_str = (uchar_t *)"Cannot " 8592705Sjp161948 "convert UTF-8 strings to the " 8602705Sjp161948 "local codeset"; 8612705Sjp161948 } 8622705Sjp161948 initialized = 1; 8632705Sjp161948 do_convert = 1; 8642705Sjp161948 } 8650Sstevel@tonic-gate } 8662705Sjp161948 8672705Sjp161948 if (!do_convert) { 8682705Sjp161948 if ((err = g11n_validate_utf8(str, 0, error_str))) { 8692705Sjp161948 if (err_ptr) 8702705Sjp161948 *err_ptr = err; 8712705Sjp161948 return (NULL); 8722705Sjp161948 } else 8732705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 8740Sstevel@tonic-gate } 8750Sstevel@tonic-gate 8762705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 8770Sstevel@tonic-gate } 8780Sstevel@tonic-gate 8790Sstevel@tonic-gate char * 8802705Sjp161948 g11n_convert_to_ascii(const uchar_t *str, int *err_ptr, uchar_t **error_str) 8810Sstevel@tonic-gate { 8822705Sjp161948 static uint_t initialized = 0; 8832705Sjp161948 static uint_t do_convert = 0; 8842705Sjp161948 iconv_t cd; 8850Sstevel@tonic-gate 8862705Sjp161948 if (!initialized) { 8872705Sjp161948 /* 8882705Sjp161948 * iconv_open() fails if the to/from codesets are the 8892705Sjp161948 * same, and there are aliases of codesets to boot... 8902705Sjp161948 */ 8912705Sjp161948 if (strcmp("646", nl_langinfo(CODESET)) == 0 || 8922705Sjp161948 strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 8932705Sjp161948 strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 8942705Sjp161948 initialized = 1; 8952705Sjp161948 do_convert = 0; 8962705Sjp161948 } else { 8972705Sjp161948 cd = iconv_open("646", nl_langinfo(CODESET)); 8982705Sjp161948 if (cd == (iconv_t)-1) { 8992705Sjp161948 if (err_ptr) 9002705Sjp161948 *err_ptr = errno; 9012705Sjp161948 if (error_str) 9022705Sjp161948 *error_str = (uchar_t *)"Cannot " 9032705Sjp161948 "convert UTF-8 strings to the " 9042705Sjp161948 "local codeset"; 9052705Sjp161948 } 9062705Sjp161948 initialized = 1; 9072705Sjp161948 do_convert = 1; 9082705Sjp161948 } 9090Sstevel@tonic-gate } 9100Sstevel@tonic-gate 9112705Sjp161948 if (!do_convert) 9122705Sjp161948 return (xstrdup((char *)str)); 9132705Sjp161948 9142705Sjp161948 return ((char *)do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9150Sstevel@tonic-gate } 9160Sstevel@tonic-gate 9172705Sjp161948 uchar_t * 9182705Sjp161948 g11n_convert_to_utf8(const uchar_t *str, int *err_ptr, uchar_t **error_str) 9190Sstevel@tonic-gate { 9202705Sjp161948 static uint_t initialized = 0; 9212705Sjp161948 static uint_t do_convert = 0; 9222705Sjp161948 iconv_t cd; 9230Sstevel@tonic-gate 9242705Sjp161948 if (!initialized) { 9252705Sjp161948 /* 9262705Sjp161948 * iconv_open() fails if the to/from codesets are the 9272705Sjp161948 * same, and there are aliases of codesets to boot... 9282705Sjp161948 */ 9292705Sjp161948 if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 9302705Sjp161948 strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 9312705Sjp161948 initialized = 1; 9322705Sjp161948 do_convert = 0; 9332705Sjp161948 } else { 9342705Sjp161948 cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 9352705Sjp161948 if (cd == (iconv_t)-1) { 9362705Sjp161948 if (err_ptr) 9372705Sjp161948 *err_ptr = errno; 9382705Sjp161948 if (error_str) 9392705Sjp161948 *error_str = (uchar_t *)"Cannot " 9402705Sjp161948 "convert UTF-8 strings to the " 9412705Sjp161948 "local codeset"; 9422705Sjp161948 } 9432705Sjp161948 initialized = 1; 9442705Sjp161948 do_convert = 1; 9452705Sjp161948 } 9460Sstevel@tonic-gate } 9470Sstevel@tonic-gate 9482705Sjp161948 if (!do_convert) 9492705Sjp161948 return ((uchar_t *)xstrdup((char *)str)); 9502705Sjp161948 9512705Sjp161948 return (do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str)); 9520Sstevel@tonic-gate } 9530Sstevel@tonic-gate 9540Sstevel@tonic-gate 9550Sstevel@tonic-gate /* 9560Sstevel@tonic-gate * Wrapper around iconv() 9570Sstevel@tonic-gate * 9580Sstevel@tonic-gate * The caller is responsible for freeing the result and for handling 9590Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 9600Sstevel@tonic-gate */ 9612705Sjp161948 static uchar_t * 9622705Sjp161948 do_iconv(iconv_t cd, uint_t *mul_ptr, const void *buf, uint_t len, 9632705Sjp161948 uint_t *outlen, int *err, uchar_t **err_str) 9642705Sjp161948 { 9652705Sjp161948 size_t inbytesleft, outbytesleft, converted_size; 9662705Sjp161948 char *outbuf; 9672705Sjp161948 uchar_t *converted; 9682705Sjp161948 const char *inbuf; 9692705Sjp161948 uint_t mul = 0; 9700Sstevel@tonic-gate 9712705Sjp161948 if (!buf || !(*(char *)buf)) 9722705Sjp161948 return (NULL); 9732705Sjp161948 9742705Sjp161948 if (len == 0) 9752705Sjp161948 len = strlen(buf); 9762705Sjp161948 9772705Sjp161948 /* reset conversion descriptor */ 9782705Sjp161948 /* XXX Do we need initial shift sequences for UTF-8??? */ 9792705Sjp161948 (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 9802705Sjp161948 inbuf = (const char *) buf; 9812705Sjp161948 9822705Sjp161948 if (mul_ptr) 9832705Sjp161948 mul = *mul_ptr; 9842705Sjp161948 9852705Sjp161948 converted_size = (len << mul); 9862705Sjp161948 outbuf = (char *)xmalloc(converted_size + 1); /* for null */ 9872705Sjp161948 converted = (uchar_t *)outbuf; 9882705Sjp161948 outbytesleft = len; 9890Sstevel@tonic-gate 9902705Sjp161948 do { 9912705Sjp161948 if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 9922705Sjp161948 (size_t)-1) { 9932705Sjp161948 if (errno == E2BIG) { 9942705Sjp161948 /* UTF-8 codepoints are at most 8 bytes long */ 9952705Sjp161948 if (mul > 2) { 9962705Sjp161948 if (err_str) 9972705Sjp161948 *err_str = (uchar_t *) 9982705Sjp161948 "Conversion to UTF-8 failed" 9992705Sjp161948 " due to preposterous space" 10002705Sjp161948 " requirements"; 10012705Sjp161948 if (err) 10022705Sjp161948 *err = EILSEQ; 10032705Sjp161948 return (NULL); 10042705Sjp161948 } 10050Sstevel@tonic-gate 10062705Sjp161948 /* 10072705Sjp161948 * re-alloc output and ensure that the outbuf 10082705Sjp161948 * and outbytesleft values are adjusted 10092705Sjp161948 */ 10102705Sjp161948 converted = xrealloc(converted, 10112705Sjp161948 converted_size << 1 + 1); 10122705Sjp161948 outbuf = (char *)converted + converted_size - 10132705Sjp161948 outbytesleft; 10142705Sjp161948 converted_size = (len << ++(mul)); 10152705Sjp161948 outbytesleft = converted_size - outbytesleft; 10162705Sjp161948 } else { 10172705Sjp161948 /* 10182705Sjp161948 * let the caller deal with iconv() errors, 10192705Sjp161948 * probably by calling fatal(); xfree() does 10202705Sjp161948 * not set errno 10212705Sjp161948 */ 10222705Sjp161948 if (err) 10232705Sjp161948 *err = errno; 10242705Sjp161948 xfree(converted); 10252705Sjp161948 return (NULL); 10262705Sjp161948 } 10272705Sjp161948 } 10282705Sjp161948 } while (inbytesleft); 10292705Sjp161948 10302705Sjp161948 *outbuf = '\0'; /* ensure null-termination */ 10312705Sjp161948 if (outlen) 10322705Sjp161948 *outlen = converted_size - outbytesleft; 10332705Sjp161948 if (mul_ptr) 10342705Sjp161948 *mul_ptr = mul; 10352705Sjp161948 10362705Sjp161948 return (converted); 10370Sstevel@tonic-gate } 1038