10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52628Sjp161948 * Common Development and Distribution License (the "License"). 62628Sjp161948 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate * 21*9600SNobutomo.Nakano@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 220Sstevel@tonic-gate * Use is subject to license terms. 
230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #include <errno.h> 260Sstevel@tonic-gate #include <locale.h> 270Sstevel@tonic-gate #include <langinfo.h> 280Sstevel@tonic-gate #include <iconv.h> 290Sstevel@tonic-gate #include <ctype.h> 30*9600SNobutomo.Nakano@Sun.COM #include <wctype.h> 310Sstevel@tonic-gate #include <strings.h> 320Sstevel@tonic-gate #include <string.h> 330Sstevel@tonic-gate #include <stdio.h> 340Sstevel@tonic-gate #include <stdlib.h> 350Sstevel@tonic-gate #include "includes.h" 360Sstevel@tonic-gate #include "xmalloc.h" 370Sstevel@tonic-gate #include "xlist.h" 38*9600SNobutomo.Nakano@Sun.COM #include "compat.h" 39*9600SNobutomo.Nakano@Sun.COM #include "log.h" 400Sstevel@tonic-gate 410Sstevel@tonic-gate #ifdef MIN 420Sstevel@tonic-gate #undef MIN 430Sstevel@tonic-gate #endif /* MIN */ 440Sstevel@tonic-gate 452705Sjp161948 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 460Sstevel@tonic-gate 472705Sjp161948 #define LOCALE_PATH "/usr/bin/locale" 480Sstevel@tonic-gate 492705Sjp161948 /* two-char country code, '-' and two-char region code */ 502705Sjp161948 #define LANGTAG_MAX 5 510Sstevel@tonic-gate 520Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 530Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 540Sstevel@tonic-gate 55*9600SNobutomo.Nakano@Sun.COM static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str); 56*9600SNobutomo.Nakano@Sun.COM 57*9600SNobutomo.Nakano@Sun.COM /* 58*9600SNobutomo.Nakano@Sun.COM * native_codeset records the codeset of the default system locale. 59*9600SNobutomo.Nakano@Sun.COM * It is used to convert the contents of file (eg /etc/issue) which is 60*9600SNobutomo.Nakano@Sun.COM * supposed to be in the codeset of default system locale. 61*9600SNobutomo.Nakano@Sun.COM */ 62*9600SNobutomo.Nakano@Sun.COM static char *native_codeset; 630Sstevel@tonic-gate 645562Sjp161948 /* 655562Sjp161948 * Convert locale string name into a language tag. 
The caller is responsible for 665562Sjp161948 * freeing the memory allocated for the result. 675562Sjp161948 */ 682705Sjp161948 static char * 690Sstevel@tonic-gate g11n_locale2langtag(char *locale) 700Sstevel@tonic-gate { 712705Sjp161948 char *langtag; 720Sstevel@tonic-gate 732705Sjp161948 /* base cases */ 742705Sjp161948 if (!locale || !*locale) 752705Sjp161948 return (NULL); 760Sstevel@tonic-gate 772705Sjp161948 if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0) 785562Sjp161948 return (xstrdup("i-default")); 790Sstevel@tonic-gate 802705Sjp161948 /* punt for language codes which are not exactly 2 letters */ 812705Sjp161948 if (strlen(locale) < 2 || 822705Sjp161948 !isalpha(locale[0]) || 832705Sjp161948 !isalpha(locale[1]) || 842705Sjp161948 (locale[2] != '\0' && 852705Sjp161948 locale[2] != '_' && 862705Sjp161948 locale[2] != '.' && 872705Sjp161948 locale[2] != '@')) 882705Sjp161948 return (NULL); 890Sstevel@tonic-gate 900Sstevel@tonic-gate 912705Sjp161948 /* we have a primary language sub-tag */ 922705Sjp161948 langtag = (char *)xmalloc(LANGTAG_MAX + 1); 930Sstevel@tonic-gate 942705Sjp161948 strncpy(langtag, locale, 2); 952705Sjp161948 langtag[2] = '\0'; 960Sstevel@tonic-gate 972705Sjp161948 /* do we have country sub-tag? For example: cs_CZ */ 982705Sjp161948 if (locale[2] == '_') { 992705Sjp161948 if (strlen(locale) < 5 || 1002705Sjp161948 !isalpha(locale[3]) || 1012705Sjp161948 !isalpha(locale[4]) || 1022705Sjp161948 (locale[5] != '\0' && (locale[5] != '.' 
&& 1032705Sjp161948 locale[5] != '@'))) { 1042705Sjp161948 return (langtag); 1052705Sjp161948 } 1062705Sjp161948 1072705Sjp161948 /* example: create cs-CZ from cs_CZ */ 1082705Sjp161948 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2, 1092705Sjp161948 locale + 3) == 5) 1102705Sjp161948 return (langtag); 1110Sstevel@tonic-gate } 1120Sstevel@tonic-gate 1132705Sjp161948 /* in all other cases we just use the primary language sub-tag */ 1142705Sjp161948 return (langtag); 1150Sstevel@tonic-gate } 1160Sstevel@tonic-gate 1172705Sjp161948 uint_t 1180Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 1190Sstevel@tonic-gate { 1202705Sjp161948 return (strcmp(langtag, "i-default") == 0); 1210Sstevel@tonic-gate } 1220Sstevel@tonic-gate 1230Sstevel@tonic-gate /* 1240Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 1250Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 1260Sstevel@tonic-gate */ 1272705Sjp161948 uint_t 1280Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 1290Sstevel@tonic-gate { 1302705Sjp161948 /* match "i-default" to the process' current locale if possible */ 1312705Sjp161948 if (g11n_langtag_is_default(langtag)) { 1322705Sjp161948 if (strcasecmp(locale, "POSIX") == 0 || 1332705Sjp161948 strcasecmp(locale, "C") == 0) 1342705Sjp161948 return (1); 1352705Sjp161948 else 1362705Sjp161948 return (0); 1372705Sjp161948 } 1380Sstevel@tonic-gate 1392705Sjp161948 /* 1402705Sjp161948 * locale must be at least 2 chars long and the lang part must be 1412705Sjp161948 * exactly two characters 1422705Sjp161948 */ 1432705Sjp161948 if (strlen(locale) < 2 || 1442705Sjp161948 (!isalpha(locale[0]) || !isalpha(locale[1]) || 1452705Sjp161948 (locale[2] != '\0' && locale[2] != '_' && 1462705Sjp161948 locale[2] != '.' 
&& locale[2] != '@'))) 1472705Sjp161948 return (0); 1480Sstevel@tonic-gate 1492705Sjp161948 /* same thing with the langtag */ 1502705Sjp161948 if (strlen(langtag) < 2 || 1512705Sjp161948 (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 1522705Sjp161948 (langtag[2] != '\0' && langtag[2] != '-'))) 1532705Sjp161948 return (0); 1540Sstevel@tonic-gate 1552705Sjp161948 /* primary language sub-tag and the locale's language part must match */ 1562705Sjp161948 if (strncasecmp(langtag, locale, 2) != 0) 1572705Sjp161948 return (0); 1580Sstevel@tonic-gate 1592705Sjp161948 /* 1602705Sjp161948 * primary language sub-tag and the locale's language match, now 1612705Sjp161948 * fuzzy check country part 1622705Sjp161948 */ 1630Sstevel@tonic-gate 1642705Sjp161948 /* neither langtag nor locale have more than one component */ 1652705Sjp161948 if (langtag[2] == '\0' && 1662705Sjp161948 (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 1672705Sjp161948 return (2); 1680Sstevel@tonic-gate 1692705Sjp161948 /* langtag has only one sub-tag... */ 1702705Sjp161948 if (langtag[2] == '\0') 1712705Sjp161948 return (1); 1720Sstevel@tonic-gate 1732705Sjp161948 /* locale has no country code... */ 1742705Sjp161948 if (locale[2] == '\0' || locale[2] == '.' 
|| locale[2] == '@') 1752705Sjp161948 return (1); 1762705Sjp161948 1772705Sjp161948 /* langtag has more than one subtag and the locale has a country code */ 1780Sstevel@tonic-gate 1792705Sjp161948 /* ignore second subtag if not two chars */ 1802705Sjp161948 if (strlen(langtag) < 5) 1812705Sjp161948 return (1); 1820Sstevel@tonic-gate 1832705Sjp161948 if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 1842705Sjp161948 (langtag[5] != '\0' && langtag[5] != '-')) 1852705Sjp161948 return (1); 1860Sstevel@tonic-gate 1872705Sjp161948 /* ignore rest of locale if there is no two-character country part */ 1882705Sjp161948 if (strlen(locale) < 5) 1892705Sjp161948 return (1); 1900Sstevel@tonic-gate 1912705Sjp161948 if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 1922705Sjp161948 (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 1932705Sjp161948 return (1); 1940Sstevel@tonic-gate 1952705Sjp161948 /* if the country part matches, return 2 */ 1962705Sjp161948 if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 1972705Sjp161948 return (2); 1980Sstevel@tonic-gate 1992705Sjp161948 return (1); 2000Sstevel@tonic-gate } 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate char * 2030Sstevel@tonic-gate g11n_getlocale() 2040Sstevel@tonic-gate { 2052705Sjp161948 /* we have one text domain - always set it */ 2062705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2070Sstevel@tonic-gate 2082705Sjp161948 /* if the locale is not set, set it from the env vars */ 2092705Sjp161948 if (!setlocale(LC_MESSAGES, NULL)) 2102705Sjp161948 (void) setlocale(LC_MESSAGES, ""); 2110Sstevel@tonic-gate 2122705Sjp161948 return (setlocale(LC_MESSAGES, NULL)); 2130Sstevel@tonic-gate } 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate void 2160Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 2170Sstevel@tonic-gate { 2182705Sjp161948 char *curr; 2190Sstevel@tonic-gate 220*9600SNobutomo.Nakano@Sun.COM if (native_codeset == NULL) { 221*9600SNobutomo.Nakano@Sun.COM /* set default 
locale, and record current codeset */ 222*9600SNobutomo.Nakano@Sun.COM (void) setlocale(LC_ALL, ""); 223*9600SNobutomo.Nakano@Sun.COM curr = nl_langinfo(CODESET); 224*9600SNobutomo.Nakano@Sun.COM native_codeset = xstrdup(curr); 225*9600SNobutomo.Nakano@Sun.COM } 226*9600SNobutomo.Nakano@Sun.COM 2272705Sjp161948 /* we have one text domain - always set it */ 2282705Sjp161948 (void) textdomain(TEXT_DOMAIN); 2290Sstevel@tonic-gate 2302705Sjp161948 if (!locale) 2312705Sjp161948 return; 2320Sstevel@tonic-gate 2332705Sjp161948 if (*locale && ((curr = setlocale(category, NULL))) && 2342705Sjp161948 strcmp(curr, locale) == 0) 2352705Sjp161948 return; 2362628Sjp161948 2372705Sjp161948 /* if <category> is bogus, setlocale() will do nothing */ 2382705Sjp161948 (void) setlocale(category, locale); 2390Sstevel@tonic-gate } 2400Sstevel@tonic-gate 2410Sstevel@tonic-gate char ** 2420Sstevel@tonic-gate g11n_getlocales() 2430Sstevel@tonic-gate { 2442705Sjp161948 FILE *locale_out; 2452705Sjp161948 uint_t n_elems, list_size, long_line = 0; 2462705Sjp161948 char **list; 2472705Sjp161948 char locale[64]; /* 64 bytes is plenty for locale names */ 2482705Sjp161948 2492705Sjp161948 if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) 2502705Sjp161948 return (NULL); 2510Sstevel@tonic-gate 2522705Sjp161948 /* 2532705Sjp161948 * start with enough room for 65 locales - that's a lot fewer than 2542705Sjp161948 * all the locales available for installation, but a lot more than 2552705Sjp161948 * what most users will need and install 2562705Sjp161948 */ 2572705Sjp161948 n_elems = 0; 2582705Sjp161948 list_size = 192; 2592705Sjp161948 list = (char **) xmalloc(sizeof (char *) * (list_size + 1)); 2602705Sjp161948 memset(list, 0, sizeof (char *) * (list_size + 1)); 2610Sstevel@tonic-gate 2622705Sjp161948 while (fgets(locale, sizeof (locale), locale_out)) { 2632705Sjp161948 /* skip long locale names (if any) */ 2642705Sjp161948 if (!strchr(locale, '\n')) { 2652705Sjp161948 long_line = 1; 
2662705Sjp161948 continue; 2672705Sjp161948 } else if (long_line) { 2682705Sjp161948 long_line = 0; 2692705Sjp161948 continue; 2702705Sjp161948 } 2710Sstevel@tonic-gate 2722705Sjp161948 if (strncmp(locale, "iso_8859", 8) == 0) 2732705Sjp161948 /* ignore locale names like "iso_8859-1" */ 2742705Sjp161948 continue; 2750Sstevel@tonic-gate 2762705Sjp161948 if (n_elems == list_size) { 2772705Sjp161948 list_size *= 2; 2782705Sjp161948 list = (char **)xrealloc((void *) list, 2792705Sjp161948 (list_size + 1) * sizeof (char *)); 2802705Sjp161948 memset(&list[n_elems + 1], 0, 2812705Sjp161948 sizeof (char *) * (list_size - n_elems + 1)); 2822705Sjp161948 } 2832705Sjp161948 2842705Sjp161948 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 2852705Sjp161948 list[n_elems++] = xstrdup(locale); 2860Sstevel@tonic-gate } 2870Sstevel@tonic-gate 2886288Sjp161948 (void) pclose(locale_out); 2896288Sjp161948 2905562Sjp161948 if (n_elems == 0) { 2915562Sjp161948 xfree(list); 2923109Sjp161948 return (NULL); 2935562Sjp161948 } 2943109Sjp161948 2952705Sjp161948 list[n_elems] = NULL; 2960Sstevel@tonic-gate 2972705Sjp161948 qsort(list, n_elems - 1, sizeof (char *), locale_cmp); 2982705Sjp161948 return (list); 2990Sstevel@tonic-gate } 3000Sstevel@tonic-gate 3010Sstevel@tonic-gate char * 3020Sstevel@tonic-gate g11n_getlangs() 3030Sstevel@tonic-gate { 3042705Sjp161948 char *locale; 3050Sstevel@tonic-gate 3062705Sjp161948 if (getenv("SSH_LANGS")) 3072705Sjp161948 return (xstrdup(getenv("SSH_LANGS"))); 3080Sstevel@tonic-gate 3092705Sjp161948 locale = g11n_getlocale(); 3100Sstevel@tonic-gate 3112705Sjp161948 if (!locale || !*locale) 3122705Sjp161948 return (xstrdup("i-default")); 3130Sstevel@tonic-gate 3142705Sjp161948 return (g11n_locale2langtag(locale)); 3150Sstevel@tonic-gate } 3160Sstevel@tonic-gate 3170Sstevel@tonic-gate char * 3180Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 3190Sstevel@tonic-gate { 3202705Sjp161948 char **p, **r, **q; 3215562Sjp161948 char *langtag, 
*langs; 3222705Sjp161948 int locales, skip; 3230Sstevel@tonic-gate 3242705Sjp161948 for (locales = 0, p = locale_set; p && *p; p++) 3252705Sjp161948 locales++; 3260Sstevel@tonic-gate 3272705Sjp161948 r = (char **)xmalloc((locales + 1) * sizeof (char *)); 3282705Sjp161948 memset(r, 0, (locales + 1) * sizeof (char *)); 3290Sstevel@tonic-gate 3302705Sjp161948 for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) { 3312705Sjp161948 skip = 0; 3322705Sjp161948 if ((langtag = g11n_locale2langtag(*p)) == NULL) 3332705Sjp161948 continue; 3342705Sjp161948 for (q = r; (q - r) < locales; q++) { 3352705Sjp161948 if (!*q) 3362705Sjp161948 break; 3372705Sjp161948 if (*q && strcmp(*q, langtag) == 0) 3382705Sjp161948 skip = 1; 3392705Sjp161948 } 3402705Sjp161948 if (!skip) 3412705Sjp161948 *(q++) = langtag; 3425562Sjp161948 else 3435562Sjp161948 xfree(langtag); 3442705Sjp161948 *q = NULL; 3450Sstevel@tonic-gate } 3462705Sjp161948 3475562Sjp161948 langs = xjoin(r, ','); 3485562Sjp161948 g11n_freelist(r); 3495562Sjp161948 3505562Sjp161948 return (langs); 3510Sstevel@tonic-gate } 3520Sstevel@tonic-gate 3532705Sjp161948 static int 3540Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 3550Sstevel@tonic-gate { 3562705Sjp161948 char *s1 = *(char **)d1; 3572705Sjp161948 char *s2 = *(char **)d2; 3580Sstevel@tonic-gate 3592705Sjp161948 return (strcmp(s1, s2)); 3600Sstevel@tonic-gate } 3610Sstevel@tonic-gate 3620Sstevel@tonic-gate int 3630Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 3640Sstevel@tonic-gate { 3652705Sjp161948 int len1, len2; 3662705Sjp161948 char c1, c2; 3670Sstevel@tonic-gate 3682705Sjp161948 len1 = (strchr(langtag1, '-')) ? 3695562Sjp161948 (strchr(langtag1, '-') - langtag1) 3705562Sjp161948 : strlen(langtag1); 3710Sstevel@tonic-gate 3722705Sjp161948 len2 = (strchr(langtag2, '-')) ? 
3735562Sjp161948 (strchr(langtag2, '-') - langtag2) 3745562Sjp161948 : strlen(langtag2); 3750Sstevel@tonic-gate 3762705Sjp161948 /* no match */ 3772705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 3782705Sjp161948 return (0); 3790Sstevel@tonic-gate 3802705Sjp161948 c1 = *(langtag1 + len1); 3812705Sjp161948 c2 = *(langtag2 + len2); 3820Sstevel@tonic-gate 3832705Sjp161948 /* no country sub-tags - exact match */ 3842705Sjp161948 if (c1 == '\0' && c2 == '\0') 3852705Sjp161948 return (2); 3860Sstevel@tonic-gate 3872705Sjp161948 /* one langtag has a country sub-tag, the other doesn't */ 3882705Sjp161948 if (c1 == '\0' || c2 == '\0') 3892705Sjp161948 return (1); 3900Sstevel@tonic-gate 3912705Sjp161948 /* can't happen - both langtags have a country sub-tag */ 3922705Sjp161948 if (c1 != '-' || c2 != '-') 3932705Sjp161948 return (1); 3940Sstevel@tonic-gate 3952705Sjp161948 /* compare country subtags */ 3962705Sjp161948 langtag1 = langtag1 + len1 + 1; 3972705Sjp161948 langtag2 = langtag2 + len2 + 1; 3980Sstevel@tonic-gate 3992705Sjp161948 len1 = (strchr(langtag1, '-')) ? 4002705Sjp161948 (strchr(langtag1, '-') - langtag1) : strlen(langtag1); 4012705Sjp161948 4022705Sjp161948 len2 = (strchr(langtag2, '-')) ? 
4032705Sjp161948 (strchr(langtag2, '-') - langtag2) : strlen(langtag2); 4040Sstevel@tonic-gate 4052705Sjp161948 if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0) 4062705Sjp161948 return (1); 4070Sstevel@tonic-gate 4082705Sjp161948 /* country tags matched - exact match */ 4092705Sjp161948 return (2); 4100Sstevel@tonic-gate } 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate char * 4130Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 4140Sstevel@tonic-gate { 4152705Sjp161948 char **list1, **list2, **list3, **p, **q, **r; 4162705Sjp161948 char *set3, *lang_subtag; 4172705Sjp161948 uint_t n1, n2, n3; 4182705Sjp161948 uint_t do_append; 4192705Sjp161948 4202705Sjp161948 list1 = xsplit(set1, ','); 4212705Sjp161948 list2 = xsplit(set2, ','); 4220Sstevel@tonic-gate 4232705Sjp161948 for (n1 = 0, p = list1; p && *p; p++, n1++) 4242705Sjp161948 ; 4252705Sjp161948 for (n2 = 0, p = list2; p && *p; p++, n2++) 4262705Sjp161948 ; 4270Sstevel@tonic-gate 4282705Sjp161948 list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1)); 4292705Sjp161948 *list3 = NULL; 4300Sstevel@tonic-gate 4312705Sjp161948 /* 4322705Sjp161948 * we must not sort the user langtags - sorting or not the server's 4332705Sjp161948 * should not affect the outcome 4342705Sjp161948 */ 4352705Sjp161948 qsort(list2, n2, sizeof (char *), sortcmp); 4360Sstevel@tonic-gate 4372705Sjp161948 for (n3 = 0, p = list1; p && *p; p++) { 4382705Sjp161948 do_append = 0; 4392705Sjp161948 for (q = list2; q && *q; q++) { 4402705Sjp161948 if (g11n_langtag_match(*p, *q) != 2) continue; 4412705Sjp161948 /* append element */ 4422705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4432705Sjp161948 do_append = 1; 4442705Sjp161948 if (!*r) 4452705Sjp161948 break; 4462705Sjp161948 if (strcmp(*p, *r) == 0) { 4472705Sjp161948 do_append = 0; 4482705Sjp161948 break; 4492705Sjp161948 } 4502705Sjp161948 } 4512705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4522705Sjp161948 list3[n3++] = xstrdup(*p); 
4532705Sjp161948 list3[n3] = NULL; 4542705Sjp161948 } 4550Sstevel@tonic-gate } 4560Sstevel@tonic-gate } 4572705Sjp161948 4582705Sjp161948 for (p = list1; p && *p; p++) { 4592705Sjp161948 do_append = 0; 4602705Sjp161948 for (q = list2; q && *q; q++) { 4612705Sjp161948 if (g11n_langtag_match(*p, *q) != 1) 4622705Sjp161948 continue; 4630Sstevel@tonic-gate 4642705Sjp161948 /* append element */ 4652705Sjp161948 lang_subtag = xstrdup(*p); 4662705Sjp161948 if (strchr(lang_subtag, '-')) 4672705Sjp161948 *(strchr(lang_subtag, '-')) = '\0'; 4682705Sjp161948 for (r = list3; (r - list3) <= (n1 + n2); r++) { 4692705Sjp161948 do_append = 1; 4702705Sjp161948 if (!*r) 4712705Sjp161948 break; 4722705Sjp161948 if (strcmp(lang_subtag, *r) == 0) { 4732705Sjp161948 do_append = 0; 4742705Sjp161948 break; 4752705Sjp161948 } 4762705Sjp161948 } 4772705Sjp161948 if (do_append && n3 <= (n1 + n2)) { 4782705Sjp161948 list3[n3++] = lang_subtag; 4792705Sjp161948 list3[n3] = NULL; 4802705Sjp161948 } else 4812705Sjp161948 xfree(lang_subtag); 4820Sstevel@tonic-gate } 4830Sstevel@tonic-gate } 4840Sstevel@tonic-gate 4852705Sjp161948 set3 = xjoin(list3, ','); 4862705Sjp161948 xfree_split_list(list1); 4872705Sjp161948 xfree_split_list(list2); 4882705Sjp161948 xfree_split_list(list3); 4890Sstevel@tonic-gate 4902705Sjp161948 return (set3); 4910Sstevel@tonic-gate } 4920Sstevel@tonic-gate 4930Sstevel@tonic-gate char * 4940Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 4950Sstevel@tonic-gate { 4962705Sjp161948 char *list, *result; 4972705Sjp161948 char **xlist; 4980Sstevel@tonic-gate 4992705Sjp161948 /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 5002705Sjp161948 list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 5010Sstevel@tonic-gate 5022705Sjp161948 if (!list) 5032705Sjp161948 return (NULL); 5040Sstevel@tonic-gate 5052705Sjp161948 xlist = xsplit(list, ','); 5060Sstevel@tonic-gate 5072705Sjp161948 xfree(list); 
5080Sstevel@tonic-gate 5092705Sjp161948 if (!xlist || !*xlist) 5102705Sjp161948 return (NULL); 5110Sstevel@tonic-gate 5122705Sjp161948 result = xstrdup(*xlist); 5132705Sjp161948 xfree_split_list(xlist); 5140Sstevel@tonic-gate 5152705Sjp161948 return (result); 5160Sstevel@tonic-gate } 5170Sstevel@tonic-gate 5180Sstevel@tonic-gate /* 5190Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 5200Sstevel@tonic-gate * a stright strcmp() 5210Sstevel@tonic-gate */ 5222705Sjp161948 static int 5230Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 5240Sstevel@tonic-gate { 5252705Sjp161948 char *dot_ptr; 5262705Sjp161948 char *s1 = *(char **)d1; 5272705Sjp161948 char *s2 = *(char **)d2; 5282705Sjp161948 int s1_is_utf8 = 0; 5292705Sjp161948 int s2_is_utf8 = 0; 5300Sstevel@tonic-gate 5312705Sjp161948 /* check if s1 is a UTF-8 locale */ 5322705Sjp161948 if (((dot_ptr = strchr((char *)s1, '.')) != NULL) && 5332705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5342705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5352705Sjp161948 s1_is_utf8++; 5362705Sjp161948 } 5372705Sjp161948 5382705Sjp161948 /* check if s2 is a UTF-8 locale */ 5392705Sjp161948 if (((dot_ptr = strchr((char *)s2, '.')) != NULL) && 5402705Sjp161948 (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) && 5412705Sjp161948 (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) { 5422705Sjp161948 s2_is_utf8++; 5432705Sjp161948 } 5440Sstevel@tonic-gate 5452705Sjp161948 /* prefer UTF-8 locales */ 5462705Sjp161948 if (s1_is_utf8 && !s2_is_utf8) 5472705Sjp161948 return (-1); 5480Sstevel@tonic-gate 5492705Sjp161948 if (s2_is_utf8 && !s1_is_utf8) 5502705Sjp161948 return (1); 5510Sstevel@tonic-gate 5522705Sjp161948 /* prefer any locale over the default locales */ 5532705Sjp161948 if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 || 5542705Sjp161948 strcmp(s1, "common") == 0) { 5552705Sjp161948 if (strcmp(s2, "C") != 0 && 
strcmp(s2, "POSIX") != 0 && 5562705Sjp161948 strcmp(s2, "common") != 0) 5572705Sjp161948 return (1); 5582705Sjp161948 } 5590Sstevel@tonic-gate 5602705Sjp161948 if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 || 5612705Sjp161948 strcmp(s2, "common") == 0) { 5622705Sjp161948 if (strcmp(s1, "C") != 0 && 5632705Sjp161948 strcmp(s1, "POSIX") != 0 && 5642705Sjp161948 strcmp(s1, "common") != 0) 5652705Sjp161948 return (-1); 5662705Sjp161948 } 5670Sstevel@tonic-gate 5682705Sjp161948 return (strcmp(s1, s2)); 5690Sstevel@tonic-gate } 5700Sstevel@tonic-gate 5710Sstevel@tonic-gate 5720Sstevel@tonic-gate char ** 5732705Sjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set) 5740Sstevel@tonic-gate { 5752705Sjp161948 char **langtag_list, **result, **p, **q, **r; 5762705Sjp161948 char *s; 5772705Sjp161948 uint_t do_append, n_langtags, n_locales, n_results, max_results; 5782705Sjp161948 5792705Sjp161948 /* count lang tags and locales */ 5802705Sjp161948 for (n_locales = 0, p = locale_set; p && *p; p++) 5812705Sjp161948 n_locales++; 5820Sstevel@tonic-gate 5832705Sjp161948 n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 
1 : 0; 5842705Sjp161948 /* count the number of langtags */ 5852705Sjp161948 for (; s = strchr(s, ','); s++, n_langtags++) 5862705Sjp161948 ; 5872705Sjp161948 5882705Sjp161948 qsort(locale_set, n_locales, sizeof (char *), locale_cmp); 5890Sstevel@tonic-gate 5902705Sjp161948 langtag_list = xsplit(langtag_set, ','); 5912705Sjp161948 for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++) 5922705Sjp161948 ; 5930Sstevel@tonic-gate 5942705Sjp161948 max_results = MIN(n_locales, n_langtags) * 2; 5952705Sjp161948 result = (char **) xmalloc(sizeof (char *) * (max_results + 1)); 5962705Sjp161948 *result = NULL; 5972705Sjp161948 n_results = 0; 5980Sstevel@tonic-gate 5992705Sjp161948 /* more specific matches first */ 6002705Sjp161948 for (p = langtag_list; p && *p; p++) { 6012705Sjp161948 do_append = 0; 6022705Sjp161948 for (q = locale_set; q && *q; q++) { 6032705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 2) { 6042705Sjp161948 do_append = 1; 6052705Sjp161948 for (r = result; (r - result) <= 6062705Sjp161948 MIN(n_locales, n_langtags); r++) { 6072705Sjp161948 if (!*r) 6082705Sjp161948 break; 6092705Sjp161948 if (strcmp(*q, *r) == 0) { 6102705Sjp161948 do_append = 0; 6112705Sjp161948 break; 6122705Sjp161948 } 6132705Sjp161948 } 6142705Sjp161948 if (do_append && n_results < max_results) { 6152705Sjp161948 result[n_results++] = xstrdup(*q); 6162705Sjp161948 result[n_results] = NULL; 6172705Sjp161948 } 6182705Sjp161948 break; 6192705Sjp161948 } 6200Sstevel@tonic-gate } 6210Sstevel@tonic-gate } 6220Sstevel@tonic-gate 6232705Sjp161948 for (p = langtag_list; p && *p; p++) { 6242705Sjp161948 do_append = 0; 6252705Sjp161948 for (q = locale_set; q && *q; q++) { 6262705Sjp161948 if (g11n_langtag_matches_locale(*p, *q) == 1) { 6272705Sjp161948 do_append = 1; 6282705Sjp161948 for (r = result; (r - result) <= 6292705Sjp161948 MIN(n_locales, n_langtags); r++) { 6302705Sjp161948 if (!*r) 6312705Sjp161948 break; 6322705Sjp161948 if (strcmp(*q, *r) == 0) { 6332705Sjp161948 
do_append = 0; 6342705Sjp161948 break; 6352705Sjp161948 } 6362705Sjp161948 } 6372705Sjp161948 if (do_append && n_results < max_results) { 6382705Sjp161948 result[n_results++] = xstrdup(*q); 6392705Sjp161948 result[n_results] = NULL; 6402705Sjp161948 } 6412705Sjp161948 break; 6422705Sjp161948 } 6430Sstevel@tonic-gate } 6440Sstevel@tonic-gate } 6450Sstevel@tonic-gate 6462705Sjp161948 xfree_split_list(langtag_list); 6472705Sjp161948 6482705Sjp161948 return (result); 6490Sstevel@tonic-gate } 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate char * 6520Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 6530Sstevel@tonic-gate { 6545562Sjp161948 char **results, **locales, *result = NULL; 6555562Sjp161948 6565562Sjp161948 if (srvr_locales == NULL) 6575562Sjp161948 locales = g11n_getlocales(); 6585562Sjp161948 else 6595562Sjp161948 locales = srvr_locales; 6600Sstevel@tonic-gate 6612705Sjp161948 if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 6625562Sjp161948 locales)) == NULL) 6635562Sjp161948 goto err; 6640Sstevel@tonic-gate 6652705Sjp161948 if (*results != NULL) 6662705Sjp161948 result = xstrdup(*results); 6670Sstevel@tonic-gate 6682705Sjp161948 xfree_split_list(results); 6690Sstevel@tonic-gate 6705562Sjp161948 err: 6715562Sjp161948 if (locales != srvr_locales) 6725562Sjp161948 g11n_freelist(locales); 6732705Sjp161948 return (result); 6740Sstevel@tonic-gate } 6750Sstevel@tonic-gate 6760Sstevel@tonic-gate /* 677*9600SNobutomo.Nakano@Sun.COM * Functions for converting to UTF-8 from the local codeset and 678*9600SNobutomo.Nakano@Sun.COM * converting from UTF-8 to the local codeset. 6790Sstevel@tonic-gate * 680*9600SNobutomo.Nakano@Sun.COM * The error_str parameter is an pointer to a char variable where to 681*9600SNobutomo.Nakano@Sun.COM * store a string suitable for use with error() or fatal() or friends. 682*9600SNobutomo.Nakano@Sun.COM * It is also used for an error indicator when NULL is returned. 
6830Sstevel@tonic-gate * 684*9600SNobutomo.Nakano@Sun.COM * If conversion isn't necessary, *error_str is set to NULL, and 685*9600SNobutomo.Nakano@Sun.COM * NULL is returned. 686*9600SNobutomo.Nakano@Sun.COM * If conversion error occured, *error_str points to an error message, 687*9600SNobutomo.Nakano@Sun.COM * and NULL is returned. 6880Sstevel@tonic-gate */ 689*9600SNobutomo.Nakano@Sun.COM char * 690*9600SNobutomo.Nakano@Sun.COM g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str) 6910Sstevel@tonic-gate { 692*9600SNobutomo.Nakano@Sun.COM static char *last_codeset; 693*9600SNobutomo.Nakano@Sun.COM static iconv_t cd = (iconv_t)-1; 694*9600SNobutomo.Nakano@Sun.COM char *codeset; 6952705Sjp161948 696*9600SNobutomo.Nakano@Sun.COM *error_str = NULL; 6970Sstevel@tonic-gate 698*9600SNobutomo.Nakano@Sun.COM codeset = nl_langinfo(CODESET); 6990Sstevel@tonic-gate 700*9600SNobutomo.Nakano@Sun.COM if (strcmp(codeset, "UTF-8") == 0) 701*9600SNobutomo.Nakano@Sun.COM return (NULL); 7020Sstevel@tonic-gate 703*9600SNobutomo.Nakano@Sun.COM if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 704*9600SNobutomo.Nakano@Sun.COM if (last_codeset != NULL) { 705*9600SNobutomo.Nakano@Sun.COM xfree(last_codeset); 706*9600SNobutomo.Nakano@Sun.COM last_codeset = NULL; 7072705Sjp161948 } 708*9600SNobutomo.Nakano@Sun.COM if (cd != (iconv_t)-1) 709*9600SNobutomo.Nakano@Sun.COM (void) iconv_close(cd); 7100Sstevel@tonic-gate 711*9600SNobutomo.Nakano@Sun.COM if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) { 712*9600SNobutomo.Nakano@Sun.COM *error_str = gettext("Cannot convert UTF-8 " 713*9600SNobutomo.Nakano@Sun.COM "strings to the local codeset"); 714*9600SNobutomo.Nakano@Sun.COM return (NULL); 7152705Sjp161948 } 716*9600SNobutomo.Nakano@Sun.COM last_codeset = xstrdup(codeset); 7170Sstevel@tonic-gate } 718*9600SNobutomo.Nakano@Sun.COM return (do_iconv(cd, str, lenp, error_str)); 7190Sstevel@tonic-gate } 7200Sstevel@tonic-gate 7210Sstevel@tonic-gate char * 
722*9600SNobutomo.Nakano@Sun.COM g11n_convert_to_utf8(const char *str, uint_t *lenp, 723*9600SNobutomo.Nakano@Sun.COM int native, char **error_str) 7240Sstevel@tonic-gate { 725*9600SNobutomo.Nakano@Sun.COM static char *last_codeset; 726*9600SNobutomo.Nakano@Sun.COM static iconv_t cd = (iconv_t)-1; 727*9600SNobutomo.Nakano@Sun.COM char *codeset; 7280Sstevel@tonic-gate 729*9600SNobutomo.Nakano@Sun.COM *error_str = NULL; 7300Sstevel@tonic-gate 731*9600SNobutomo.Nakano@Sun.COM if (native) 732*9600SNobutomo.Nakano@Sun.COM codeset = native_codeset; 733*9600SNobutomo.Nakano@Sun.COM else 734*9600SNobutomo.Nakano@Sun.COM codeset = nl_langinfo(CODESET); 7352705Sjp161948 736*9600SNobutomo.Nakano@Sun.COM if (strcmp(codeset, "UTF-8") == 0) 737*9600SNobutomo.Nakano@Sun.COM return (NULL); 7380Sstevel@tonic-gate 739*9600SNobutomo.Nakano@Sun.COM if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) { 740*9600SNobutomo.Nakano@Sun.COM if (last_codeset != NULL) { 741*9600SNobutomo.Nakano@Sun.COM xfree(last_codeset); 742*9600SNobutomo.Nakano@Sun.COM last_codeset = NULL; 743*9600SNobutomo.Nakano@Sun.COM } 744*9600SNobutomo.Nakano@Sun.COM if (cd != (iconv_t)-1) 745*9600SNobutomo.Nakano@Sun.COM (void) iconv_close(cd); 7460Sstevel@tonic-gate 747*9600SNobutomo.Nakano@Sun.COM if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) { 748*9600SNobutomo.Nakano@Sun.COM *error_str = gettext("Cannot convert the " 749*9600SNobutomo.Nakano@Sun.COM "local codeset strings to UTF-8"); 750*9600SNobutomo.Nakano@Sun.COM return (NULL); 7512705Sjp161948 } 752*9600SNobutomo.Nakano@Sun.COM last_codeset = xstrdup(codeset); 7530Sstevel@tonic-gate } 754*9600SNobutomo.Nakano@Sun.COM return (do_iconv(cd, str, lenp, error_str)); 7550Sstevel@tonic-gate } 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate /* 7580Sstevel@tonic-gate * Wrapper around iconv() 7590Sstevel@tonic-gate * 760*9600SNobutomo.Nakano@Sun.COM * The caller is responsible for freeing the result. 
NULL is returned when 7610Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 762*9600SNobutomo.Nakano@Sun.COM * The caller must ensure that the input string isn't NULL pointer. 7630Sstevel@tonic-gate */ 764*9600SNobutomo.Nakano@Sun.COM static char * 765*9600SNobutomo.Nakano@Sun.COM do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str) 7662705Sjp161948 { 767*9600SNobutomo.Nakano@Sun.COM int ilen, olen; 768*9600SNobutomo.Nakano@Sun.COM size_t ileft, oleft; 769*9600SNobutomo.Nakano@Sun.COM char *ostr, *optr; 770*9600SNobutomo.Nakano@Sun.COM const char *istr; 7712705Sjp161948 772*9600SNobutomo.Nakano@Sun.COM ilen = *lenp; 773*9600SNobutomo.Nakano@Sun.COM olen = ilen + 1; 7742705Sjp161948 775*9600SNobutomo.Nakano@Sun.COM ostr = NULL; 776*9600SNobutomo.Nakano@Sun.COM for (;;) { 777*9600SNobutomo.Nakano@Sun.COM olen *= 2; 778*9600SNobutomo.Nakano@Sun.COM oleft = olen; 779*9600SNobutomo.Nakano@Sun.COM ostr = optr = xrealloc(ostr, olen); 780*9600SNobutomo.Nakano@Sun.COM istr = (const char *)str; 781*9600SNobutomo.Nakano@Sun.COM if ((ileft = ilen) == 0) 782*9600SNobutomo.Nakano@Sun.COM break; 7830Sstevel@tonic-gate 784*9600SNobutomo.Nakano@Sun.COM if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) { 785*9600SNobutomo.Nakano@Sun.COM /* success: generate reset sequence */ 786*9600SNobutomo.Nakano@Sun.COM if (iconv(cd, NULL, NULL, 787*9600SNobutomo.Nakano@Sun.COM &optr, &oleft) == (size_t)-1 && errno == E2BIG) { 788*9600SNobutomo.Nakano@Sun.COM continue; 789*9600SNobutomo.Nakano@Sun.COM } 790*9600SNobutomo.Nakano@Sun.COM break; 791*9600SNobutomo.Nakano@Sun.COM } 792*9600SNobutomo.Nakano@Sun.COM /* failed */ 793*9600SNobutomo.Nakano@Sun.COM if (errno != E2BIG) { 794*9600SNobutomo.Nakano@Sun.COM oleft = olen; 795*9600SNobutomo.Nakano@Sun.COM (void) iconv(cd, NULL, NULL, &ostr, &oleft); 796*9600SNobutomo.Nakano@Sun.COM xfree(ostr); 797*9600SNobutomo.Nakano@Sun.COM *err_str = gettext("Codeset conversion failed"); 
798*9600SNobutomo.Nakano@Sun.COM return (NULL); 799*9600SNobutomo.Nakano@Sun.COM } 800*9600SNobutomo.Nakano@Sun.COM } 801*9600SNobutomo.Nakano@Sun.COM olen = optr - ostr; 802*9600SNobutomo.Nakano@Sun.COM optr = xmalloc(olen + 1); 803*9600SNobutomo.Nakano@Sun.COM (void) memcpy(optr, ostr, olen); 804*9600SNobutomo.Nakano@Sun.COM xfree(ostr); 805*9600SNobutomo.Nakano@Sun.COM 806*9600SNobutomo.Nakano@Sun.COM optr[olen] = '\0'; 807*9600SNobutomo.Nakano@Sun.COM *lenp = olen; 808*9600SNobutomo.Nakano@Sun.COM 809*9600SNobutomo.Nakano@Sun.COM return (optr); 810*9600SNobutomo.Nakano@Sun.COM } 8110Sstevel@tonic-gate 812*9600SNobutomo.Nakano@Sun.COM /* 813*9600SNobutomo.Nakano@Sun.COM * A filter for output string. Control and unprintable characters 814*9600SNobutomo.Nakano@Sun.COM * are converted into visible form (eg "\ooo"). 815*9600SNobutomo.Nakano@Sun.COM */ 816*9600SNobutomo.Nakano@Sun.COM char * 817*9600SNobutomo.Nakano@Sun.COM g11n_filter_string(char *s) 818*9600SNobutomo.Nakano@Sun.COM { 819*9600SNobutomo.Nakano@Sun.COM int mb_cur_max = MB_CUR_MAX; 820*9600SNobutomo.Nakano@Sun.COM int mblen, len; 821*9600SNobutomo.Nakano@Sun.COM char *os = s; 822*9600SNobutomo.Nakano@Sun.COM wchar_t wc; 823*9600SNobutomo.Nakano@Sun.COM char *obuf, *op; 824*9600SNobutomo.Nakano@Sun.COM 825*9600SNobutomo.Nakano@Sun.COM /* all character may be converted into the form of \ooo */ 826*9600SNobutomo.Nakano@Sun.COM obuf = op = xmalloc(strlen(s) * 4 + 1); 827*9600SNobutomo.Nakano@Sun.COM 828*9600SNobutomo.Nakano@Sun.COM while (*s != '\0') { 829*9600SNobutomo.Nakano@Sun.COM mblen = mbtowc(&wc, s, mb_cur_max); 830*9600SNobutomo.Nakano@Sun.COM if (mblen <= 0) { 831*9600SNobutomo.Nakano@Sun.COM mblen = 1; 832*9600SNobutomo.Nakano@Sun.COM wc = (unsigned char)*s; 833*9600SNobutomo.Nakano@Sun.COM } 834*9600SNobutomo.Nakano@Sun.COM if (!iswprint(wc) && 835*9600SNobutomo.Nakano@Sun.COM wc != L'\n' && wc != L'\r' && wc != L'\t') { 836*9600SNobutomo.Nakano@Sun.COM /* 837*9600SNobutomo.Nakano@Sun.COM * 
control chars which need to be replaced 838*9600SNobutomo.Nakano@Sun.COM * with safe character sequence. 839*9600SNobutomo.Nakano@Sun.COM */ 840*9600SNobutomo.Nakano@Sun.COM while (mblen != 0) { 841*9600SNobutomo.Nakano@Sun.COM op += sprintf(op, "\\%03o", 842*9600SNobutomo.Nakano@Sun.COM (unsigned char)*s++); 843*9600SNobutomo.Nakano@Sun.COM mblen--; 844*9600SNobutomo.Nakano@Sun.COM } 845*9600SNobutomo.Nakano@Sun.COM } else { 846*9600SNobutomo.Nakano@Sun.COM while (mblen != 0) { 847*9600SNobutomo.Nakano@Sun.COM *op++ = *s++; 848*9600SNobutomo.Nakano@Sun.COM mblen--; 8492705Sjp161948 } 8502705Sjp161948 } 851*9600SNobutomo.Nakano@Sun.COM } 852*9600SNobutomo.Nakano@Sun.COM *op = '\0'; 853*9600SNobutomo.Nakano@Sun.COM len = op - obuf + 1; 854*9600SNobutomo.Nakano@Sun.COM op = xrealloc(os, len); 855*9600SNobutomo.Nakano@Sun.COM (void) memcpy(op, obuf, len); 856*9600SNobutomo.Nakano@Sun.COM xfree(obuf); 857*9600SNobutomo.Nakano@Sun.COM return (op); 858*9600SNobutomo.Nakano@Sun.COM } 8592705Sjp161948 860*9600SNobutomo.Nakano@Sun.COM /* 861*9600SNobutomo.Nakano@Sun.COM * Once we negotiated with a langtag, server need to map it to a system 862*9600SNobutomo.Nakano@Sun.COM * locale. That is done based on the locale supported on the server side. 863*9600SNobutomo.Nakano@Sun.COM * We know (with the locale supported on Solaris) how the langtag is 864*9600SNobutomo.Nakano@Sun.COM * mapped to. However, from the client point of view, there is no way to 865*9600SNobutomo.Nakano@Sun.COM * know exactly what locale(encoding) will be used. 866*9600SNobutomo.Nakano@Sun.COM * 867*9600SNobutomo.Nakano@Sun.COM * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that the 868*9600SNobutomo.Nakano@Sun.COM * UTF-8 characters always come over the wire, so it is no longer the problem 869*9600SNobutomo.Nakano@Sun.COM * as long as both side has the bug fix. 
 * However, if the server side doesn't
 * have the fix, the client can't safely perform the code conversion since
 * the incoming character encoding is unknown.
 *
 * To alleviate this situation, we take an empirical approach to find
 * the encoding from the langtag.
 *
 * If the langtag has a subtag, we can directly map the langtag to a UTF-8
 * locale (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
 * of L10N support ..). Those are:
 *
 *	no_NO, no_NY, sr_SP, sr_YU
 *
 * They all use ISO8859-X encoding.
 *
 * For those "xx" langtags, some of them can be mapped to "xx.UTF-8",
 * but others cannot. So we need to use the "xx" as the locale name.
 * Those locales are:
 *
 *	ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
 *
 * Their encodings vary. They could be ISO8859-X or EUC or something else.
 * So we don't perform code conversion for these langtags.
893*9600SNobutomo.Nakano@Sun.COM */ 894*9600SNobutomo.Nakano@Sun.COM static const char *non_utf8_langtag[] = { 895*9600SNobutomo.Nakano@Sun.COM "no-NO", "no-NY", "sr-SP", "sr-YU", 896*9600SNobutomo.Nakano@Sun.COM "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja", 897*9600SNobutomo.Nakano@Sun.COM "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL}; 8982705Sjp161948 899*9600SNobutomo.Nakano@Sun.COM void 900*9600SNobutomo.Nakano@Sun.COM g11n_test_langtag(const char *lang, int server) 901*9600SNobutomo.Nakano@Sun.COM { 902*9600SNobutomo.Nakano@Sun.COM const char **lp; 903*9600SNobutomo.Nakano@Sun.COM 904*9600SNobutomo.Nakano@Sun.COM if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) { 905*9600SNobutomo.Nakano@Sun.COM /* 906*9600SNobutomo.Nakano@Sun.COM * We negotiated with real locale name (not lang tag). 907*9600SNobutomo.Nakano@Sun.COM * We shouldn't expect UTF-8, thus shouldn't do code 908*9600SNobutomo.Nakano@Sun.COM * conversion. 909*9600SNobutomo.Nakano@Sun.COM */ 910*9600SNobutomo.Nakano@Sun.COM datafellows |= SSH_BUG_STRING_ENCODING; 911*9600SNobutomo.Nakano@Sun.COM return; 912*9600SNobutomo.Nakano@Sun.COM } 913*9600SNobutomo.Nakano@Sun.COM 914*9600SNobutomo.Nakano@Sun.COM if (datafellows & SSH_BUG_STRING_ENCODING) { 915*9600SNobutomo.Nakano@Sun.COM if (server) { 916*9600SNobutomo.Nakano@Sun.COM /* 917*9600SNobutomo.Nakano@Sun.COM * Whatever bug exists in the client side, server 918*9600SNobutomo.Nakano@Sun.COM * side has nothing to do, since server has no way 919*9600SNobutomo.Nakano@Sun.COM * to know what actual encoding is used on the client 920*9600SNobutomo.Nakano@Sun.COM * side. For example, even if we negotiated with 921*9600SNobutomo.Nakano@Sun.COM * en_US, client locale could be en_US.ISO8859-X or 922*9600SNobutomo.Nakano@Sun.COM * en_US.UTF-8. 923*9600SNobutomo.Nakano@Sun.COM */ 924*9600SNobutomo.Nakano@Sun.COM return; 925*9600SNobutomo.Nakano@Sun.COM } 926*9600SNobutomo.Nakano@Sun.COM /* 927*9600SNobutomo.Nakano@Sun.COM * We are on the client side. 
We'll check with known 928*9600SNobutomo.Nakano@Sun.COM * locales to see if non-UTF8 characters could come in. 929*9600SNobutomo.Nakano@Sun.COM */ 930*9600SNobutomo.Nakano@Sun.COM for (lp = non_utf8_langtag; *lp != NULL; lp++) { 931*9600SNobutomo.Nakano@Sun.COM if (strcmp(lang, *lp) == 0) 932*9600SNobutomo.Nakano@Sun.COM break; 933*9600SNobutomo.Nakano@Sun.COM } 934*9600SNobutomo.Nakano@Sun.COM if (*lp == NULL) { 935*9600SNobutomo.Nakano@Sun.COM debug2("Server is expected to use UTF-8 locale"); 936*9600SNobutomo.Nakano@Sun.COM datafellows &= ~SSH_BUG_STRING_ENCODING; 937*9600SNobutomo.Nakano@Sun.COM } else { 938*9600SNobutomo.Nakano@Sun.COM /* 939*9600SNobutomo.Nakano@Sun.COM * Server is expected to use non-UTF8 encoding. 940*9600SNobutomo.Nakano@Sun.COM */ 941*9600SNobutomo.Nakano@Sun.COM debug2("Enforcing no code conversion: %s", lang); 942*9600SNobutomo.Nakano@Sun.COM } 943*9600SNobutomo.Nakano@Sun.COM } 9440Sstevel@tonic-gate } 9455562Sjp161948 9465562Sjp161948 /* 9475562Sjp161948 * Free all strings in the list and then free the list itself. We know that the 9485562Sjp161948 * list ends with a NULL pointer. 9495562Sjp161948 */ 9505562Sjp161948 void 9515562Sjp161948 g11n_freelist(char **list) 9525562Sjp161948 { 9535562Sjp161948 int i = 0; 9545562Sjp161948 9555562Sjp161948 while (list[i] != NULL) { 9565562Sjp161948 xfree(list[i]); 9575562Sjp161948 i++; 9585562Sjp161948 } 9595562Sjp161948 9605562Sjp161948 xfree(list); 9615562Sjp161948 } 962