10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*2628Sjp161948 * Common Development and Distribution License (the "License"). 6*2628Sjp161948 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate * 21*2628Sjp161948 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 220Sstevel@tonic-gate * Use is subject to license terms. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 260Sstevel@tonic-gate 270Sstevel@tonic-gate #include <errno.h> 280Sstevel@tonic-gate #include <locale.h> 290Sstevel@tonic-gate #include <langinfo.h> 300Sstevel@tonic-gate #include <iconv.h> 310Sstevel@tonic-gate #include <ctype.h> 320Sstevel@tonic-gate #include <strings.h> 330Sstevel@tonic-gate #include <string.h> 340Sstevel@tonic-gate #include <stdio.h> 350Sstevel@tonic-gate #include <stdlib.h> 360Sstevel@tonic-gate #include "includes.h" 370Sstevel@tonic-gate #include "xmalloc.h" 380Sstevel@tonic-gate #include "xlist.h" 390Sstevel@tonic-gate 400Sstevel@tonic-gate #ifdef MIN 410Sstevel@tonic-gate #undef MIN 420Sstevel@tonic-gate #endif /* MIN */ 430Sstevel@tonic-gate 440Sstevel@tonic-gate #define MIN(x, y) ((x) < (y) ? (x) : (y)) 450Sstevel@tonic-gate 460Sstevel@tonic-gate #define LOCALE_PATH "/usr/bin/locale" 470Sstevel@tonic-gate 480Sstevel@tonic-gate #define LANGTAG_MAX 5 /* two-char country code, '-' and two-char region code */ 490Sstevel@tonic-gate 500Sstevel@tonic-gate static u_char * do_iconv(iconv_t cd, u_int *mul_ptr, 510Sstevel@tonic-gate const void *buf, u_int len, 520Sstevel@tonic-gate u_int *outlen, int *err, 530Sstevel@tonic-gate u_char **err_str); 540Sstevel@tonic-gate 550Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 560Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 570Sstevel@tonic-gate 580Sstevel@tonic-gate u_int 590Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str); 600Sstevel@tonic-gate 610Sstevel@tonic-gate u_int 620Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str); 630Sstevel@tonic-gate 640Sstevel@tonic-gate static 650Sstevel@tonic-gate char * 660Sstevel@tonic-gate g11n_locale2langtag(char *locale) 670Sstevel@tonic-gate { 680Sstevel@tonic-gate char *langtag; 690Sstevel@tonic-gate 700Sstevel@tonic-gate /* base cases */ 710Sstevel@tonic-gate if (!locale || !*locale) return NULL; 720Sstevel@tonic-gate 730Sstevel@tonic-gate if (strcmp(locale, "POSIX") == 0 || 740Sstevel@tonic-gate strcmp(locale, "C") == 0) return "i-default"; 750Sstevel@tonic-gate 760Sstevel@tonic-gate /* Punt for language codes which are not exactly 2 letters */ 770Sstevel@tonic-gate if (strlen(locale) < 2 || 780Sstevel@tonic-gate !isalpha(locale[0]) || 790Sstevel@tonic-gate !isalpha(locale[1]) || 800Sstevel@tonic-gate (locale[2] != '\0' && 810Sstevel@tonic-gate locale[2] != '_' && 820Sstevel@tonic-gate locale[2] != '.' && 830Sstevel@tonic-gate locale[2] != '@')) 840Sstevel@tonic-gate return NULL; 850Sstevel@tonic-gate 860Sstevel@tonic-gate 870Sstevel@tonic-gate /* We have a primary language sub-tag */ 880Sstevel@tonic-gate langtag = (char *) xmalloc(LANGTAG_MAX + 1); 890Sstevel@tonic-gate 900Sstevel@tonic-gate strncpy(langtag, locale, 2); 910Sstevel@tonic-gate langtag[2] = '\0'; 920Sstevel@tonic-gate 930Sstevel@tonic-gate /* Do we have country sub-tag? */ 940Sstevel@tonic-gate if (locale[2] == '_') { 950Sstevel@tonic-gate if (strlen(locale) < 5 || 960Sstevel@tonic-gate !isalpha(locale[3]) || 970Sstevel@tonic-gate !isalpha(locale[4]) || 980Sstevel@tonic-gate (locale[5] != '\0' && (locale[5] != '.' && locale[5] != '@'))) { 990Sstevel@tonic-gate return langtag; 1000Sstevel@tonic-gate } 1010Sstevel@tonic-gate 1020Sstevel@tonic-gate /* yes, we do */ 1030Sstevel@tonic-gate /* if (snprintf(langtag, 6, "%s-%s,%s", lang_subtag, 1040Sstevel@tonic-gate country_subtag, langtag) == 8) */ 1050Sstevel@tonic-gate if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 1060Sstevel@tonic-gate 2, locale+3) == 5) 1070Sstevel@tonic-gate return langtag; 1080Sstevel@tonic-gate } 1090Sstevel@tonic-gate 1100Sstevel@tonic-gate /* In all other cases we just use the primary language sub-tag */ 1110Sstevel@tonic-gate return langtag; 1120Sstevel@tonic-gate } 1130Sstevel@tonic-gate 1140Sstevel@tonic-gate u_int 1150Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 1160Sstevel@tonic-gate { 1170Sstevel@tonic-gate return (strcmp(langtag, "i-default") == 0); 1180Sstevel@tonic-gate } 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* 1210Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 1220Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 1230Sstevel@tonic-gate */ 1240Sstevel@tonic-gate u_int 1250Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 1260Sstevel@tonic-gate { 1270Sstevel@tonic-gate /* Match "i-default" to the process' current locale if possible */ 1280Sstevel@tonic-gate if (g11n_langtag_is_default(langtag)) { 1290Sstevel@tonic-gate if (strcasecmp(locale, "POSIX") == 0 || 1300Sstevel@tonic-gate strcasecmp(locale, "C") == 0) 1310Sstevel@tonic-gate return 1; 1320Sstevel@tonic-gate else 1330Sstevel@tonic-gate return 0; 1340Sstevel@tonic-gate } 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate /* locale must be at least 2 chars long and the lang part must be 1370Sstevel@tonic-gate * exactly two characters */ 1380Sstevel@tonic-gate if (strlen(locale) < 2 || 1390Sstevel@tonic-gate (!isalpha(locale[0]) || !isalpha(locale[1]) || 1400Sstevel@tonic-gate (locale[2] != '\0' && locale[2] != '_' && locale[2] != '.' && locale[2] != '@'))) 1410Sstevel@tonic-gate return 0; 1420Sstevel@tonic-gate 1430Sstevel@tonic-gate /* same thing with the langtag */ 1440Sstevel@tonic-gate if (strlen(langtag) < 2 || 1450Sstevel@tonic-gate (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 1460Sstevel@tonic-gate (langtag[2] != '\0' && langtag[2] != '-'))) 1470Sstevel@tonic-gate return 0; 1480Sstevel@tonic-gate 1490Sstevel@tonic-gate /* primary language sub-tag and the locale's language part must match */ 1500Sstevel@tonic-gate if (strncasecmp(langtag, locale, 2) != 0) 1510Sstevel@tonic-gate return 0; 1520Sstevel@tonic-gate 1530Sstevel@tonic-gate /* primary language sub-tag and the locale's language match, now 1540Sstevel@tonic-gate * fuzzy check country part */ 1550Sstevel@tonic-gate 1560Sstevel@tonic-gate /* neither langtag nor locale have more than one component */ 1570Sstevel@tonic-gate if (langtag[2] == '\0' && 1580Sstevel@tonic-gate (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 1590Sstevel@tonic-gate return 2; 1600Sstevel@tonic-gate 1610Sstevel@tonic-gate /* langtag has only one sub-tag... */ 1620Sstevel@tonic-gate if (langtag[2] == '\0') 1630Sstevel@tonic-gate return 1; 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate /* locale has no country code... */ 1660Sstevel@tonic-gate if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 1670Sstevel@tonic-gate return 1; 1680Sstevel@tonic-gate 1690Sstevel@tonic-gate /* langtag has more than one subtag and the locale has a country code */ 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate /* ignore second subtag if not two chars */ 1720Sstevel@tonic-gate if (strlen(langtag) < 5) 1730Sstevel@tonic-gate return 1; 1740Sstevel@tonic-gate 1750Sstevel@tonic-gate if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 1760Sstevel@tonic-gate (langtag[5] != '\0' && langtag[5] != '-')) 1770Sstevel@tonic-gate return 1; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate /* ignore rest of locale if there is no two-character country part */ 1800Sstevel@tonic-gate if (strlen(locale) < 5) 1810Sstevel@tonic-gate return 1; 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 1840Sstevel@tonic-gate (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 1850Sstevel@tonic-gate return 1; 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate /* if the country part matches, return 2 */ 1880Sstevel@tonic-gate if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 1890Sstevel@tonic-gate return 2; 1900Sstevel@tonic-gate 1910Sstevel@tonic-gate return 1; 1920Sstevel@tonic-gate } 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate char * 1950Sstevel@tonic-gate g11n_getlocale() 1960Sstevel@tonic-gate { 1970Sstevel@tonic-gate /* We have one text domain - always set it */ 1980Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate /* If the locale is not set, set it from the env vars */ 201*2628Sjp161948 if (!setlocale(LC_MESSAGES, NULL)) 202*2628Sjp161948 (void) setlocale(LC_MESSAGES, ""); 2030Sstevel@tonic-gate 204*2628Sjp161948 return setlocale(LC_MESSAGES, NULL); 2050Sstevel@tonic-gate } 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate void 2080Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 2090Sstevel@tonic-gate { 2100Sstevel@tonic-gate char *curr; 2110Sstevel@tonic-gate 2120Sstevel@tonic-gate /* We have one text domain - always set it */ 2130Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate if (!locale) 2160Sstevel@tonic-gate return; 2170Sstevel@tonic-gate 2180Sstevel@tonic-gate if (*locale && ((curr = setlocale(category, NULL))) && 2190Sstevel@tonic-gate strcmp(curr, locale) == 0) 2200Sstevel@tonic-gate return; 2210Sstevel@tonic-gate 2220Sstevel@tonic-gate /* 223*2628Sjp161948 * If <category> is bogus, setlocale() will do nothing. 2240Sstevel@tonic-gate */ 225*2628Sjp161948 (void) setlocale(category, locale); 226*2628Sjp161948 2270Sstevel@tonic-gate return; 2280Sstevel@tonic-gate } 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate char ** 2310Sstevel@tonic-gate g11n_getlocales() 2320Sstevel@tonic-gate { 2330Sstevel@tonic-gate FILE *locale_out; 2340Sstevel@tonic-gate u_int n_elems, list_size, long_line = 0; 2350Sstevel@tonic-gate char **list; 2360Sstevel@tonic-gate char locale[64]; /* 64 bytes is plenty for locale names */ 2370Sstevel@tonic-gate 2380Sstevel@tonic-gate if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) { 2390Sstevel@tonic-gate return NULL; 2400Sstevel@tonic-gate } 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate /* 2430Sstevel@tonic-gate * Start with enough room for 65 locales - that's a lot fewer than 2440Sstevel@tonic-gate * all the locales available for installation, but a lot more than 2450Sstevel@tonic-gate * what most users will need and install 2460Sstevel@tonic-gate */ 2470Sstevel@tonic-gate n_elems=0; 2480Sstevel@tonic-gate list_size=192; 2490Sstevel@tonic-gate list = (char **) xmalloc(sizeof(char *) * (list_size + 1)); 2500Sstevel@tonic-gate memset(list, 0, sizeof(char *) * (list_size + 1)); 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate while (fgets(locale, sizeof(locale), locale_out)) { 2530Sstevel@tonic-gate /* skip long locale names (if any) */ 2540Sstevel@tonic-gate if (!strchr(locale, '\n')) { 2550Sstevel@tonic-gate long_line = 1; 2560Sstevel@tonic-gate continue; 2570Sstevel@tonic-gate } 2580Sstevel@tonic-gate else if (long_line) { 2590Sstevel@tonic-gate long_line = 0; 2600Sstevel@tonic-gate continue; 2610Sstevel@tonic-gate } 2620Sstevel@tonic-gate if (strncmp(locale, "iso_8859", 8) == 0) 2630Sstevel@tonic-gate continue; /* ignore locale names like "iso_8859-1" */ 2640Sstevel@tonic-gate 2650Sstevel@tonic-gate if (n_elems == list_size) { 2660Sstevel@tonic-gate list_size *= 2; 2670Sstevel@tonic-gate list = (char **) xrealloc((void *) list, (list_size + 1) * sizeof(char *)); 2680Sstevel@tonic-gate memset(&list[n_elems+1], 0, sizeof(char *) * (list_size - n_elems + 1)); 2690Sstevel@tonic-gate } 2700Sstevel@tonic-gate 2710Sstevel@tonic-gate *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 2720Sstevel@tonic-gate 2730Sstevel@tonic-gate list[n_elems++] = xstrdup(locale); 2740Sstevel@tonic-gate } 2750Sstevel@tonic-gate list[n_elems] = NULL; 2760Sstevel@tonic-gate (void) pclose(locale_out); 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate qsort(list, n_elems - 1, sizeof(char *), locale_cmp); 2790Sstevel@tonic-gate return list; 2800Sstevel@tonic-gate } 2810Sstevel@tonic-gate 2820Sstevel@tonic-gate char * 2830Sstevel@tonic-gate g11n_getlangs() 2840Sstevel@tonic-gate { 2850Sstevel@tonic-gate char *locale; 2860Sstevel@tonic-gate 2870Sstevel@tonic-gate if (getenv("SSH_LANGS")) 2880Sstevel@tonic-gate return xstrdup(getenv("SSH_LANGS")); 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate locale = g11n_getlocale(); 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate if (!locale || !*locale) 2930Sstevel@tonic-gate return xstrdup("i-default"); 2940Sstevel@tonic-gate 2950Sstevel@tonic-gate return g11n_locale2langtag(locale); 2960Sstevel@tonic-gate } 2970Sstevel@tonic-gate 2980Sstevel@tonic-gate char * 2990Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 3000Sstevel@tonic-gate { 3010Sstevel@tonic-gate char **p, **r, **q; 3020Sstevel@tonic-gate char *langtag; 3030Sstevel@tonic-gate int locales, skip; 3040Sstevel@tonic-gate 3050Sstevel@tonic-gate for (locales = 0, p = locale_set ; p && *p ; p++) 3060Sstevel@tonic-gate locales++; 3070Sstevel@tonic-gate 3080Sstevel@tonic-gate r = (char **) xmalloc((locales + 1) * sizeof(char *)); 3090Sstevel@tonic-gate memset(r, 0, (locales + 1) * sizeof(char *)); 3100Sstevel@tonic-gate 3110Sstevel@tonic-gate for (p = locale_set ; p && *p && ((p - locale_set) <= locales); p++) { 3120Sstevel@tonic-gate skip = 0; 3130Sstevel@tonic-gate if ((langtag = g11n_locale2langtag(*p)) == NULL) 3140Sstevel@tonic-gate continue; 3150Sstevel@tonic-gate for (q = r ; (q - r) < locales ; q++) { 3160Sstevel@tonic-gate if (!*q) break; 3170Sstevel@tonic-gate if (*q && strcmp(*q, langtag) == 0) 3180Sstevel@tonic-gate skip = 1; 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate if (!skip) 3210Sstevel@tonic-gate *(q++) = langtag; 3220Sstevel@tonic-gate *q = NULL; 3230Sstevel@tonic-gate } 3240Sstevel@tonic-gate return xjoin(r, ','); 3250Sstevel@tonic-gate } 3260Sstevel@tonic-gate 3270Sstevel@tonic-gate static 3280Sstevel@tonic-gate int 3290Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 3300Sstevel@tonic-gate { 3310Sstevel@tonic-gate char *s1 = *(char **)d1; 3320Sstevel@tonic-gate char *s2 = *(char **)d2; 3330Sstevel@tonic-gate 3340Sstevel@tonic-gate return strcmp(s1, s2); 3350Sstevel@tonic-gate } 3360Sstevel@tonic-gate 3370Sstevel@tonic-gate int 3380Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 3390Sstevel@tonic-gate { 3400Sstevel@tonic-gate int len1, len2; 3410Sstevel@tonic-gate char c1, c2; 3420Sstevel@tonic-gate 3430Sstevel@tonic-gate len1 = (strchr(langtag1, '-')) ? 3440Sstevel@tonic-gate (strchr(langtag1, '-') - langtag1) 3450Sstevel@tonic-gate : strlen(langtag1); 3460Sstevel@tonic-gate 3470Sstevel@tonic-gate len2 = (strchr(langtag2, '-')) ? 3480Sstevel@tonic-gate (strchr(langtag2, '-') - langtag2) 3490Sstevel@tonic-gate : strlen(langtag2); 3500Sstevel@tonic-gate 3510Sstevel@tonic-gate /* no match */ 3520Sstevel@tonic-gate if (len1 != len2 || 3530Sstevel@tonic-gate strncmp(langtag1, langtag2, len1) != 0) 3540Sstevel@tonic-gate return 0; 3550Sstevel@tonic-gate 3560Sstevel@tonic-gate c1 = *(langtag1 + len1); 3570Sstevel@tonic-gate c2 = *(langtag2 + len2); 3580Sstevel@tonic-gate 3590Sstevel@tonic-gate /* no country sub-tags - exact match */ 3600Sstevel@tonic-gate if (c1 == '\0' && c2 == '\0') 3610Sstevel@tonic-gate return 2; 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate /* one langtag has a country sub-tag, the other doesn't */ 3640Sstevel@tonic-gate if (c1 == '\0' || c2 == '\0') 3650Sstevel@tonic-gate return 1; 3660Sstevel@tonic-gate 3670Sstevel@tonic-gate /* can't happen - both langtags have a country sub-tag */ 3680Sstevel@tonic-gate if (c1 != '-' || c2 != '-') 3690Sstevel@tonic-gate return 1; 3700Sstevel@tonic-gate 3710Sstevel@tonic-gate /* compare country subtags */ 3720Sstevel@tonic-gate langtag1 = langtag1 + len1 + 1; 3730Sstevel@tonic-gate langtag2 = langtag2 + len2 + 1; 3740Sstevel@tonic-gate 3750Sstevel@tonic-gate len1 = (strchr(langtag1, '-')) ? 3760Sstevel@tonic-gate (strchr(langtag1, '-') - langtag1) 3770Sstevel@tonic-gate : strlen(langtag1); 3780Sstevel@tonic-gate 3790Sstevel@tonic-gate len2 = (strchr(langtag2, '-')) ? 3800Sstevel@tonic-gate (strchr(langtag2, '-') - langtag2) 3810Sstevel@tonic-gate : strlen(langtag2); 3820Sstevel@tonic-gate 3830Sstevel@tonic-gate if (len1 != len2 || 3840Sstevel@tonic-gate strncmp(langtag1, langtag2, len1) != 0) 3850Sstevel@tonic-gate return 1; 3860Sstevel@tonic-gate 3870Sstevel@tonic-gate /* country tags matched - exact match */ 3880Sstevel@tonic-gate return 2; 3890Sstevel@tonic-gate } 3900Sstevel@tonic-gate 3910Sstevel@tonic-gate char * 3920Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 3930Sstevel@tonic-gate { 3940Sstevel@tonic-gate char **list1, **list2, **list3, **p, **q, **r; 3950Sstevel@tonic-gate char *set3, *lang_subtag; 3960Sstevel@tonic-gate u_int n1, n2, n3; 3970Sstevel@tonic-gate u_int do_append; 3980Sstevel@tonic-gate 3990Sstevel@tonic-gate list1 = xsplit(set1, ','); 4000Sstevel@tonic-gate list2 = xsplit(set2, ','); 4010Sstevel@tonic-gate for (n1 = 0, p = list1 ; p && *p ; p++, n1++) ; 4020Sstevel@tonic-gate for (n2 = 0, p = list2 ; p && *p ; p++, n2++) ; 4030Sstevel@tonic-gate 4040Sstevel@tonic-gate list3 = (char **) xmalloc(sizeof(char *) * (n1 + n2 + 1)); 4050Sstevel@tonic-gate *list3 = NULL; 4060Sstevel@tonic-gate 4070Sstevel@tonic-gate /* we must not sort the user langtags - sorting or not the server's 4080Sstevel@tonic-gate * should not affect the outcome 4090Sstevel@tonic-gate */ 4100Sstevel@tonic-gate qsort(list2, n2, sizeof(char *), sortcmp); 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate for (n3 = 0, p = list1 ; p && *p ; p++) { 4130Sstevel@tonic-gate do_append = 0; 4140Sstevel@tonic-gate for (q = list2 ; q && *q ; q++) { 4150Sstevel@tonic-gate if (g11n_langtag_match(*p, *q) != 2) continue; 4160Sstevel@tonic-gate /* append element */ 4170Sstevel@tonic-gate for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 4180Sstevel@tonic-gate do_append = 1; 4190Sstevel@tonic-gate if (!*r) break; 4200Sstevel@tonic-gate if (strcmp(*p, *r) == 0) { 4210Sstevel@tonic-gate do_append = 0; 4220Sstevel@tonic-gate break; 4230Sstevel@tonic-gate } 4240Sstevel@tonic-gate } 4250Sstevel@tonic-gate if (do_append && n3 <= (n1 + n2)) { 4260Sstevel@tonic-gate list3[n3++] = xstrdup(*p); 4270Sstevel@tonic-gate list3[n3] = NULL; 4280Sstevel@tonic-gate } 4290Sstevel@tonic-gate } 4300Sstevel@tonic-gate } 4310Sstevel@tonic-gate 4320Sstevel@tonic-gate for (p = list1 ; p && *p ; p++) { 4330Sstevel@tonic-gate do_append = 0; 4340Sstevel@tonic-gate for (q = list2 ; q && *q ; q++) { 4350Sstevel@tonic-gate if (g11n_langtag_match(*p, *q) != 1) continue; 4360Sstevel@tonic-gate /* append element */ 4370Sstevel@tonic-gate lang_subtag = xstrdup(*p); 4380Sstevel@tonic-gate if (strchr(lang_subtag, '-')) 4390Sstevel@tonic-gate *(strchr(lang_subtag, '-')) = '\0'; 4400Sstevel@tonic-gate for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 4410Sstevel@tonic-gate do_append = 1; 4420Sstevel@tonic-gate if (!*r) break; 4430Sstevel@tonic-gate if (strcmp(lang_subtag, *r) == 0) { 4440Sstevel@tonic-gate do_append = 0; 4450Sstevel@tonic-gate break; 4460Sstevel@tonic-gate } 4470Sstevel@tonic-gate } 4480Sstevel@tonic-gate if (do_append && n3 <= (n1 + n2)) { 4490Sstevel@tonic-gate list3[n3++] = lang_subtag; 4500Sstevel@tonic-gate list3[n3] = NULL; 4510Sstevel@tonic-gate } 4520Sstevel@tonic-gate else 4530Sstevel@tonic-gate xfree(lang_subtag); 4540Sstevel@tonic-gate } 4550Sstevel@tonic-gate } 4560Sstevel@tonic-gate 4570Sstevel@tonic-gate set3 = xjoin(list3, ','); 4580Sstevel@tonic-gate xfree_split_list(list1); 4590Sstevel@tonic-gate xfree_split_list(list2); 4600Sstevel@tonic-gate xfree_split_list(list3); 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate return set3; 4630Sstevel@tonic-gate } 4640Sstevel@tonic-gate 4650Sstevel@tonic-gate char * 4660Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 4670Sstevel@tonic-gate { 4680Sstevel@tonic-gate char *list, *result; 4690Sstevel@tonic-gate char **xlist; 4700Sstevel@tonic-gate 4710Sstevel@tonic-gate /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 4720Sstevel@tonic-gate list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 4730Sstevel@tonic-gate 4740Sstevel@tonic-gate if (!list) 4750Sstevel@tonic-gate return NULL; 4760Sstevel@tonic-gate 4770Sstevel@tonic-gate xlist = xsplit(list, ','); 4780Sstevel@tonic-gate 4790Sstevel@tonic-gate xfree(list); 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate if (!xlist || !*xlist) 4820Sstevel@tonic-gate return NULL; 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate result = xstrdup(*xlist); 4850Sstevel@tonic-gate 4860Sstevel@tonic-gate xfree_split_list(xlist); 4870Sstevel@tonic-gate 4880Sstevel@tonic-gate return result; 4890Sstevel@tonic-gate } 4900Sstevel@tonic-gate 4910Sstevel@tonic-gate /* 4920Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 4930Sstevel@tonic-gate * a stright strcmp() 4940Sstevel@tonic-gate */ 4950Sstevel@tonic-gate static 4960Sstevel@tonic-gate int 4970Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 4980Sstevel@tonic-gate { 4990Sstevel@tonic-gate char *dot_ptr; 5000Sstevel@tonic-gate char *s1 = *(char **)d1; 5010Sstevel@tonic-gate char *s2 = *(char **)d2; 5020Sstevel@tonic-gate int s1_is_utf8 = 0; 5030Sstevel@tonic-gate int s2_is_utf8 = 0; 5040Sstevel@tonic-gate 5050Sstevel@tonic-gate /* check if s1 is a UTF-8 locale */ 5060Sstevel@tonic-gate if (((dot_ptr = strchr((char *) s1, '.')) != NULL) && (*dot_ptr != '\0') && 5070Sstevel@tonic-gate (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 5080Sstevel@tonic-gate (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 5090Sstevel@tonic-gate s1_is_utf8++; 5100Sstevel@tonic-gate } 5110Sstevel@tonic-gate /* check if s2 is a UTF-8 locale */ 5120Sstevel@tonic-gate if (((dot_ptr = strchr((char *) s2, '.')) != NULL) && (*dot_ptr != '\0') && 5130Sstevel@tonic-gate (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 5140Sstevel@tonic-gate (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 5150Sstevel@tonic-gate s2_is_utf8++; 5160Sstevel@tonic-gate } 5170Sstevel@tonic-gate 5180Sstevel@tonic-gate /* prefer UTF-8 locales */ 5190Sstevel@tonic-gate if (s1_is_utf8 && !s2_is_utf8) 5200Sstevel@tonic-gate return -1; 5210Sstevel@tonic-gate 5220Sstevel@tonic-gate if (s2_is_utf8 && !s1_is_utf8) 5230Sstevel@tonic-gate return 1; 5240Sstevel@tonic-gate 5250Sstevel@tonic-gate /* prefer any locale over the default locales */ 5260Sstevel@tonic-gate if (strcmp(s1, "C") == 0 || 5270Sstevel@tonic-gate strcmp(s1, "POSIX") == 0 || 5280Sstevel@tonic-gate strcmp(s1, "common") == 0) 5290Sstevel@tonic-gate if (strcmp(s2, "C") != 0 && 5300Sstevel@tonic-gate strcmp(s2, "POSIX") != 0 && 5310Sstevel@tonic-gate strcmp(s2, "common") != 0) 5320Sstevel@tonic-gate return 1; 5330Sstevel@tonic-gate 5340Sstevel@tonic-gate if (strcmp(s2, "C") == 0 || 5350Sstevel@tonic-gate strcmp(s2, "POSIX") == 0 || 5360Sstevel@tonic-gate strcmp(s2, "common") == 0) 5370Sstevel@tonic-gate if (strcmp(s1, "C") != 0 && 5380Sstevel@tonic-gate strcmp(s1, "POSIX") != 0 && 5390Sstevel@tonic-gate strcmp(s1, "common") != 0) 5400Sstevel@tonic-gate return -1; 5410Sstevel@tonic-gate 5420Sstevel@tonic-gate return strcmp(s1, s2); 5430Sstevel@tonic-gate } 5440Sstevel@tonic-gate 5450Sstevel@tonic-gate 5460Sstevel@tonic-gate char ** 5470Sstevel@tonic-gate g11n_langtag_set_locale_set_intersect(char *langtag_set, 5480Sstevel@tonic-gate char **locale_set) 5490Sstevel@tonic-gate { 5500Sstevel@tonic-gate char **langtag_list, **result, **p, **q, **r; 5510Sstevel@tonic-gate char *s; 5520Sstevel@tonic-gate u_int do_append, n_langtags, n_locales, n_results, max_results; 5530Sstevel@tonic-gate 5540Sstevel@tonic-gate /* Count lang tags and locales */ 5550Sstevel@tonic-gate for (n_locales = 0, p = locale_set ; p && *p ; p++) n_locales++; 5560Sstevel@tonic-gate n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 5570Sstevel@tonic-gate for ( ; s = strchr(s, ',') ; s++, n_langtags++) ; 5580Sstevel@tonic-gate /* 5590Sstevel@tonic-gate while ((s = strchr(s, ','))) { 5600Sstevel@tonic-gate n_langtags++; 5610Sstevel@tonic-gate s++; 5620Sstevel@tonic-gate } 5630Sstevel@tonic-gate */ 5640Sstevel@tonic-gate 5650Sstevel@tonic-gate qsort(locale_set, n_locales, sizeof(char *), locale_cmp); 5660Sstevel@tonic-gate 5670Sstevel@tonic-gate langtag_list = xsplit(langtag_set, ','); 5680Sstevel@tonic-gate for ( n_langtags = 0, p = langtag_list ; p && *p ; p++, n_langtags++); 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate max_results = MIN(n_locales, n_langtags) * 2; 5710Sstevel@tonic-gate result = (char **) xmalloc(sizeof(char *) * (max_results + 1)); 5720Sstevel@tonic-gate *result = NULL; 5730Sstevel@tonic-gate n_results = 0; 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate /* More specific matches first */ 5760Sstevel@tonic-gate for (p = langtag_list ; p && *p ; p++) { 5770Sstevel@tonic-gate do_append = 0; 5780Sstevel@tonic-gate for (q = locale_set ; q && *q ; q++) { 5790Sstevel@tonic-gate if (g11n_langtag_matches_locale(*p, *q) == 2) { 5800Sstevel@tonic-gate do_append = 1; 5810Sstevel@tonic-gate for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 5820Sstevel@tonic-gate if (!*r) break; 5830Sstevel@tonic-gate if (strcmp(*q, *r) == 0) { 5840Sstevel@tonic-gate do_append = 0; 5850Sstevel@tonic-gate break; 5860Sstevel@tonic-gate } 5870Sstevel@tonic-gate } 5880Sstevel@tonic-gate if (do_append && n_results < max_results) { 5890Sstevel@tonic-gate result[n_results++] = xstrdup(*q); 5900Sstevel@tonic-gate result[n_results] = NULL; 5910Sstevel@tonic-gate } 5920Sstevel@tonic-gate break; 5930Sstevel@tonic-gate } 5940Sstevel@tonic-gate } 5950Sstevel@tonic-gate } 5960Sstevel@tonic-gate 5970Sstevel@tonic-gate for (p = langtag_list ; p && *p ; p++) { 5980Sstevel@tonic-gate do_append = 0; 5990Sstevel@tonic-gate for (q = locale_set ; q && *q ; q++) { 6000Sstevel@tonic-gate if (g11n_langtag_matches_locale(*p, *q) == 1) { 6010Sstevel@tonic-gate do_append = 1; 6020Sstevel@tonic-gate for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 6030Sstevel@tonic-gate if (!*r) break; 6040Sstevel@tonic-gate if (strcmp(*q, *r) == 0) { 6050Sstevel@tonic-gate do_append = 0; 6060Sstevel@tonic-gate break; 6070Sstevel@tonic-gate } 6080Sstevel@tonic-gate } 6090Sstevel@tonic-gate if (do_append && n_results < max_results) { 6100Sstevel@tonic-gate result[n_results++] = xstrdup(*q); 6110Sstevel@tonic-gate result[n_results] = NULL; 6120Sstevel@tonic-gate } 6130Sstevel@tonic-gate break; 6140Sstevel@tonic-gate } 6150Sstevel@tonic-gate } 6160Sstevel@tonic-gate } 6170Sstevel@tonic-gate xfree_split_list(langtag_list); 6180Sstevel@tonic-gate 6190Sstevel@tonic-gate return result; 6200Sstevel@tonic-gate } 6210Sstevel@tonic-gate 6220Sstevel@tonic-gate char * 6230Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 6240Sstevel@tonic-gate { 6250Sstevel@tonic-gate char **results, *result = NULL; 6260Sstevel@tonic-gate 6270Sstevel@tonic-gate if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 6280Sstevel@tonic-gate srvr_locales ? srvr_locales : g11n_getlocales())) == NULL) 6290Sstevel@tonic-gate return NULL; 6300Sstevel@tonic-gate 6310Sstevel@tonic-gate if (*results != NULL) 6320Sstevel@tonic-gate result = xstrdup(*results); 6330Sstevel@tonic-gate 6340Sstevel@tonic-gate xfree_split_list(results); 6350Sstevel@tonic-gate 6360Sstevel@tonic-gate return result; 6370Sstevel@tonic-gate } 6380Sstevel@tonic-gate 6390Sstevel@tonic-gate 6400Sstevel@tonic-gate /* 6410Sstevel@tonic-gate * Functions for validating ASCII and UTF-8 strings 6420Sstevel@tonic-gate * 6430Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 6440Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 6450Sstevel@tonic-gate * friends. 6460Sstevel@tonic-gate * 6470Sstevel@tonic-gate * The return value is 0 if success, EILSEQ or EINVAL. 6480Sstevel@tonic-gate * 6490Sstevel@tonic-gate */ 6500Sstevel@tonic-gate 6510Sstevel@tonic-gate u_int 6520Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str) 6530Sstevel@tonic-gate { 6540Sstevel@tonic-gate u_char *p; 6550Sstevel@tonic-gate 6560Sstevel@tonic-gate for (p = (u_char *) str ; p && *p && (!(*p & 0x80)) ; p++) ; 6570Sstevel@tonic-gate 6580Sstevel@tonic-gate if (len && ((p - (u_char *) str) != len)) { 6590Sstevel@tonic-gate return EILSEQ; 6600Sstevel@tonic-gate } 6610Sstevel@tonic-gate return 0; 6620Sstevel@tonic-gate } 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate u_int 6650Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str) 6660Sstevel@tonic-gate { 6670Sstevel@tonic-gate u_char *p; 6680Sstevel@tonic-gate u_int c, l; 6690Sstevel@tonic-gate 6700Sstevel@tonic-gate if (len == 0) len = strlen((const char *)str); 6710Sstevel@tonic-gate 6720Sstevel@tonic-gate for (p = (u_char *) str ; p && (p - str < len) && *p ; ) { 6730Sstevel@tonic-gate /* 8-bit chars begin a UTF-8 sequence */ 6740Sstevel@tonic-gate if (*p & 0x80) { 6750Sstevel@tonic-gate /* Get sequence length and sanity check first byte */ 6760Sstevel@tonic-gate if (*p < 0xc0) 6770Sstevel@tonic-gate return EILSEQ; 6780Sstevel@tonic-gate else if (*p < 0xe0) 6790Sstevel@tonic-gate l=2; 6800Sstevel@tonic-gate else if (*p < 0xf0) 6810Sstevel@tonic-gate l=3; 6820Sstevel@tonic-gate else if (*p < 0xf8) 6830Sstevel@tonic-gate l=4; 6840Sstevel@tonic-gate else if (*p < 0xfc) 6850Sstevel@tonic-gate l=5; 6860Sstevel@tonic-gate else if (*p < 0xfe) 6870Sstevel@tonic-gate l=6; 6880Sstevel@tonic-gate else 6890Sstevel@tonic-gate return EILSEQ; 6900Sstevel@tonic-gate 6910Sstevel@tonic-gate if ((p + l - str) >= len) 6920Sstevel@tonic-gate return EILSEQ; 6930Sstevel@tonic-gate 6940Sstevel@tonic-gate /* overlong detection - build codepoint */ 6950Sstevel@tonic-gate c = *p & 0x3f; 6960Sstevel@tonic-gate c = c << (6 * (l-1)); /* shift c bits from first byte */ 6970Sstevel@tonic-gate 6980Sstevel@tonic-gate if (l > 1) { 6990Sstevel@tonic-gate if (*(p+1) && ((*(p+1) & 0xc0) == 0x80)) 7000Sstevel@tonic-gate c = c | ((*(p+1) & 0x3f) << (6 * (l-2))); 7010Sstevel@tonic-gate else 7020Sstevel@tonic-gate return EILSEQ; 7030Sstevel@tonic-gate if (c < 0x80) 7040Sstevel@tonic-gate return EILSEQ; 7050Sstevel@tonic-gate } 7060Sstevel@tonic-gate if (l > 2) { 7070Sstevel@tonic-gate if (*(p+2) && ((*(p+2) & 0xc0) == 0x80)) 7080Sstevel@tonic-gate c = c | ((*(p+2) & 0x3f) << (6 * (l-3))); 7090Sstevel@tonic-gate else 7100Sstevel@tonic-gate return EILSEQ; 7110Sstevel@tonic-gate if (c < 0x800) 7120Sstevel@tonic-gate return EILSEQ; 7130Sstevel@tonic-gate } 7140Sstevel@tonic-gate if (l > 3) { 7150Sstevel@tonic-gate if (*(p+3) && ((*(p+3) & 0xc0) == 0x80)) 7160Sstevel@tonic-gate c = c | ((*(p+3) & 0x3f) << (6 * (l-4))); 7170Sstevel@tonic-gate else 7180Sstevel@tonic-gate return EILSEQ; 7190Sstevel@tonic-gate if (c < 0x10000) 7200Sstevel@tonic-gate return EILSEQ; 7210Sstevel@tonic-gate } 7220Sstevel@tonic-gate if (l > 4) { 7230Sstevel@tonic-gate if (*(p+4) && ((*(p+4) & 0xc0) == 0x80)) 7240Sstevel@tonic-gate c = c | ((*(p+4) & 0x3f) << (6 * (l-5))); 7250Sstevel@tonic-gate else 7260Sstevel@tonic-gate return EILSEQ; 7270Sstevel@tonic-gate if (c < 0x200000) 7280Sstevel@tonic-gate return EILSEQ; 7290Sstevel@tonic-gate } 7300Sstevel@tonic-gate if (l > 5) { 7310Sstevel@tonic-gate if (*(p+5) && ((*(p+5) & 0xc0) == 0x80)) 7320Sstevel@tonic-gate c = c | (*(p+5) & 0x3f) ; 7330Sstevel@tonic-gate else 7340Sstevel@tonic-gate return EILSEQ; 7350Sstevel@tonic-gate if (c < 0x4000000) 7360Sstevel@tonic-gate return EILSEQ; 7370Sstevel@tonic-gate } 7380Sstevel@tonic-gate 7390Sstevel@tonic-gate /* Check for UTF-16 surrogates ifs other illegal UTF-8 * points */ 7400Sstevel@tonic-gate if (((c <= 0xdfff) && (c >= 0xd800)) || 7410Sstevel@tonic-gate (c == 0xfffe) || (c == 0xffff)) 7420Sstevel@tonic-gate return EILSEQ; 7430Sstevel@tonic-gate p += l; 7440Sstevel@tonic-gate } 7450Sstevel@tonic-gate /* 7-bit chars are fine */ 7460Sstevel@tonic-gate else 7470Sstevel@tonic-gate p++; 7480Sstevel@tonic-gate } 7490Sstevel@tonic-gate return 0; 7500Sstevel@tonic-gate } 7510Sstevel@tonic-gate 7520Sstevel@tonic-gate /* 7530Sstevel@tonic-gate * Functions for converting to ASCII or UTF-8 from the local codeset 7540Sstevel@tonic-gate * Functions for converting from ASCII or UTF-8 to the local codeset 7550Sstevel@tonic-gate * 7560Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 7570Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 7580Sstevel@tonic-gate * friends. 7590Sstevel@tonic-gate * 7600Sstevel@tonic-gate * The err parameter is an optional pointer to an integer where 0 7610Sstevel@tonic-gate * (success) or EILSEQ or EINVAL will be stored (failure). 7620Sstevel@tonic-gate * 7630Sstevel@tonic-gate * These functions return NULL if the conversion fails. 7640Sstevel@tonic-gate * 7650Sstevel@tonic-gate */ 7660Sstevel@tonic-gate 7670Sstevel@tonic-gate u_char * 7680Sstevel@tonic-gate g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str) 7690Sstevel@tonic-gate { 7700Sstevel@tonic-gate static u_int initialized = 0; 7710Sstevel@tonic-gate static u_int do_convert = 0; 7720Sstevel@tonic-gate iconv_t cd; 7730Sstevel@tonic-gate int err; 7740Sstevel@tonic-gate 7750Sstevel@tonic-gate if (!initialized) { 7760Sstevel@tonic-gate /* 7770Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 7780Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 7790Sstevel@tonic-gate */ 7800Sstevel@tonic-gate if (strcmp("646", nl_langinfo(CODESET)) == 0 || 7810Sstevel@tonic-gate strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 7820Sstevel@tonic-gate strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 7830Sstevel@tonic-gate initialized = 1; 7840Sstevel@tonic-gate do_convert = 0; 7850Sstevel@tonic-gate } 7860Sstevel@tonic-gate else { 7870Sstevel@tonic-gate cd = iconv_open(nl_langinfo(CODESET), "646"); 7880Sstevel@tonic-gate if (cd == (iconv_t) -1) { 7890Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 7900Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 7910Sstevel@tonic-gate "Cannot convert ASCII strings to the local codeset"; 7920Sstevel@tonic-gate } 7930Sstevel@tonic-gate initialized = 1; 7940Sstevel@tonic-gate do_convert = 1; 7950Sstevel@tonic-gate } 7960Sstevel@tonic-gate } 7970Sstevel@tonic-gate 7980Sstevel@tonic-gate if (!do_convert) { 7990Sstevel@tonic-gate if ((err = g11n_validate_ascii(str, 0, error_str))) { 8000Sstevel@tonic-gate if (err_ptr) *err_ptr = err; 8010Sstevel@tonic-gate return NULL; 8020Sstevel@tonic-gate } 8030Sstevel@tonic-gate else 8040Sstevel@tonic-gate return (u_char *) xstrdup(str); 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 8070Sstevel@tonic-gate } 8080Sstevel@tonic-gate 8090Sstevel@tonic-gate u_char * 8100Sstevel@tonic-gate g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str) 8110Sstevel@tonic-gate { 8120Sstevel@tonic-gate static u_int initialized = 0; 8130Sstevel@tonic-gate static u_int do_convert = 0; 8140Sstevel@tonic-gate iconv_t cd; 8150Sstevel@tonic-gate int err; 8160Sstevel@tonic-gate 8170Sstevel@tonic-gate if (!initialized) { 8180Sstevel@tonic-gate /* 8190Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 8200Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 8210Sstevel@tonic-gate */ 8220Sstevel@tonic-gate if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 8230Sstevel@tonic-gate strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 8240Sstevel@tonic-gate initialized = 1; 8250Sstevel@tonic-gate do_convert = 0; 8260Sstevel@tonic-gate } 8270Sstevel@tonic-gate else { 8280Sstevel@tonic-gate cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 8290Sstevel@tonic-gate if (cd == (iconv_t) -1) { 8300Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 8310Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 8320Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 8330Sstevel@tonic-gate } 8340Sstevel@tonic-gate initialized = 1; 8350Sstevel@tonic-gate do_convert = 1; 8360Sstevel@tonic-gate } 8370Sstevel@tonic-gate } 8380Sstevel@tonic-gate 8390Sstevel@tonic-gate if (!do_convert) { 8400Sstevel@tonic-gate if ((err = g11n_validate_utf8(str, 0, error_str))) { 8410Sstevel@tonic-gate if (err_ptr) *err_ptr = err; 8420Sstevel@tonic-gate return NULL; 8430Sstevel@tonic-gate } 8440Sstevel@tonic-gate else 8450Sstevel@tonic-gate return (u_char *) xstrdup((char *) str); 8460Sstevel@tonic-gate } 8470Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 8480Sstevel@tonic-gate } 8490Sstevel@tonic-gate 8500Sstevel@tonic-gate char * 8510Sstevel@tonic-gate g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str) 8520Sstevel@tonic-gate { 8530Sstevel@tonic-gate static u_int initialized = 0; 8540Sstevel@tonic-gate static u_int do_convert = 0; 8550Sstevel@tonic-gate iconv_t cd; 8560Sstevel@tonic-gate 8570Sstevel@tonic-gate if (!initialized) { 8580Sstevel@tonic-gate /* 8590Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 8600Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 8610Sstevel@tonic-gate */ 8620Sstevel@tonic-gate if (strcmp("646", nl_langinfo(CODESET)) == 0 || 8630Sstevel@tonic-gate strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 8640Sstevel@tonic-gate strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 8650Sstevel@tonic-gate initialized = 1; 8660Sstevel@tonic-gate do_convert = 0; 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate else { 8690Sstevel@tonic-gate cd = iconv_open("646", nl_langinfo(CODESET)); 8700Sstevel@tonic-gate if (cd == (iconv_t) -1) { 8710Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 8720Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 8730Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 8740Sstevel@tonic-gate } 8750Sstevel@tonic-gate initialized = 1; 8760Sstevel@tonic-gate do_convert = 1; 8770Sstevel@tonic-gate } 8780Sstevel@tonic-gate } 8790Sstevel@tonic-gate 8800Sstevel@tonic-gate if (!do_convert) 8810Sstevel@tonic-gate return xstrdup((char *) str); 8820Sstevel@tonic-gate return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 8830Sstevel@tonic-gate } 8840Sstevel@tonic-gate 8850Sstevel@tonic-gate u_char * 8860Sstevel@tonic-gate g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str) 8870Sstevel@tonic-gate { 8880Sstevel@tonic-gate static u_int initialized = 0; 8890Sstevel@tonic-gate static u_int do_convert = 0; 8900Sstevel@tonic-gate iconv_t cd; 8910Sstevel@tonic-gate 8920Sstevel@tonic-gate if (!initialized) { 8930Sstevel@tonic-gate /* 8940Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 8950Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 8960Sstevel@tonic-gate */ 8970Sstevel@tonic-gate if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 8980Sstevel@tonic-gate strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 8990Sstevel@tonic-gate initialized = 1; 9000Sstevel@tonic-gate do_convert = 0; 9010Sstevel@tonic-gate } 9020Sstevel@tonic-gate else { 9030Sstevel@tonic-gate cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 9040Sstevel@tonic-gate if (cd == (iconv_t) -1) { 9050Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 9060Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 9070Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 9080Sstevel@tonic-gate } 9090Sstevel@tonic-gate initialized = 1; 9100Sstevel@tonic-gate do_convert = 1; 9110Sstevel@tonic-gate } 9120Sstevel@tonic-gate } 9130Sstevel@tonic-gate 9140Sstevel@tonic-gate if (!do_convert) 9150Sstevel@tonic-gate return (u_char *) xstrdup((char *) str); 9160Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 9170Sstevel@tonic-gate } 9180Sstevel@tonic-gate 9190Sstevel@tonic-gate 9200Sstevel@tonic-gate /* 9210Sstevel@tonic-gate * Wrapper around iconv() 9220Sstevel@tonic-gate * 9230Sstevel@tonic-gate * The caller is responsible for freeing the result and for handling 9240Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 9250Sstevel@tonic-gate */ 9260Sstevel@tonic-gate 9270Sstevel@tonic-gate static 9280Sstevel@tonic-gate u_char * 9290Sstevel@tonic-gate do_iconv(iconv_t cd, u_int *mul_ptr, 9300Sstevel@tonic-gate const void *buf, u_int len, 9310Sstevel@tonic-gate u_int *outlen, int *err, 9320Sstevel@tonic-gate u_char **err_str) 9330Sstevel@tonic-gate { 9340Sstevel@tonic-gate size_t inbytesleft, outbytesleft, converted_size; 9350Sstevel@tonic-gate char *outbuf; 9360Sstevel@tonic-gate u_char *converted; 9370Sstevel@tonic-gate const char *inbuf; 9380Sstevel@tonic-gate u_int mul = 0; 9390Sstevel@tonic-gate 9400Sstevel@tonic-gate if (!buf || !(*(char *)buf)) return NULL; 9410Sstevel@tonic-gate if (len == 0) len = strlen(buf); 9420Sstevel@tonic-gate /* reset conversion descriptor */ 9430Sstevel@tonic-gate /* XXX Do we need initial shift sequences for UTF-8??? */ 9440Sstevel@tonic-gate (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 9450Sstevel@tonic-gate inbuf = (const char *) buf; 9460Sstevel@tonic-gate if (mul_ptr) mul = *mul_ptr; 9470Sstevel@tonic-gate converted_size = (len << mul); 9480Sstevel@tonic-gate outbuf = (char *) xmalloc(converted_size + 1); /* for null */ 9490Sstevel@tonic-gate converted = (u_char *) outbuf; 9500Sstevel@tonic-gate outbytesleft = len; 9510Sstevel@tonic-gate do { 9520Sstevel@tonic-gate if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 9530Sstevel@tonic-gate (size_t) -1) { 9540Sstevel@tonic-gate if (errno == E2BIG) { 9550Sstevel@tonic-gate /* UTF-8 codepoints are at most 8 bytes long. */ 9560Sstevel@tonic-gate if (mul > 2) { 9570Sstevel@tonic-gate if (err_str) 9580Sstevel@tonic-gate *err_str = (u_char *) "Conversion to UTF-8 failed due to" 9590Sstevel@tonic-gate "preposterous space requirements"; 9600Sstevel@tonic-gate if (err) 9610Sstevel@tonic-gate *err = EILSEQ; 9620Sstevel@tonic-gate return NULL; 9630Sstevel@tonic-gate } 9640Sstevel@tonic-gate 9650Sstevel@tonic-gate /* 9660Sstevel@tonic-gate * Re-alloc output and ensure that the outbuf 9670Sstevel@tonic-gate * and outbytesleft values are adjusted. 9680Sstevel@tonic-gate */ 9690Sstevel@tonic-gate converted = xrealloc(converted, converted_size << 1 + 1); 9700Sstevel@tonic-gate outbuf = (char *) converted + converted_size - outbytesleft; 9710Sstevel@tonic-gate converted_size = (len << ++(mul)); 9720Sstevel@tonic-gate outbytesleft = converted_size - outbytesleft; 9730Sstevel@tonic-gate } 9740Sstevel@tonic-gate else { 9750Sstevel@tonic-gate /* 9760Sstevel@tonic-gate * Let the caller deal with iconv() errors, probably by 9770Sstevel@tonic-gate * calling fatal(); xfree() does not set errno. 9780Sstevel@tonic-gate */ 9790Sstevel@tonic-gate if (err) *err = errno; 9800Sstevel@tonic-gate xfree(converted); 9810Sstevel@tonic-gate return NULL; 9820Sstevel@tonic-gate } 9830Sstevel@tonic-gate } 9840Sstevel@tonic-gate } while (inbytesleft); 9850Sstevel@tonic-gate *outbuf = '\0'; /* ensure null-termination */ 9860Sstevel@tonic-gate if (outlen) *outlen = converted_size - outbytesleft; 9870Sstevel@tonic-gate if (mul_ptr) *mul_ptr = mul; 9880Sstevel@tonic-gate return converted; 9890Sstevel@tonic-gate } 990