1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate * 22*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 23*0Sstevel@tonic-gate * Use is subject to license terms. 24*0Sstevel@tonic-gate */ 25*0Sstevel@tonic-gate 26*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 27*0Sstevel@tonic-gate 28*0Sstevel@tonic-gate #include <errno.h> 29*0Sstevel@tonic-gate #include <locale.h> 30*0Sstevel@tonic-gate #include <langinfo.h> 31*0Sstevel@tonic-gate #include <iconv.h> 32*0Sstevel@tonic-gate #include <ctype.h> 33*0Sstevel@tonic-gate #include <strings.h> 34*0Sstevel@tonic-gate #include <string.h> 35*0Sstevel@tonic-gate #include <stdio.h> 36*0Sstevel@tonic-gate #include <stdlib.h> 37*0Sstevel@tonic-gate #include "includes.h" 38*0Sstevel@tonic-gate #include "xmalloc.h" 39*0Sstevel@tonic-gate #include "xlist.h" 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate #ifdef MIN 42*0Sstevel@tonic-gate #undef MIN 43*0Sstevel@tonic-gate #endif /* MIN */ 44*0Sstevel@tonic-gate 45*0Sstevel@tonic-gate #define MIN(x, y) ((x) < (y) ? (x) : (y)) 46*0Sstevel@tonic-gate 47*0Sstevel@tonic-gate #define LOCALE_PATH "/usr/bin/locale" 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate #define LANGTAG_MAX 5 /* two-char country code, '-' and two-char region code */ 50*0Sstevel@tonic-gate 51*0Sstevel@tonic-gate static u_char * do_iconv(iconv_t cd, u_int *mul_ptr, 52*0Sstevel@tonic-gate const void *buf, u_int len, 53*0Sstevel@tonic-gate u_int *outlen, int *err, 54*0Sstevel@tonic-gate u_char **err_str); 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2); 57*0Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale); 58*0Sstevel@tonic-gate 59*0Sstevel@tonic-gate u_int 60*0Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str); 61*0Sstevel@tonic-gate 62*0Sstevel@tonic-gate u_int 63*0Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str); 64*0Sstevel@tonic-gate 65*0Sstevel@tonic-gate static 66*0Sstevel@tonic-gate char * 67*0Sstevel@tonic-gate g11n_locale2langtag(char *locale) 68*0Sstevel@tonic-gate { 69*0Sstevel@tonic-gate char *langtag; 70*0Sstevel@tonic-gate 71*0Sstevel@tonic-gate /* base cases */ 72*0Sstevel@tonic-gate if (!locale || !*locale) return NULL; 73*0Sstevel@tonic-gate 74*0Sstevel@tonic-gate if (strcmp(locale, "POSIX") == 0 || 75*0Sstevel@tonic-gate strcmp(locale, "C") == 0) return "i-default"; 76*0Sstevel@tonic-gate 77*0Sstevel@tonic-gate /* Punt for language codes which are not exactly 2 letters */ 78*0Sstevel@tonic-gate if (strlen(locale) < 2 || 79*0Sstevel@tonic-gate !isalpha(locale[0]) || 80*0Sstevel@tonic-gate !isalpha(locale[1]) || 81*0Sstevel@tonic-gate (locale[2] != '\0' && 82*0Sstevel@tonic-gate locale[2] != '_' && 83*0Sstevel@tonic-gate locale[2] != '.' && 84*0Sstevel@tonic-gate locale[2] != '@')) 85*0Sstevel@tonic-gate return NULL; 86*0Sstevel@tonic-gate 87*0Sstevel@tonic-gate 88*0Sstevel@tonic-gate /* We have a primary language sub-tag */ 89*0Sstevel@tonic-gate langtag = (char *) xmalloc(LANGTAG_MAX + 1); 90*0Sstevel@tonic-gate 91*0Sstevel@tonic-gate strncpy(langtag, locale, 2); 92*0Sstevel@tonic-gate langtag[2] = '\0'; 93*0Sstevel@tonic-gate 94*0Sstevel@tonic-gate /* Do we have country sub-tag? */ 95*0Sstevel@tonic-gate if (locale[2] == '_') { 96*0Sstevel@tonic-gate if (strlen(locale) < 5 || 97*0Sstevel@tonic-gate !isalpha(locale[3]) || 98*0Sstevel@tonic-gate !isalpha(locale[4]) || 99*0Sstevel@tonic-gate (locale[5] != '\0' && (locale[5] != '.' && locale[5] != '@'))) { 100*0Sstevel@tonic-gate return langtag; 101*0Sstevel@tonic-gate } 102*0Sstevel@tonic-gate 103*0Sstevel@tonic-gate /* yes, we do */ 104*0Sstevel@tonic-gate /* if (snprintf(langtag, 6, "%s-%s,%s", lang_subtag, 105*0Sstevel@tonic-gate country_subtag, langtag) == 8) */ 106*0Sstevel@tonic-gate if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 107*0Sstevel@tonic-gate 2, locale+3) == 5) 108*0Sstevel@tonic-gate return langtag; 109*0Sstevel@tonic-gate } 110*0Sstevel@tonic-gate 111*0Sstevel@tonic-gate /* In all other cases we just use the primary language sub-tag */ 112*0Sstevel@tonic-gate return langtag; 113*0Sstevel@tonic-gate } 114*0Sstevel@tonic-gate 115*0Sstevel@tonic-gate u_int 116*0Sstevel@tonic-gate g11n_langtag_is_default(char *langtag) 117*0Sstevel@tonic-gate { 118*0Sstevel@tonic-gate return (strcmp(langtag, "i-default") == 0); 119*0Sstevel@tonic-gate } 120*0Sstevel@tonic-gate 121*0Sstevel@tonic-gate /* 122*0Sstevel@tonic-gate * This lang tag / locale matching function works only for two-character 123*0Sstevel@tonic-gate * language primary sub-tags and two-character country sub-tags. 124*0Sstevel@tonic-gate */ 125*0Sstevel@tonic-gate u_int 126*0Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale) 127*0Sstevel@tonic-gate { 128*0Sstevel@tonic-gate /* Match "i-default" to the process' current locale if possible */ 129*0Sstevel@tonic-gate if (g11n_langtag_is_default(langtag)) { 130*0Sstevel@tonic-gate if (strcasecmp(locale, "POSIX") == 0 || 131*0Sstevel@tonic-gate strcasecmp(locale, "C") == 0) 132*0Sstevel@tonic-gate return 1; 133*0Sstevel@tonic-gate else 134*0Sstevel@tonic-gate return 0; 135*0Sstevel@tonic-gate } 136*0Sstevel@tonic-gate 137*0Sstevel@tonic-gate /* locale must be at least 2 chars long and the lang part must be 138*0Sstevel@tonic-gate * exactly two characters */ 139*0Sstevel@tonic-gate if (strlen(locale) < 2 || 140*0Sstevel@tonic-gate (!isalpha(locale[0]) || !isalpha(locale[1]) || 141*0Sstevel@tonic-gate (locale[2] != '\0' && locale[2] != '_' && locale[2] != '.' && locale[2] != '@'))) 142*0Sstevel@tonic-gate return 0; 143*0Sstevel@tonic-gate 144*0Sstevel@tonic-gate /* same thing with the langtag */ 145*0Sstevel@tonic-gate if (strlen(langtag) < 2 || 146*0Sstevel@tonic-gate (!isalpha(langtag[0]) || !isalpha(langtag[1]) || 147*0Sstevel@tonic-gate (langtag[2] != '\0' && langtag[2] != '-'))) 148*0Sstevel@tonic-gate return 0; 149*0Sstevel@tonic-gate 150*0Sstevel@tonic-gate /* primary language sub-tag and the locale's language part must match */ 151*0Sstevel@tonic-gate if (strncasecmp(langtag, locale, 2) != 0) 152*0Sstevel@tonic-gate return 0; 153*0Sstevel@tonic-gate 154*0Sstevel@tonic-gate /* primary language sub-tag and the locale's language match, now 155*0Sstevel@tonic-gate * fuzzy check country part */ 156*0Sstevel@tonic-gate 157*0Sstevel@tonic-gate /* neither langtag nor locale have more than one component */ 158*0Sstevel@tonic-gate if (langtag[2] == '\0' && 159*0Sstevel@tonic-gate (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')) 160*0Sstevel@tonic-gate return 2; 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate /* langtag has only one sub-tag... */ 163*0Sstevel@tonic-gate if (langtag[2] == '\0') 164*0Sstevel@tonic-gate return 1; 165*0Sstevel@tonic-gate 166*0Sstevel@tonic-gate /* locale has no country code... */ 167*0Sstevel@tonic-gate if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@') 168*0Sstevel@tonic-gate return 1; 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate /* langtag has more than one subtag and the locale has a country code */ 171*0Sstevel@tonic-gate 172*0Sstevel@tonic-gate /* ignore second subtag if not two chars */ 173*0Sstevel@tonic-gate if (strlen(langtag) < 5) 174*0Sstevel@tonic-gate return 1; 175*0Sstevel@tonic-gate 176*0Sstevel@tonic-gate if (!isalpha(langtag[3]) || !isalpha(langtag[4]) || 177*0Sstevel@tonic-gate (langtag[5] != '\0' && langtag[5] != '-')) 178*0Sstevel@tonic-gate return 1; 179*0Sstevel@tonic-gate 180*0Sstevel@tonic-gate /* ignore rest of locale if there is no two-character country part */ 181*0Sstevel@tonic-gate if (strlen(locale) < 5) 182*0Sstevel@tonic-gate return 1; 183*0Sstevel@tonic-gate 184*0Sstevel@tonic-gate if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) || 185*0Sstevel@tonic-gate (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@')) 186*0Sstevel@tonic-gate return 1; 187*0Sstevel@tonic-gate 188*0Sstevel@tonic-gate /* if the country part matches, return 2 */ 189*0Sstevel@tonic-gate if (strncasecmp(&langtag[3], &locale[3], 2) == 0) 190*0Sstevel@tonic-gate return 2; 191*0Sstevel@tonic-gate 192*0Sstevel@tonic-gate return 1; 193*0Sstevel@tonic-gate } 194*0Sstevel@tonic-gate 195*0Sstevel@tonic-gate char * 196*0Sstevel@tonic-gate g11n_getlocale() 197*0Sstevel@tonic-gate { 198*0Sstevel@tonic-gate /* We have one text domain - always set it */ 199*0Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 200*0Sstevel@tonic-gate 201*0Sstevel@tonic-gate /* If the locale is not set, set it from the env vars */ 202*0Sstevel@tonic-gate if (!setlocale(LC_CTYPE, NULL)) 203*0Sstevel@tonic-gate (void) setlocale(LC_CTYPE, ""); 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate return setlocale(LC_CTYPE, NULL); 206*0Sstevel@tonic-gate } 207*0Sstevel@tonic-gate 208*0Sstevel@tonic-gate void 209*0Sstevel@tonic-gate g11n_setlocale(int category, const char *locale) 210*0Sstevel@tonic-gate { 211*0Sstevel@tonic-gate char *curr; 212*0Sstevel@tonic-gate 213*0Sstevel@tonic-gate /* We have one text domain - always set it */ 214*0Sstevel@tonic-gate (void) textdomain(TEXT_DOMAIN); 215*0Sstevel@tonic-gate 216*0Sstevel@tonic-gate if (!locale) 217*0Sstevel@tonic-gate return; 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gate if (*locale && ((curr = setlocale(category, NULL))) && 220*0Sstevel@tonic-gate strcmp(curr, locale) == 0) 221*0Sstevel@tonic-gate return; 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate /* 224*0Sstevel@tonic-gate * If <category> is bogus, setlocale() will do nothing and will 225*0Sstevel@tonic-gate * return NULL. 226*0Sstevel@tonic-gate */ 227*0Sstevel@tonic-gate if (!setlocale(category, locale)) 228*0Sstevel@tonic-gate return; 229*0Sstevel@tonic-gate 230*0Sstevel@tonic-gate /* If setting the locale from the environment, then we're done */ 231*0Sstevel@tonic-gate if (!*locale) 232*0Sstevel@tonic-gate return; 233*0Sstevel@tonic-gate 234*0Sstevel@tonic-gate /* 235*0Sstevel@tonic-gate * If setting a locale from the <locale> argument, then set the 236*0Sstevel@tonic-gate * related env vars. 237*0Sstevel@tonic-gate */ 238*0Sstevel@tonic-gate switch (category) { 239*0Sstevel@tonic-gate case LC_ALL: 240*0Sstevel@tonic-gate setenv("LANG", locale, 1); 241*0Sstevel@tonic-gate setenv("LC_ALL", locale, 1); 242*0Sstevel@tonic-gate break; 243*0Sstevel@tonic-gate case LC_CTYPE: 244*0Sstevel@tonic-gate setenv("LC_CTYPE", locale, 1); 245*0Sstevel@tonic-gate break; 246*0Sstevel@tonic-gate case LC_NUMERIC: 247*0Sstevel@tonic-gate setenv("LC_NUMERIC", locale, 1); 248*0Sstevel@tonic-gate break; 249*0Sstevel@tonic-gate case LC_TIME: 250*0Sstevel@tonic-gate setenv("LC_TIME", locale, 1); 251*0Sstevel@tonic-gate break; 252*0Sstevel@tonic-gate case LC_COLLATE: 253*0Sstevel@tonic-gate setenv("LC_COLLATE", locale, 1); 254*0Sstevel@tonic-gate break; 255*0Sstevel@tonic-gate case LC_MONETARY: 256*0Sstevel@tonic-gate setenv("LC_MONETARY", locale, 1); 257*0Sstevel@tonic-gate break; 258*0Sstevel@tonic-gate case LC_MESSAGES: 259*0Sstevel@tonic-gate setenv("LC_MESSAGES", locale, 1); 260*0Sstevel@tonic-gate break; 261*0Sstevel@tonic-gate } 262*0Sstevel@tonic-gate return; 263*0Sstevel@tonic-gate } 264*0Sstevel@tonic-gate 265*0Sstevel@tonic-gate char ** 266*0Sstevel@tonic-gate g11n_getlocales() 267*0Sstevel@tonic-gate { 268*0Sstevel@tonic-gate FILE *locale_out; 269*0Sstevel@tonic-gate u_int n_elems, list_size, long_line = 0; 270*0Sstevel@tonic-gate char **list; 271*0Sstevel@tonic-gate char locale[64]; /* 64 bytes is plenty for locale names */ 272*0Sstevel@tonic-gate 273*0Sstevel@tonic-gate if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL) { 274*0Sstevel@tonic-gate return NULL; 275*0Sstevel@tonic-gate } 276*0Sstevel@tonic-gate 277*0Sstevel@tonic-gate /* 278*0Sstevel@tonic-gate * Start with enough room for 65 locales - that's a lot fewer than 279*0Sstevel@tonic-gate * all the locales available for installation, but a lot more than 280*0Sstevel@tonic-gate * what most users will need and install 281*0Sstevel@tonic-gate */ 282*0Sstevel@tonic-gate n_elems=0; 283*0Sstevel@tonic-gate list_size=192; 284*0Sstevel@tonic-gate list = (char **) xmalloc(sizeof(char *) * (list_size + 1)); 285*0Sstevel@tonic-gate memset(list, 0, sizeof(char *) * (list_size + 1)); 286*0Sstevel@tonic-gate 287*0Sstevel@tonic-gate while (fgets(locale, sizeof(locale), locale_out)) { 288*0Sstevel@tonic-gate /* skip long locale names (if any) */ 289*0Sstevel@tonic-gate if (!strchr(locale, '\n')) { 290*0Sstevel@tonic-gate long_line = 1; 291*0Sstevel@tonic-gate continue; 292*0Sstevel@tonic-gate } 293*0Sstevel@tonic-gate else if (long_line) { 294*0Sstevel@tonic-gate long_line = 0; 295*0Sstevel@tonic-gate continue; 296*0Sstevel@tonic-gate } 297*0Sstevel@tonic-gate if (strncmp(locale, "iso_8859", 8) == 0) 298*0Sstevel@tonic-gate continue; /* ignore locale names like "iso_8859-1" */ 299*0Sstevel@tonic-gate 300*0Sstevel@tonic-gate if (n_elems == list_size) { 301*0Sstevel@tonic-gate list_size *= 2; 302*0Sstevel@tonic-gate list = (char **) xrealloc((void *) list, (list_size + 1) * sizeof(char *)); 303*0Sstevel@tonic-gate memset(&list[n_elems+1], 0, sizeof(char *) * (list_size - n_elems + 1)); 304*0Sstevel@tonic-gate } 305*0Sstevel@tonic-gate 306*0Sstevel@tonic-gate *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */ 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate list[n_elems++] = xstrdup(locale); 309*0Sstevel@tonic-gate } 310*0Sstevel@tonic-gate list[n_elems] = NULL; 311*0Sstevel@tonic-gate (void) pclose(locale_out); 312*0Sstevel@tonic-gate 313*0Sstevel@tonic-gate qsort(list, n_elems - 1, sizeof(char *), locale_cmp); 314*0Sstevel@tonic-gate return list; 315*0Sstevel@tonic-gate } 316*0Sstevel@tonic-gate 317*0Sstevel@tonic-gate char * 318*0Sstevel@tonic-gate g11n_getlangs() 319*0Sstevel@tonic-gate { 320*0Sstevel@tonic-gate char *locale; 321*0Sstevel@tonic-gate 322*0Sstevel@tonic-gate if (getenv("SSH_LANGS")) 323*0Sstevel@tonic-gate return xstrdup(getenv("SSH_LANGS")); 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate locale = g11n_getlocale(); 326*0Sstevel@tonic-gate 327*0Sstevel@tonic-gate if (!locale || !*locale) 328*0Sstevel@tonic-gate return xstrdup("i-default"); 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gate return g11n_locale2langtag(locale); 331*0Sstevel@tonic-gate } 332*0Sstevel@tonic-gate 333*0Sstevel@tonic-gate char * 334*0Sstevel@tonic-gate g11n_locales2langs(char **locale_set) 335*0Sstevel@tonic-gate { 336*0Sstevel@tonic-gate char **p, **r, **q; 337*0Sstevel@tonic-gate char *langtag; 338*0Sstevel@tonic-gate int locales, skip; 339*0Sstevel@tonic-gate 340*0Sstevel@tonic-gate for (locales = 0, p = locale_set ; p && *p ; p++) 341*0Sstevel@tonic-gate locales++; 342*0Sstevel@tonic-gate 343*0Sstevel@tonic-gate r = (char **) xmalloc((locales + 1) * sizeof(char *)); 344*0Sstevel@tonic-gate memset(r, 0, (locales + 1) * sizeof(char *)); 345*0Sstevel@tonic-gate 346*0Sstevel@tonic-gate for (p = locale_set ; p && *p && ((p - locale_set) <= locales); p++) { 347*0Sstevel@tonic-gate skip = 0; 348*0Sstevel@tonic-gate if ((langtag = g11n_locale2langtag(*p)) == NULL) 349*0Sstevel@tonic-gate continue; 350*0Sstevel@tonic-gate for (q = r ; (q - r) < locales ; q++) { 351*0Sstevel@tonic-gate if (!*q) break; 352*0Sstevel@tonic-gate if (*q && strcmp(*q, langtag) == 0) 353*0Sstevel@tonic-gate skip = 1; 354*0Sstevel@tonic-gate } 355*0Sstevel@tonic-gate if (!skip) 356*0Sstevel@tonic-gate *(q++) = langtag; 357*0Sstevel@tonic-gate *q = NULL; 358*0Sstevel@tonic-gate } 359*0Sstevel@tonic-gate return xjoin(r, ','); 360*0Sstevel@tonic-gate } 361*0Sstevel@tonic-gate 362*0Sstevel@tonic-gate static 363*0Sstevel@tonic-gate int 364*0Sstevel@tonic-gate sortcmp(const void *d1, const void *d2) 365*0Sstevel@tonic-gate { 366*0Sstevel@tonic-gate char *s1 = *(char **)d1; 367*0Sstevel@tonic-gate char *s2 = *(char **)d2; 368*0Sstevel@tonic-gate 369*0Sstevel@tonic-gate return strcmp(s1, s2); 370*0Sstevel@tonic-gate } 371*0Sstevel@tonic-gate 372*0Sstevel@tonic-gate int 373*0Sstevel@tonic-gate g11n_langtag_match(char *langtag1, char *langtag2) 374*0Sstevel@tonic-gate { 375*0Sstevel@tonic-gate int len1, len2; 376*0Sstevel@tonic-gate char c1, c2; 377*0Sstevel@tonic-gate 378*0Sstevel@tonic-gate len1 = (strchr(langtag1, '-')) ? 379*0Sstevel@tonic-gate (strchr(langtag1, '-') - langtag1) 380*0Sstevel@tonic-gate : strlen(langtag1); 381*0Sstevel@tonic-gate 382*0Sstevel@tonic-gate len2 = (strchr(langtag2, '-')) ? 383*0Sstevel@tonic-gate (strchr(langtag2, '-') - langtag2) 384*0Sstevel@tonic-gate : strlen(langtag2); 385*0Sstevel@tonic-gate 386*0Sstevel@tonic-gate /* no match */ 387*0Sstevel@tonic-gate if (len1 != len2 || 388*0Sstevel@tonic-gate strncmp(langtag1, langtag2, len1) != 0) 389*0Sstevel@tonic-gate return 0; 390*0Sstevel@tonic-gate 391*0Sstevel@tonic-gate c1 = *(langtag1 + len1); 392*0Sstevel@tonic-gate c2 = *(langtag2 + len2); 393*0Sstevel@tonic-gate 394*0Sstevel@tonic-gate /* no country sub-tags - exact match */ 395*0Sstevel@tonic-gate if (c1 == '\0' && c2 == '\0') 396*0Sstevel@tonic-gate return 2; 397*0Sstevel@tonic-gate 398*0Sstevel@tonic-gate /* one langtag has a country sub-tag, the other doesn't */ 399*0Sstevel@tonic-gate if (c1 == '\0' || c2 == '\0') 400*0Sstevel@tonic-gate return 1; 401*0Sstevel@tonic-gate 402*0Sstevel@tonic-gate /* can't happen - both langtags have a country sub-tag */ 403*0Sstevel@tonic-gate if (c1 != '-' || c2 != '-') 404*0Sstevel@tonic-gate return 1; 405*0Sstevel@tonic-gate 406*0Sstevel@tonic-gate /* compare country subtags */ 407*0Sstevel@tonic-gate langtag1 = langtag1 + len1 + 1; 408*0Sstevel@tonic-gate langtag2 = langtag2 + len2 + 1; 409*0Sstevel@tonic-gate 410*0Sstevel@tonic-gate len1 = (strchr(langtag1, '-')) ? 411*0Sstevel@tonic-gate (strchr(langtag1, '-') - langtag1) 412*0Sstevel@tonic-gate : strlen(langtag1); 413*0Sstevel@tonic-gate 414*0Sstevel@tonic-gate len2 = (strchr(langtag2, '-')) ? 415*0Sstevel@tonic-gate (strchr(langtag2, '-') - langtag2) 416*0Sstevel@tonic-gate : strlen(langtag2); 417*0Sstevel@tonic-gate 418*0Sstevel@tonic-gate if (len1 != len2 || 419*0Sstevel@tonic-gate strncmp(langtag1, langtag2, len1) != 0) 420*0Sstevel@tonic-gate return 1; 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate /* country tags matched - exact match */ 423*0Sstevel@tonic-gate return 2; 424*0Sstevel@tonic-gate } 425*0Sstevel@tonic-gate 426*0Sstevel@tonic-gate char * 427*0Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2) 428*0Sstevel@tonic-gate { 429*0Sstevel@tonic-gate char **list1, **list2, **list3, **p, **q, **r; 430*0Sstevel@tonic-gate char *set3, *lang_subtag; 431*0Sstevel@tonic-gate u_int n1, n2, n3; 432*0Sstevel@tonic-gate u_int do_append; 433*0Sstevel@tonic-gate 434*0Sstevel@tonic-gate list1 = xsplit(set1, ','); 435*0Sstevel@tonic-gate list2 = xsplit(set2, ','); 436*0Sstevel@tonic-gate for (n1 = 0, p = list1 ; p && *p ; p++, n1++) ; 437*0Sstevel@tonic-gate for (n2 = 0, p = list2 ; p && *p ; p++, n2++) ; 438*0Sstevel@tonic-gate 439*0Sstevel@tonic-gate list3 = (char **) xmalloc(sizeof(char *) * (n1 + n2 + 1)); 440*0Sstevel@tonic-gate *list3 = NULL; 441*0Sstevel@tonic-gate 442*0Sstevel@tonic-gate /* we must not sort the user langtags - sorting or not the server's 443*0Sstevel@tonic-gate * should not affect the outcome 444*0Sstevel@tonic-gate */ 445*0Sstevel@tonic-gate qsort(list2, n2, sizeof(char *), sortcmp); 446*0Sstevel@tonic-gate 447*0Sstevel@tonic-gate for (n3 = 0, p = list1 ; p && *p ; p++) { 448*0Sstevel@tonic-gate do_append = 0; 449*0Sstevel@tonic-gate for (q = list2 ; q && *q ; q++) { 450*0Sstevel@tonic-gate if (g11n_langtag_match(*p, *q) != 2) continue; 451*0Sstevel@tonic-gate /* append element */ 452*0Sstevel@tonic-gate for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 453*0Sstevel@tonic-gate do_append = 1; 454*0Sstevel@tonic-gate if (!*r) break; 455*0Sstevel@tonic-gate if (strcmp(*p, *r) == 0) { 456*0Sstevel@tonic-gate do_append = 0; 457*0Sstevel@tonic-gate break; 458*0Sstevel@tonic-gate } 459*0Sstevel@tonic-gate } 460*0Sstevel@tonic-gate if (do_append && n3 <= (n1 + n2)) { 461*0Sstevel@tonic-gate list3[n3++] = xstrdup(*p); 462*0Sstevel@tonic-gate list3[n3] = NULL; 463*0Sstevel@tonic-gate } 464*0Sstevel@tonic-gate } 465*0Sstevel@tonic-gate } 466*0Sstevel@tonic-gate 467*0Sstevel@tonic-gate for (p = list1 ; p && *p ; p++) { 468*0Sstevel@tonic-gate do_append = 0; 469*0Sstevel@tonic-gate for (q = list2 ; q && *q ; q++) { 470*0Sstevel@tonic-gate if (g11n_langtag_match(*p, *q) != 1) continue; 471*0Sstevel@tonic-gate /* append element */ 472*0Sstevel@tonic-gate lang_subtag = xstrdup(*p); 473*0Sstevel@tonic-gate if (strchr(lang_subtag, '-')) 474*0Sstevel@tonic-gate *(strchr(lang_subtag, '-')) = '\0'; 475*0Sstevel@tonic-gate for (r = list3; (r - list3) <= (n1 + n2) ; r++) { 476*0Sstevel@tonic-gate do_append = 1; 477*0Sstevel@tonic-gate if (!*r) break; 478*0Sstevel@tonic-gate if (strcmp(lang_subtag, *r) == 0) { 479*0Sstevel@tonic-gate do_append = 0; 480*0Sstevel@tonic-gate break; 481*0Sstevel@tonic-gate } 482*0Sstevel@tonic-gate } 483*0Sstevel@tonic-gate if (do_append && n3 <= (n1 + n2)) { 484*0Sstevel@tonic-gate list3[n3++] = lang_subtag; 485*0Sstevel@tonic-gate list3[n3] = NULL; 486*0Sstevel@tonic-gate } 487*0Sstevel@tonic-gate else 488*0Sstevel@tonic-gate xfree(lang_subtag); 489*0Sstevel@tonic-gate } 490*0Sstevel@tonic-gate } 491*0Sstevel@tonic-gate 492*0Sstevel@tonic-gate set3 = xjoin(list3, ','); 493*0Sstevel@tonic-gate xfree_split_list(list1); 494*0Sstevel@tonic-gate xfree_split_list(list2); 495*0Sstevel@tonic-gate xfree_split_list(list3); 496*0Sstevel@tonic-gate 497*0Sstevel@tonic-gate return set3; 498*0Sstevel@tonic-gate } 499*0Sstevel@tonic-gate 500*0Sstevel@tonic-gate char * 501*0Sstevel@tonic-gate g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags) 502*0Sstevel@tonic-gate { 503*0Sstevel@tonic-gate char *list, *result; 504*0Sstevel@tonic-gate char **xlist; 505*0Sstevel@tonic-gate 506*0Sstevel@tonic-gate /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */ 507*0Sstevel@tonic-gate list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags); 508*0Sstevel@tonic-gate 509*0Sstevel@tonic-gate if (!list) 510*0Sstevel@tonic-gate return NULL; 511*0Sstevel@tonic-gate 512*0Sstevel@tonic-gate xlist = xsplit(list, ','); 513*0Sstevel@tonic-gate 514*0Sstevel@tonic-gate xfree(list); 515*0Sstevel@tonic-gate 516*0Sstevel@tonic-gate if (!xlist || !*xlist) 517*0Sstevel@tonic-gate return NULL; 518*0Sstevel@tonic-gate 519*0Sstevel@tonic-gate result = xstrdup(*xlist); 520*0Sstevel@tonic-gate 521*0Sstevel@tonic-gate xfree_split_list(xlist); 522*0Sstevel@tonic-gate 523*0Sstevel@tonic-gate return result; 524*0Sstevel@tonic-gate } 525*0Sstevel@tonic-gate 526*0Sstevel@tonic-gate /* 527*0Sstevel@tonic-gate * Compare locales, preferring UTF-8 codesets to others, otherwise doing 528*0Sstevel@tonic-gate * a stright strcmp() 529*0Sstevel@tonic-gate */ 530*0Sstevel@tonic-gate static 531*0Sstevel@tonic-gate int 532*0Sstevel@tonic-gate locale_cmp(const void *d1, const void *d2) 533*0Sstevel@tonic-gate { 534*0Sstevel@tonic-gate char *dot_ptr; 535*0Sstevel@tonic-gate char *s1 = *(char **)d1; 536*0Sstevel@tonic-gate char *s2 = *(char **)d2; 537*0Sstevel@tonic-gate int s1_is_utf8 = 0; 538*0Sstevel@tonic-gate int s2_is_utf8 = 0; 539*0Sstevel@tonic-gate 540*0Sstevel@tonic-gate /* check if s1 is a UTF-8 locale */ 541*0Sstevel@tonic-gate if (((dot_ptr = strchr((char *) s1, '.')) != NULL) && (*dot_ptr != '\0') && 542*0Sstevel@tonic-gate (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 543*0Sstevel@tonic-gate (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 544*0Sstevel@tonic-gate s1_is_utf8++; 545*0Sstevel@tonic-gate } 546*0Sstevel@tonic-gate /* check if s2 is a UTF-8 locale */ 547*0Sstevel@tonic-gate if (((dot_ptr = strchr((char *) s2, '.')) != NULL) && (*dot_ptr != '\0') && 548*0Sstevel@tonic-gate (strncmp(dot_ptr+1, "UTF-8", 5) == 0) && 549*0Sstevel@tonic-gate (*(dot_ptr+6) == '\0' || *(dot_ptr+6) == '@')) { 550*0Sstevel@tonic-gate s2_is_utf8++; 551*0Sstevel@tonic-gate } 552*0Sstevel@tonic-gate 553*0Sstevel@tonic-gate /* prefer UTF-8 locales */ 554*0Sstevel@tonic-gate if (s1_is_utf8 && !s2_is_utf8) 555*0Sstevel@tonic-gate return -1; 556*0Sstevel@tonic-gate 557*0Sstevel@tonic-gate if (s2_is_utf8 && !s1_is_utf8) 558*0Sstevel@tonic-gate return 1; 559*0Sstevel@tonic-gate 560*0Sstevel@tonic-gate /* prefer any locale over the default locales */ 561*0Sstevel@tonic-gate if (strcmp(s1, "C") == 0 || 562*0Sstevel@tonic-gate strcmp(s1, "POSIX") == 0 || 563*0Sstevel@tonic-gate strcmp(s1, "common") == 0) 564*0Sstevel@tonic-gate if (strcmp(s2, "C") != 0 && 565*0Sstevel@tonic-gate strcmp(s2, "POSIX") != 0 && 566*0Sstevel@tonic-gate strcmp(s2, "common") != 0) 567*0Sstevel@tonic-gate return 1; 568*0Sstevel@tonic-gate 569*0Sstevel@tonic-gate if (strcmp(s2, "C") == 0 || 570*0Sstevel@tonic-gate strcmp(s2, "POSIX") == 0 || 571*0Sstevel@tonic-gate strcmp(s2, "common") == 0) 572*0Sstevel@tonic-gate if (strcmp(s1, "C") != 0 && 573*0Sstevel@tonic-gate strcmp(s1, "POSIX") != 0 && 574*0Sstevel@tonic-gate strcmp(s1, "common") != 0) 575*0Sstevel@tonic-gate return -1; 576*0Sstevel@tonic-gate 577*0Sstevel@tonic-gate return strcmp(s1, s2); 578*0Sstevel@tonic-gate } 579*0Sstevel@tonic-gate 580*0Sstevel@tonic-gate 581*0Sstevel@tonic-gate char ** 582*0Sstevel@tonic-gate g11n_langtag_set_locale_set_intersect(char *langtag_set, 583*0Sstevel@tonic-gate char **locale_set) 584*0Sstevel@tonic-gate { 585*0Sstevel@tonic-gate char **langtag_list, **result, **p, **q, **r; 586*0Sstevel@tonic-gate char *s; 587*0Sstevel@tonic-gate u_int do_append, n_langtags, n_locales, n_results, max_results; 588*0Sstevel@tonic-gate 589*0Sstevel@tonic-gate /* Count lang tags and locales */ 590*0Sstevel@tonic-gate for (n_locales = 0, p = locale_set ; p && *p ; p++) n_locales++; 591*0Sstevel@tonic-gate n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0; 592*0Sstevel@tonic-gate for ( ; s = strchr(s, ',') ; s++, n_langtags++) ; 593*0Sstevel@tonic-gate /* 594*0Sstevel@tonic-gate while ((s = strchr(s, ','))) { 595*0Sstevel@tonic-gate n_langtags++; 596*0Sstevel@tonic-gate s++; 597*0Sstevel@tonic-gate } 598*0Sstevel@tonic-gate */ 599*0Sstevel@tonic-gate 600*0Sstevel@tonic-gate qsort(locale_set, n_locales, sizeof(char *), locale_cmp); 601*0Sstevel@tonic-gate 602*0Sstevel@tonic-gate langtag_list = xsplit(langtag_set, ','); 603*0Sstevel@tonic-gate for ( n_langtags = 0, p = langtag_list ; p && *p ; p++, n_langtags++); 604*0Sstevel@tonic-gate 605*0Sstevel@tonic-gate max_results = MIN(n_locales, n_langtags) * 2; 606*0Sstevel@tonic-gate result = (char **) xmalloc(sizeof(char *) * (max_results + 1)); 607*0Sstevel@tonic-gate *result = NULL; 608*0Sstevel@tonic-gate n_results = 0; 609*0Sstevel@tonic-gate 610*0Sstevel@tonic-gate /* More specific matches first */ 611*0Sstevel@tonic-gate for (p = langtag_list ; p && *p ; p++) { 612*0Sstevel@tonic-gate do_append = 0; 613*0Sstevel@tonic-gate for (q = locale_set ; q && *q ; q++) { 614*0Sstevel@tonic-gate if (g11n_langtag_matches_locale(*p, *q) == 2) { 615*0Sstevel@tonic-gate do_append = 1; 616*0Sstevel@tonic-gate for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 617*0Sstevel@tonic-gate if (!*r) break; 618*0Sstevel@tonic-gate if (strcmp(*q, *r) == 0) { 619*0Sstevel@tonic-gate do_append = 0; 620*0Sstevel@tonic-gate break; 621*0Sstevel@tonic-gate } 622*0Sstevel@tonic-gate } 623*0Sstevel@tonic-gate if (do_append && n_results < max_results) { 624*0Sstevel@tonic-gate result[n_results++] = xstrdup(*q); 625*0Sstevel@tonic-gate result[n_results] = NULL; 626*0Sstevel@tonic-gate } 627*0Sstevel@tonic-gate break; 628*0Sstevel@tonic-gate } 629*0Sstevel@tonic-gate } 630*0Sstevel@tonic-gate } 631*0Sstevel@tonic-gate 632*0Sstevel@tonic-gate for (p = langtag_list ; p && *p ; p++) { 633*0Sstevel@tonic-gate do_append = 0; 634*0Sstevel@tonic-gate for (q = locale_set ; q && *q ; q++) { 635*0Sstevel@tonic-gate if (g11n_langtag_matches_locale(*p, *q) == 1) { 636*0Sstevel@tonic-gate do_append = 1; 637*0Sstevel@tonic-gate for (r = result ; (r - result) <= MIN(n_locales, n_langtags) ; r++) { 638*0Sstevel@tonic-gate if (!*r) break; 639*0Sstevel@tonic-gate if (strcmp(*q, *r) == 0) { 640*0Sstevel@tonic-gate do_append = 0; 641*0Sstevel@tonic-gate break; 642*0Sstevel@tonic-gate } 643*0Sstevel@tonic-gate } 644*0Sstevel@tonic-gate if (do_append && n_results < max_results) { 645*0Sstevel@tonic-gate result[n_results++] = xstrdup(*q); 646*0Sstevel@tonic-gate result[n_results] = NULL; 647*0Sstevel@tonic-gate } 648*0Sstevel@tonic-gate break; 649*0Sstevel@tonic-gate } 650*0Sstevel@tonic-gate } 651*0Sstevel@tonic-gate } 652*0Sstevel@tonic-gate xfree_split_list(langtag_list); 653*0Sstevel@tonic-gate 654*0Sstevel@tonic-gate return result; 655*0Sstevel@tonic-gate } 656*0Sstevel@tonic-gate 657*0Sstevel@tonic-gate char * 658*0Sstevel@tonic-gate g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales) 659*0Sstevel@tonic-gate { 660*0Sstevel@tonic-gate char **results, *result = NULL; 661*0Sstevel@tonic-gate 662*0Sstevel@tonic-gate if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags, 663*0Sstevel@tonic-gate srvr_locales ? srvr_locales : g11n_getlocales())) == NULL) 664*0Sstevel@tonic-gate return NULL; 665*0Sstevel@tonic-gate 666*0Sstevel@tonic-gate if (*results != NULL) 667*0Sstevel@tonic-gate result = xstrdup(*results); 668*0Sstevel@tonic-gate 669*0Sstevel@tonic-gate xfree_split_list(results); 670*0Sstevel@tonic-gate 671*0Sstevel@tonic-gate return result; 672*0Sstevel@tonic-gate } 673*0Sstevel@tonic-gate 674*0Sstevel@tonic-gate 675*0Sstevel@tonic-gate /* 676*0Sstevel@tonic-gate * Functions for validating ASCII and UTF-8 strings 677*0Sstevel@tonic-gate * 678*0Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 679*0Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 680*0Sstevel@tonic-gate * friends. 681*0Sstevel@tonic-gate * 682*0Sstevel@tonic-gate * The return value is 0 if success, EILSEQ or EINVAL. 683*0Sstevel@tonic-gate * 684*0Sstevel@tonic-gate */ 685*0Sstevel@tonic-gate 686*0Sstevel@tonic-gate u_int 687*0Sstevel@tonic-gate g11n_validate_ascii(const char *str, u_int len, u_char **error_str) 688*0Sstevel@tonic-gate { 689*0Sstevel@tonic-gate u_char *p; 690*0Sstevel@tonic-gate 691*0Sstevel@tonic-gate for (p = (u_char *) str ; p && *p && (!(*p & 0x80)) ; p++) ; 692*0Sstevel@tonic-gate 693*0Sstevel@tonic-gate if (len && ((p - (u_char *) str) != len)) { 694*0Sstevel@tonic-gate return EILSEQ; 695*0Sstevel@tonic-gate } 696*0Sstevel@tonic-gate return 0; 697*0Sstevel@tonic-gate } 698*0Sstevel@tonic-gate 699*0Sstevel@tonic-gate u_int 700*0Sstevel@tonic-gate g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str) 701*0Sstevel@tonic-gate { 702*0Sstevel@tonic-gate u_char *p; 703*0Sstevel@tonic-gate u_int c, l; 704*0Sstevel@tonic-gate 705*0Sstevel@tonic-gate if (len == 0) len = strlen((const char *)str); 706*0Sstevel@tonic-gate 707*0Sstevel@tonic-gate for (p = (u_char *) str ; p && (p - str < len) && *p ; ) { 708*0Sstevel@tonic-gate /* 8-bit chars begin a UTF-8 sequence */ 709*0Sstevel@tonic-gate if (*p & 0x80) { 710*0Sstevel@tonic-gate /* Get sequence length and sanity check first byte */ 711*0Sstevel@tonic-gate if (*p < 0xc0) 712*0Sstevel@tonic-gate return EILSEQ; 713*0Sstevel@tonic-gate else if (*p < 0xe0) 714*0Sstevel@tonic-gate l=2; 715*0Sstevel@tonic-gate else if (*p < 0xf0) 716*0Sstevel@tonic-gate l=3; 717*0Sstevel@tonic-gate else if (*p < 0xf8) 718*0Sstevel@tonic-gate l=4; 719*0Sstevel@tonic-gate else if (*p < 0xfc) 720*0Sstevel@tonic-gate l=5; 721*0Sstevel@tonic-gate else if (*p < 0xfe) 722*0Sstevel@tonic-gate l=6; 723*0Sstevel@tonic-gate else 724*0Sstevel@tonic-gate return EILSEQ; 725*0Sstevel@tonic-gate 726*0Sstevel@tonic-gate if ((p + l - str) >= len) 727*0Sstevel@tonic-gate return EILSEQ; 728*0Sstevel@tonic-gate 729*0Sstevel@tonic-gate /* overlong detection - build codepoint */ 730*0Sstevel@tonic-gate c = *p & 0x3f; 731*0Sstevel@tonic-gate c = c << (6 * (l-1)); /* shift c bits from first byte */ 732*0Sstevel@tonic-gate 733*0Sstevel@tonic-gate if (l > 1) { 734*0Sstevel@tonic-gate if (*(p+1) && ((*(p+1) & 0xc0) == 0x80)) 735*0Sstevel@tonic-gate c = c | ((*(p+1) & 0x3f) << (6 * (l-2))); 736*0Sstevel@tonic-gate else 737*0Sstevel@tonic-gate return EILSEQ; 738*0Sstevel@tonic-gate if (c < 0x80) 739*0Sstevel@tonic-gate return EILSEQ; 740*0Sstevel@tonic-gate } 741*0Sstevel@tonic-gate if (l > 2) { 742*0Sstevel@tonic-gate if (*(p+2) && ((*(p+2) & 0xc0) == 0x80)) 743*0Sstevel@tonic-gate c = c | ((*(p+2) & 0x3f) << (6 * (l-3))); 744*0Sstevel@tonic-gate else 745*0Sstevel@tonic-gate return EILSEQ; 746*0Sstevel@tonic-gate if (c < 0x800) 747*0Sstevel@tonic-gate return EILSEQ; 748*0Sstevel@tonic-gate } 749*0Sstevel@tonic-gate if (l > 3) { 750*0Sstevel@tonic-gate if (*(p+3) && ((*(p+3) & 0xc0) == 0x80)) 751*0Sstevel@tonic-gate c = c | ((*(p+3) & 0x3f) << (6 * (l-4))); 752*0Sstevel@tonic-gate else 753*0Sstevel@tonic-gate return EILSEQ; 754*0Sstevel@tonic-gate if (c < 0x10000) 755*0Sstevel@tonic-gate return EILSEQ; 756*0Sstevel@tonic-gate } 757*0Sstevel@tonic-gate if (l > 4) { 758*0Sstevel@tonic-gate if (*(p+4) && ((*(p+4) & 0xc0) == 0x80)) 759*0Sstevel@tonic-gate c = c | ((*(p+4) & 0x3f) << (6 * (l-5))); 760*0Sstevel@tonic-gate else 761*0Sstevel@tonic-gate return EILSEQ; 762*0Sstevel@tonic-gate if (c < 0x200000) 763*0Sstevel@tonic-gate return EILSEQ; 764*0Sstevel@tonic-gate } 765*0Sstevel@tonic-gate if (l > 5) { 766*0Sstevel@tonic-gate if (*(p+5) && ((*(p+5) & 0xc0) == 0x80)) 767*0Sstevel@tonic-gate c = c | (*(p+5) & 0x3f) ; 768*0Sstevel@tonic-gate else 769*0Sstevel@tonic-gate return EILSEQ; 770*0Sstevel@tonic-gate if (c < 0x4000000) 771*0Sstevel@tonic-gate return EILSEQ; 772*0Sstevel@tonic-gate } 773*0Sstevel@tonic-gate 774*0Sstevel@tonic-gate /* Check for UTF-16 surrogates ifs other illegal UTF-8 * points */ 775*0Sstevel@tonic-gate if (((c <= 0xdfff) && (c >= 0xd800)) || 776*0Sstevel@tonic-gate (c == 0xfffe) || (c == 0xffff)) 777*0Sstevel@tonic-gate return EILSEQ; 778*0Sstevel@tonic-gate p += l; 779*0Sstevel@tonic-gate } 780*0Sstevel@tonic-gate /* 7-bit chars are fine */ 781*0Sstevel@tonic-gate else 782*0Sstevel@tonic-gate p++; 783*0Sstevel@tonic-gate } 784*0Sstevel@tonic-gate return 0; 785*0Sstevel@tonic-gate } 786*0Sstevel@tonic-gate 787*0Sstevel@tonic-gate /* 788*0Sstevel@tonic-gate * Functions for converting to ASCII or UTF-8 from the local codeset 789*0Sstevel@tonic-gate * Functions for converting from ASCII or UTF-8 to the local codeset 790*0Sstevel@tonic-gate * 791*0Sstevel@tonic-gate * The error_str parameter is an optional pointer to a char variable 792*0Sstevel@tonic-gate * where to store a string suitable for use with error() or fatal() or 793*0Sstevel@tonic-gate * friends. 794*0Sstevel@tonic-gate * 795*0Sstevel@tonic-gate * The err parameter is an optional pointer to an integer where 0 796*0Sstevel@tonic-gate * (success) or EILSEQ or EINVAL will be stored (failure). 797*0Sstevel@tonic-gate * 798*0Sstevel@tonic-gate * These functions return NULL if the conversion fails. 799*0Sstevel@tonic-gate * 800*0Sstevel@tonic-gate */ 801*0Sstevel@tonic-gate 802*0Sstevel@tonic-gate u_char * 803*0Sstevel@tonic-gate g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str) 804*0Sstevel@tonic-gate { 805*0Sstevel@tonic-gate static u_int initialized = 0; 806*0Sstevel@tonic-gate static u_int do_convert = 0; 807*0Sstevel@tonic-gate iconv_t cd; 808*0Sstevel@tonic-gate int err; 809*0Sstevel@tonic-gate 810*0Sstevel@tonic-gate if (!initialized) { 811*0Sstevel@tonic-gate /* 812*0Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 813*0Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 814*0Sstevel@tonic-gate */ 815*0Sstevel@tonic-gate if (strcmp("646", nl_langinfo(CODESET)) == 0 || 816*0Sstevel@tonic-gate strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 817*0Sstevel@tonic-gate strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 818*0Sstevel@tonic-gate initialized = 1; 819*0Sstevel@tonic-gate do_convert = 0; 820*0Sstevel@tonic-gate } 821*0Sstevel@tonic-gate else { 822*0Sstevel@tonic-gate cd = iconv_open(nl_langinfo(CODESET), "646"); 823*0Sstevel@tonic-gate if (cd == (iconv_t) -1) { 824*0Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 825*0Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 826*0Sstevel@tonic-gate "Cannot convert ASCII strings to the local codeset"; 827*0Sstevel@tonic-gate } 828*0Sstevel@tonic-gate initialized = 1; 829*0Sstevel@tonic-gate do_convert = 1; 830*0Sstevel@tonic-gate } 831*0Sstevel@tonic-gate } 832*0Sstevel@tonic-gate 833*0Sstevel@tonic-gate if (!do_convert) { 834*0Sstevel@tonic-gate if ((err = g11n_validate_ascii(str, 0, error_str))) { 835*0Sstevel@tonic-gate if (err_ptr) *err_ptr = err; 836*0Sstevel@tonic-gate return NULL; 837*0Sstevel@tonic-gate } 838*0Sstevel@tonic-gate else 839*0Sstevel@tonic-gate return (u_char *) xstrdup(str); 840*0Sstevel@tonic-gate } 841*0Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 842*0Sstevel@tonic-gate } 843*0Sstevel@tonic-gate 844*0Sstevel@tonic-gate u_char * 845*0Sstevel@tonic-gate g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str) 846*0Sstevel@tonic-gate { 847*0Sstevel@tonic-gate static u_int initialized = 0; 848*0Sstevel@tonic-gate static u_int do_convert = 0; 849*0Sstevel@tonic-gate iconv_t cd; 850*0Sstevel@tonic-gate int err; 851*0Sstevel@tonic-gate 852*0Sstevel@tonic-gate if (!initialized) { 853*0Sstevel@tonic-gate /* 854*0Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 855*0Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 856*0Sstevel@tonic-gate */ 857*0Sstevel@tonic-gate if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 858*0Sstevel@tonic-gate strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 859*0Sstevel@tonic-gate initialized = 1; 860*0Sstevel@tonic-gate do_convert = 0; 861*0Sstevel@tonic-gate } 862*0Sstevel@tonic-gate else { 863*0Sstevel@tonic-gate cd = iconv_open(nl_langinfo(CODESET), "UTF-8"); 864*0Sstevel@tonic-gate if (cd == (iconv_t) -1) { 865*0Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 866*0Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 867*0Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 868*0Sstevel@tonic-gate } 869*0Sstevel@tonic-gate initialized = 1; 870*0Sstevel@tonic-gate do_convert = 1; 871*0Sstevel@tonic-gate } 872*0Sstevel@tonic-gate } 873*0Sstevel@tonic-gate 874*0Sstevel@tonic-gate if (!do_convert) { 875*0Sstevel@tonic-gate if ((err = g11n_validate_utf8(str, 0, error_str))) { 876*0Sstevel@tonic-gate if (err_ptr) *err_ptr = err; 877*0Sstevel@tonic-gate return NULL; 878*0Sstevel@tonic-gate } 879*0Sstevel@tonic-gate else 880*0Sstevel@tonic-gate return (u_char *) xstrdup((char *) str); 881*0Sstevel@tonic-gate } 882*0Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 883*0Sstevel@tonic-gate } 884*0Sstevel@tonic-gate 885*0Sstevel@tonic-gate char * 886*0Sstevel@tonic-gate g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str) 887*0Sstevel@tonic-gate { 888*0Sstevel@tonic-gate static u_int initialized = 0; 889*0Sstevel@tonic-gate static u_int do_convert = 0; 890*0Sstevel@tonic-gate iconv_t cd; 891*0Sstevel@tonic-gate 892*0Sstevel@tonic-gate if (!initialized) { 893*0Sstevel@tonic-gate /* 894*0Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 895*0Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 896*0Sstevel@tonic-gate */ 897*0Sstevel@tonic-gate if (strcmp("646", nl_langinfo(CODESET)) == 0 || 898*0Sstevel@tonic-gate strcmp("ASCII", nl_langinfo(CODESET)) == 0 || 899*0Sstevel@tonic-gate strcmp("US-ASCII", nl_langinfo(CODESET)) == 0) { 900*0Sstevel@tonic-gate initialized = 1; 901*0Sstevel@tonic-gate do_convert = 0; 902*0Sstevel@tonic-gate } 903*0Sstevel@tonic-gate else { 904*0Sstevel@tonic-gate cd = iconv_open("646", nl_langinfo(CODESET)); 905*0Sstevel@tonic-gate if (cd == (iconv_t) -1) { 906*0Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 907*0Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 908*0Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 909*0Sstevel@tonic-gate } 910*0Sstevel@tonic-gate initialized = 1; 911*0Sstevel@tonic-gate do_convert = 1; 912*0Sstevel@tonic-gate } 913*0Sstevel@tonic-gate } 914*0Sstevel@tonic-gate 915*0Sstevel@tonic-gate if (!do_convert) 916*0Sstevel@tonic-gate return xstrdup((char *) str); 917*0Sstevel@tonic-gate return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 918*0Sstevel@tonic-gate } 919*0Sstevel@tonic-gate 920*0Sstevel@tonic-gate u_char * 921*0Sstevel@tonic-gate g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str) 922*0Sstevel@tonic-gate { 923*0Sstevel@tonic-gate static u_int initialized = 0; 924*0Sstevel@tonic-gate static u_int do_convert = 0; 925*0Sstevel@tonic-gate iconv_t cd; 926*0Sstevel@tonic-gate 927*0Sstevel@tonic-gate if (!initialized) { 928*0Sstevel@tonic-gate /* 929*0Sstevel@tonic-gate * iconv_open() fails if the to/from codesets are the 930*0Sstevel@tonic-gate * same, and there are aliases of codesets to boot... 931*0Sstevel@tonic-gate */ 932*0Sstevel@tonic-gate if (strcmp("UTF-8", nl_langinfo(CODESET)) == 0 || 933*0Sstevel@tonic-gate strcmp("UTF8", nl_langinfo(CODESET)) == 0) { 934*0Sstevel@tonic-gate initialized = 1; 935*0Sstevel@tonic-gate do_convert = 0; 936*0Sstevel@tonic-gate } 937*0Sstevel@tonic-gate else { 938*0Sstevel@tonic-gate cd = iconv_open("UTF-8", nl_langinfo(CODESET)); 939*0Sstevel@tonic-gate if (cd == (iconv_t) -1) { 940*0Sstevel@tonic-gate if (err_ptr) *err_ptr = errno; 941*0Sstevel@tonic-gate if (error_str) *error_str = (u_char *) 942*0Sstevel@tonic-gate "Cannot convert UTF-8 strings to the local codeset"; 943*0Sstevel@tonic-gate } 944*0Sstevel@tonic-gate initialized = 1; 945*0Sstevel@tonic-gate do_convert = 1; 946*0Sstevel@tonic-gate } 947*0Sstevel@tonic-gate } 948*0Sstevel@tonic-gate 949*0Sstevel@tonic-gate if (!do_convert) 950*0Sstevel@tonic-gate return (u_char *) xstrdup((char *) str); 951*0Sstevel@tonic-gate return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str); 952*0Sstevel@tonic-gate } 953*0Sstevel@tonic-gate 954*0Sstevel@tonic-gate 955*0Sstevel@tonic-gate /* 956*0Sstevel@tonic-gate * Wrapper around iconv() 957*0Sstevel@tonic-gate * 958*0Sstevel@tonic-gate * The caller is responsible for freeing the result and for handling 959*0Sstevel@tonic-gate * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF). 960*0Sstevel@tonic-gate */ 961*0Sstevel@tonic-gate 962*0Sstevel@tonic-gate static 963*0Sstevel@tonic-gate u_char * 964*0Sstevel@tonic-gate do_iconv(iconv_t cd, u_int *mul_ptr, 965*0Sstevel@tonic-gate const void *buf, u_int len, 966*0Sstevel@tonic-gate u_int *outlen, int *err, 967*0Sstevel@tonic-gate u_char **err_str) 968*0Sstevel@tonic-gate { 969*0Sstevel@tonic-gate size_t inbytesleft, outbytesleft, converted_size; 970*0Sstevel@tonic-gate char *outbuf; 971*0Sstevel@tonic-gate u_char *converted; 972*0Sstevel@tonic-gate const char *inbuf; 973*0Sstevel@tonic-gate u_int mul = 0; 974*0Sstevel@tonic-gate 975*0Sstevel@tonic-gate if (!buf || !(*(char *)buf)) return NULL; 976*0Sstevel@tonic-gate if (len == 0) len = strlen(buf); 977*0Sstevel@tonic-gate /* reset conversion descriptor */ 978*0Sstevel@tonic-gate /* XXX Do we need initial shift sequences for UTF-8??? */ 979*0Sstevel@tonic-gate (void) iconv(cd, NULL, &inbytesleft, &outbuf, &outbytesleft); 980*0Sstevel@tonic-gate inbuf = (const char *) buf; 981*0Sstevel@tonic-gate if (mul_ptr) mul = *mul_ptr; 982*0Sstevel@tonic-gate converted_size = (len << mul); 983*0Sstevel@tonic-gate outbuf = (char *) xmalloc(converted_size + 1); /* for null */ 984*0Sstevel@tonic-gate converted = (u_char *) outbuf; 985*0Sstevel@tonic-gate outbytesleft = len; 986*0Sstevel@tonic-gate do { 987*0Sstevel@tonic-gate if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == 988*0Sstevel@tonic-gate (size_t) -1) { 989*0Sstevel@tonic-gate if (errno == E2BIG) { 990*0Sstevel@tonic-gate /* UTF-8 codepoints are at most 8 bytes long. */ 991*0Sstevel@tonic-gate if (mul > 2) { 992*0Sstevel@tonic-gate if (err_str) 993*0Sstevel@tonic-gate *err_str = (u_char *) "Conversion to UTF-8 failed due to" 994*0Sstevel@tonic-gate "preposterous space requirements"; 995*0Sstevel@tonic-gate if (err) 996*0Sstevel@tonic-gate *err = EILSEQ; 997*0Sstevel@tonic-gate return NULL; 998*0Sstevel@tonic-gate } 999*0Sstevel@tonic-gate 1000*0Sstevel@tonic-gate /* 1001*0Sstevel@tonic-gate * Re-alloc output and ensure that the outbuf 1002*0Sstevel@tonic-gate * and outbytesleft values are adjusted. 1003*0Sstevel@tonic-gate */ 1004*0Sstevel@tonic-gate converted = xrealloc(converted, converted_size << 1 + 1); 1005*0Sstevel@tonic-gate outbuf = (char *) converted + converted_size - outbytesleft; 1006*0Sstevel@tonic-gate converted_size = (len << ++(mul)); 1007*0Sstevel@tonic-gate outbytesleft = converted_size - outbytesleft; 1008*0Sstevel@tonic-gate } 1009*0Sstevel@tonic-gate else { 1010*0Sstevel@tonic-gate /* 1011*0Sstevel@tonic-gate * Let the caller deal with iconv() errors, probably by 1012*0Sstevel@tonic-gate * calling fatal(); xfree() does not set errno. 1013*0Sstevel@tonic-gate */ 1014*0Sstevel@tonic-gate if (err) *err = errno; 1015*0Sstevel@tonic-gate xfree(converted); 1016*0Sstevel@tonic-gate return NULL; 1017*0Sstevel@tonic-gate } 1018*0Sstevel@tonic-gate } 1019*0Sstevel@tonic-gate } while (inbytesleft); 1020*0Sstevel@tonic-gate *outbuf = '\0'; /* ensure null-termination */ 1021*0Sstevel@tonic-gate if (outlen) *outlen = converted_size - outbytesleft; 1022*0Sstevel@tonic-gate if (mul_ptr) *mul_ptr = mul; 1023*0Sstevel@tonic-gate return converted; 1024*0Sstevel@tonic-gate } 1025