/* $NetBSD: localcharset.c,v 1.1.1.1 2016/01/10 21:36:18 christos Exp $ */

/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.  */

/* Written by Bruno Haible <haible@clisp.cons.org>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#if HAVE_STDDEF_H
# include <stddef.h>
#endif

#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif

#if defined _WIN32 || defined __WIN32__
# undef WIN32   /* avoid warning on mingw32 */
# define WIN32
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
# define OS2
#endif

#if !defined WIN32
# if HAVE_LANGINFO_CODESET
#  include <langinfo.h>
# else
#  if HAVE_SETLOCALE
#   include <locale.h>
#  endif
# endif
#elif defined WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__
  /* Win32, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif

#ifndef DIRECTORY_SEPARATOR
# define DIRECTORY_SEPARATOR '/'
#endif

#ifndef ISSLASH
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
#endif

#ifdef HAVE_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
#endif

/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases. If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic. But I don't know what will happen if the two assignments mix.  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;

/* Return a pointer to the contents of the charset.alias file, in the
   format documented at 'charset_aliases'.
   The table is built on the first call and cached in 'charset_aliases';
   later calls return the cached pointer.  On non-Win32 systems the table
   is read from LIBDIR/charset.alias; if that file cannot be opened, holds
   no complete "alias canonical" pair, or memory runs out, the table is
   empty ("").  On Win32 a built-in table is used instead.
   The result is never NULL and must not be freed by the caller.  */
static const char *
get_charset_aliases (void)
{
  const char *cp;

  cp = charset_aliases;
  if (cp == NULL)
    {
#if !defined WIN32
      FILE *fp;
      const char *dir = LIBDIR;
      const char *base = "charset.alias";
      char *file_name;

      /* Concatenate dir and base into freshly allocated file_name.  */
      {
        size_t dir_len = strlen (dir);
        size_t base_len = strlen (base);
        int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
        file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
        if (file_name != NULL)
          {
            memcpy (file_name, dir, dir_len);
            if (add_slash)
              file_name[dir_len] = DIRECTORY_SEPARATOR;
            /* base_len + 1 also copies the terminating NUL.  */
            memcpy (file_name + dir_len + add_slash, base, base_len + 1);
          }
      }

      if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
        /* Out of memory or file not found, treat it as empty.  */
        cp = "";
      else
        {
          /* Parse the file's contents.  */
          int c;
          char buf1[50+1];
          char buf2[50+1];
          char *res_ptr = NULL;
          size_t res_size = 0;
          size_t l1, l2;

          for (;;)
            {
              char *new_ptr;
              size_t new_size;

              c = getc (fp);
              if (c == EOF)
                break;
              if (c == '\n' || c == ' ' || c == '\t')
                continue;
              if (c == '#')
                {
                  /* Skip comment, to end of line.  */
                  do
                    c = getc (fp);
                  while (!(c == EOF || c == '\n'));
                  if (c == EOF)
                    break;
                  continue;
                }
              ungetc (c, fp);
              /* The field width 50 matches the size of buf1 and buf2.  */
              if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
                break;
              l1 = strlen (buf1);
              l2 = strlen (buf2);
              new_size = res_size + l1 + 1 + l2 + 1;
              /* One extra byte is reserved for the final '\0' that
                 terminates the whole table.  */
              if (res_size == 0)
                new_ptr = (char *) malloc (new_size + 1);
              else
                new_ptr = (char *) realloc (res_ptr, new_size + 1);
              if (new_ptr == NULL)
                {
                  /* Out of memory.  A failed realloc leaves the old buffer
                     allocated, so release it here; the table is then
                     treated as empty.  */
                  free (res_ptr);
                  res_ptr = NULL;
                  res_size = 0;
                  break;
                }
              res_ptr = new_ptr;
              res_size = new_size;
              /* Append ALIAS '\0' CANONICAL '\0' at the end of the table.  */
              strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
              strcpy (res_ptr + res_size - (l2 + 1), buf2);
            }
          fclose (fp);
          if (res_size == 0)
            cp = "";
          else
            {
              *(res_ptr + res_size) = '\0';
              cp = res_ptr;
            }
        }

      free (file_name);

#else

      /* To avoid the troubles of installing a separate file in the same
         directory as the DLL and of retrieving the DLL's directory at
         runtime, simply inline the aliases here.  */

# if defined WIN32
      cp = "CP936" "\0" "GBK" "\0"
           "CP1361" "\0" "JOHAB" "\0";
# endif
#endif

      charset_aliases = cp;
    }

  return cp;
}

/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.
   The result is never NULL; it is the empty string when no codeset name
   could be obtained and the alias table has no matching entry.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32 || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if HAVE_SETLOCALE && 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32

  static char buf[2 + 10 + 1];

  /* Win32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative and
             the conversion to size_t is value-preserving.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* cp[0] is a ULONG; convert explicitly so that the argument
             matches the %lu conversion.  */
          sprintf (buf, "CP%lu", (unsigned long) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a sequence of ALIAS '\0' CANONICAL '\0'
     pairs ended by an empty string, so each step skips two strings.  An
     alias of "*" matches any codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  return codeset;
}