xref: /netbsd-src/external/gpl3/binutils.old/dist/binutils/winduni.c (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1 /* winduni.c -- unicode support for the windres program.
2    Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
3    Free Software Foundation, Inc.
4    Written by Ian Lance Taylor, Cygnus Support.
5    Rewritten by Kai Tietz, Onevision.
6 
7    This file is part of GNU Binutils.
8 
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
22    02110-1301, USA.  */
23 
24 
25 /* This file contains unicode support routines for the windres
26    program.  Ideally, we would have generic unicode support which
27    would work on all systems.  However, we don't.  Instead, on a
28    Windows host, we are prepared to call some Windows routines.  This
29    means that we will generate different output on Windows and Unix
30    hosts, but that seems better than not really supporting unicode at
31    all.  */
32 
33 #include "sysdep.h"
34 #include "bfd.h"
35 #include "libiberty.h" /* for xstrdup */
36 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
38 #if defined (_WIN32) || defined (__CYGWIN__)
39 #include <windows.h>
40 #include <winnls.h>
41 #endif
42 #include "winduni.h"
43 #include "safe-ctype.h"
44 
45 #if HAVE_ICONV
46 #include <iconv.h>
47 #endif
48 
/* Forward declarations of the file-local helpers that are defined
   later in this file.  */
static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
static int unichar_isascii (const unichar *, rc_uint_type);
52 
/* Static lookup tables used when not building for a Windows host:
   code page numbers with their iconv names, and the known languages.  */
55 
56 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 
58 /* Codepages mapped.  */
59 static local_iconv_map codepages[] =
60 {
61   { 0, "MS-ANSI" },
62   { 1, "WINDOWS-1252" },
63   { 437, "MS-ANSI" },
64   { 737, "MS-GREEK" },
65   { 775, "WINBALTRIM" },
66   { 850, "MS-ANSI" },
67   { 852, "MS-EE" },
68   { 857, "MS-TURK" },
69   { 862, "CP862" },
70   { 864, "CP864" },
71   { 866, "MS-CYRL" },
72   { 874, "WINDOWS-874" },
73   { 932, "CP932" },
74   { 936, "CP936" },
75   { 949, "CP949" },
76   { 950, "CP950" },
77   { 1250, "WINDOWS-1250" },
78   { 1251, "WINDOWS-1251" },
79   { 1252, "WINDOWS-1252" },
80   { 1253, "WINDOWS-1253" },
81   { 1254, "WINDOWS-1254" },
82   { 1255, "WINDOWS-1255" },
83   { 1256, "WINDOWS-1256" },
84   { 1257, "WINDOWS-1257" },
85   { 1258, "WINDOWS-1258" },
86   { CP_UTF7, "UTF-7" },
87   { CP_UTF8, "UTF-8" },
88   { CP_UTF16, "UTF-16" },
89   { (rc_uint_type) -1, NULL }
90 };
91 
92 /* Languages supported.  */
93 static const wind_language_t languages[] =
94 {
95   { 0x0000, 437, 1252, "Neutral", "Neutral" },
96   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
97   { 0x0403, 850, 1252, "Catalan", "Spain" },	      { 0x0404, 950,  950, "Chinese", "Taiwan" },
98   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
99   { 0x0407, 850, 1252, "German", "Germany" },	      { 0x0408, 737, 1253, "Greek", "Greece" },
100   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
101   { 0x040B, 850, 1252, "Finnish", "Finland" },	      { 0x040C, 850, 1252, "French", "France" },
102   { 0x040D, 862, 1255, "Hebrew", "Israel" },	      { 0x040E, 852, 1250, "Hungarian", "Hungary" },
103   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
104   { 0x0411, 932,  932, "Japanese", "Japan" },	      { 0x0412, 949,  949, "Korean", "Korea (south)" },
105   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm�l)", "Norway" },
106   { 0x0415, 852, 1250, "Polish", "Poland" },	      { 0x0416, 850, 1252, "Portuguese", "Brazil" },
107   { 0x0418, 852, 1250, "Romanian", "Romania" },	      { 0x0419, 866, 1251, "Russian", "Russia" },
108   { 0x041A, 852, 1250, "Croatian", "Croatia" },	      { 0x041B, 852, 1250, "Slovak", "Slovakia" },
109   { 0x041C, 852, 1250, "Albanian", "Albania" },	      { 0x041D, 850, 1252, "Swedish", "Sweden" },
110   { 0x041E, 874,  874, "Thai", "Thailand" },	      { 0x041F, 857, 1254, "Turkish", "Turkey" },
111   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
112   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
113   { 0x0425, 775, 1257, "Estonian", "Estonia" },	      { 0x0426, 775, 1257, "Latvian", "Latvia" },
114   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
115   { 0x0429, 864, 1256, "Arabic", "Farsi" },	      { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
116   { 0x042D, 850, 1252, "Basque", "Spain" },
117   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
118   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
119   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
120   { 0x043C, 437, 1252, "Irish", "Ireland" },
121   { 0x043E, 850, 1252, "Malay", "Malaysia" },
122   { 0x0801, 864, 1256, "Arabic", "Iraq" },
123   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
124   { 0x0807, 850, 1252, "German", "Switzerland" },
125   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
126   { 0x080C, 850, 1252, "French", "Belgium" },
127   { 0x0810, 850, 1252, "Italian", "Switzerland" },
128   { 0x0813, 850, 1252, "Dutch", "Belgium" },	      { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
129   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
130   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
131   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
132   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
133   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
134   { 0x0C07, 850, 1252, "German", "Austria" },
135   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
136   { 0x0C0C, 850, 1252, "French", "Canada"},
137   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
138   { 0x1001, 864, 1256, "Arabic", "Libya" },
139   { 0x1004, 936,  936, "Chinese", "Singapore" },
140   { 0x1007, 850, 1252, "German", "Luxembourg" },
141   { 0x1009, 850, 1252, "English", "Canada" },
142   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
143   { 0x100C, 850, 1252, "French", "Switzerland" },
144   { 0x1401, 864, 1256, "Arabic", "Algeria" },
145   { 0x1407, 850, 1252, "German", "Liechtenstein" },
146   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
147   { 0x140C, 850, 1252, "French", "Luxembourg" },
148   { 0x1801, 864, 1256, "Arabic", "Morocco" },
149   { 0x1809, 850, 1252, "English", "Ireland" },	      { 0x180A, 850, 1252, "Spanish", "Panama" },
150   { 0x180C, 850, 1252, "French", "Monaco" },
151   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
152   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
153   { 0x2001, 864, 1256, "Arabic", "Oman" },
154   { 0x2009, 850, 1252, "English", "Jamaica" },	      { 0x200A, 850, 1252, "Spanish", "Venezuela" },
155   { 0x2401, 864, 1256, "Arabic", "Yemen" },
156   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
157   { 0x2801, 864, 1256, "Arabic", "Syria" },
158   { 0x2809, 850, 1252, "English", "Belize" },	      { 0x280A, 850, 1252, "Spanish", "Peru" },
159   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
160   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
161   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
162   { 0x3009, 437, 1252, "English", "Zimbabwe" },	      { 0x300A, 850, 1252, "Spanish", "Ecuador" },
163   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
164   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
165   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
166   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
167   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
168   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
169   { 0x4001, 864, 1256, "Arabic", "Qatar" },
170   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
171   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
172   { 0x480A, 850, 1252, "Spanish", "Honduras" },
173   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
174   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
175   { (unsigned) -1,  0,      0, NULL, NULL }
176 };
177 
178 #endif
179 
/* Specifies the default codepage to be used for unicode
   transformations.  Initialised to CP_ACP; nothing else in this file
   reads or writes it.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode transformations.
   Initialised to CP_ACP.  unicode_from_ascii, unicode_from_ascii_len
   and ascii_from_unicode all convert through this value.  */
rc_uint_type wind_current_codepage = CP_ACP;
187 
188 /* Convert an ASCII string to a unicode string.  We just copy it,
189    expanding chars to shorts, rather than doing something intelligent.  */
190 
191 void
192 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
193 {
194   unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
195 }
196 
197 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
198    copy it, expanding chars to shorts, rather than doing something intelligent.
199    This routine converts also \0 within a string.  */
200 
201 void
202 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
203 {
204   char *tmp, *p;
205   rc_uint_type tlen, elen, idx = 0;
206 
207   *unicode = NULL;
208 
209   if (!a_length)
210     {
211       if (length)
212         *length = 0;
213       return;
214     }
215 
216   /* Make sure we have zero terminated string.  */
217   p = tmp = (char *) alloca (a_length + 1);
218   memcpy (tmp, ascii, a_length);
219   tmp[a_length] = 0;
220 
221   while (a_length > 0)
222     {
223       unichar *utmp, *up;
224 
225       tlen = strlen (p);
226 
227       if (tlen > a_length)
228         tlen = a_length;
229       if (*p == 0)
230         {
231 	  /* Make room for one more character.  */
232 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
233 	  if (idx > 0)
234 	    {
235 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
236 	    }
237 	  *unicode = utmp;
238 	  utmp[idx++] = 0;
239 	  --a_length;
240 	  p++;
241 	  continue;
242 	}
243       utmp = NULL;
244       elen = 0;
245       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
246       if (elen)
247 	{
248 	  utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
249 	  wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
250 	  elen /= sizeof (unichar);
251 	  elen --;
252 	}
253       else
254         {
255 	  /* Make room for one more character.  */
256 	  utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
257 	  if (idx > 0)
258 	    {
259 	      memcpy (utmp, *unicode, idx * sizeof (unichar));
260 	    }
261 	  *unicode = utmp;
262 	  utmp[idx++] = ((unichar) *p) & 0xff;
263 	  --a_length;
264 	  p++;
265 	  continue;
266 	}
267       p += tlen;
268       a_length -= tlen;
269 
270       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
271       if (idx > 0)
272 	memcpy (up, *unicode, idx * sizeof (unichar));
273 
274       *unicode = up;
275       if (elen)
276 	memcpy (&up[idx], utmp, sizeof (unichar) * elen);
277 
278       idx += elen;
279     }
280 
281   if (length)
282     *length = idx;
283 }
284 
285 /* Convert an unicode string to an ASCII string.  We just copy it,
286    shrink shorts to chars, rather than doing something intelligent.
287    Shorts with not within the char range are replaced by '_'.  */
288 
289 void
290 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
291 {
292   codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
293 }
294 
295 /* Print the unicode string UNICODE to the file E.  LENGTH is the
296    number of characters to print, or -1 if we should print until the
297    end of the string.  FIXME: On a Windows host, we should be calling
298    some Windows function, probably WideCharToMultiByte.  */
299 
300 void
301 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
302 {
303   while (1)
304     {
305       unichar ch;
306 
307       if (length == 0)
308 	return;
309       if ((bfd_signed_vma) length > 0)
310 	--length;
311 
312       ch = *unicode;
313 
314       if (ch == 0 && (bfd_signed_vma) length < 0)
315 	return;
316 
317       ++unicode;
318 
319       if ((ch & 0x7f) == ch)
320 	{
321 	  if (ch == '\\')
322 	    fputs ("\\\\", e);
323 	  else if (ch == '"')
324 	    fputs ("\"\"", e);
325 	  else if (ISPRINT (ch))
326 	    putc (ch, e);
327 	  else
328 	    {
329 	      switch (ch)
330 		{
331 		case ESCAPE_A:
332 		  fputs ("\\a", e);
333 		  break;
334 
335 		case ESCAPE_B:
336 		  fputs ("\\b", e);
337 		  break;
338 
339 		case ESCAPE_F:
340 		  fputs ("\\f", e);
341 		  break;
342 
343 		case ESCAPE_N:
344 		  fputs ("\\n", e);
345 		  break;
346 
347 		case ESCAPE_R:
348 		  fputs ("\\r", e);
349 		  break;
350 
351 		case ESCAPE_T:
352 		  fputs ("\\t", e);
353 		  break;
354 
355 		case ESCAPE_V:
356 		  fputs ("\\v", e);
357 		  break;
358 
359 		default:
360 		  fprintf (e, "\\%03o", (unsigned int) ch);
361 		  break;
362 		}
363 	    }
364 	}
365       else if ((ch & 0xff) == ch)
366 	fprintf (e, "\\%03o", (unsigned int) ch);
367       else
368 	fprintf (e, "\\x%04x", (unsigned int) ch);
369     }
370 }
371 
372 /* Print a unicode string to a file.  */
373 
374 void
375 ascii_print (FILE *e, const char *s, rc_uint_type length)
376 {
377   while (1)
378     {
379       char ch;
380 
381       if (length == 0)
382 	return;
383       if ((bfd_signed_vma) length > 0)
384 	--length;
385 
386       ch = *s;
387 
388       if (ch == 0 && (bfd_signed_vma) length < 0)
389 	return;
390 
391       ++s;
392 
393       if ((ch & 0x7f) == ch)
394 	{
395 	  if (ch == '\\')
396 	    fputs ("\\\\", e);
397 	  else if (ch == '"')
398 	    fputs ("\"\"", e);
399 	  else if (ISPRINT (ch))
400 	    putc (ch, e);
401 	  else
402 	    {
403 	      switch (ch)
404 		{
405 		case ESCAPE_A:
406 		  fputs ("\\a", e);
407 		  break;
408 
409 		case ESCAPE_B:
410 		  fputs ("\\b", e);
411 		  break;
412 
413 		case ESCAPE_F:
414 		  fputs ("\\f", e);
415 		  break;
416 
417 		case ESCAPE_N:
418 		  fputs ("\\n", e);
419 		  break;
420 
421 		case ESCAPE_R:
422 		  fputs ("\\r", e);
423 		  break;
424 
425 		case ESCAPE_T:
426 		  fputs ("\\t", e);
427 		  break;
428 
429 		case ESCAPE_V:
430 		  fputs ("\\v", e);
431 		  break;
432 
433 		default:
434 		  fprintf (e, "\\%03o", (unsigned int) ch);
435 		  break;
436 		}
437 	    }
438 	}
439       else
440 	fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
441     }
442 }
443 
444 rc_uint_type
445 unichar_len (const unichar *unicode)
446 {
447   rc_uint_type r = 0;
448 
449   if (unicode)
450     while (unicode[r] != 0)
451       r++;
452   else
453     --r;
454   return r;
455 }
456 
457 unichar *
458 unichar_dup (const unichar *unicode)
459 {
460   unichar *r;
461   int len;
462 
463   if (! unicode)
464     return NULL;
465   for (len = 0; unicode[len] != 0; ++len)
466     ;
467   ++len;
468   r = ((unichar *) res_alloc (len * sizeof (unichar)));
469   memcpy (r, unicode, len * sizeof (unichar));
470   return r;
471 }
472 
473 unichar *
474 unichar_dup_uppercase (const unichar *u)
475 {
476   unichar *r = unichar_dup (u);
477   int i;
478 
479   if (! r)
480     return NULL;
481 
482   for (i = 0; r[i] != 0; ++i)
483     {
484       if (r[i] >= 'a' && r[i] <= 'z')
485 	r[i] &= 0xdf;
486     }
487   return r;
488 }
489 
490 static int
491 unichar_isascii (const unichar *u, rc_uint_type len)
492 {
493   rc_uint_type i;
494 
495   if ((bfd_signed_vma) len < 0)
496     {
497       if (u)
498 	len = (rc_uint_type) unichar_len (u);
499       else
500 	len = 0;
501     }
502 
503   for (i = 0; i < len; i++)
504     if ((u[i] & 0xff80) != 0)
505       return 0;
506   return 1;
507 }
508 
509 void
510 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
511 {
512   if (! unichar_isascii (u, len))
513     fputc ('L', e);
514   fputc ('"', e);
515   unicode_print (e, u, len);
516   fputc ('"', e);
517 }
518 
519 int
520 unicode_is_valid_codepage (rc_uint_type cp)
521 {
522   if ((cp & 0xffff) != cp)
523     return 0;
524   if (cp == CP_UTF16 || cp == CP_ACP)
525     return 1;
526 
527 #if !defined (_WIN32) && !defined (__CYGWIN__)
528   if (! wind_find_codepage_info (cp))
529     return 0;
530   return 1;
531 #else
532   return !! IsValidCodePage ((UINT) cp);
533 #endif
534 }
535 
536 #if defined (_WIN32) || defined (__CYGWIN__)
537 
538 #define max_cp_string_len 6
539 
540 static unsigned int
541 codepage_from_langid (unsigned short langid)
542 {
543   char cp_string [max_cp_string_len];
544   int c;
545 
546   memset (cp_string, 0, max_cp_string_len);
547   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
548      but is unavailable on Win95.  */
549   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
550   		      LOCALE_IDEFAULTANSICODEPAGE,
551   		      cp_string, max_cp_string_len);
552   /* If codepage data for an LCID is not installed on users's system,
553      GetLocaleInfo returns an empty string.  Fall back to system ANSI
554      default. */
555   if (c == 0)
556     return CP_ACP;
557   return strtoul (cp_string, 0, 10);
558 }
559 
560 static unsigned int
561 wincodepage_from_langid (unsigned short langid)
562 {
563   char cp_string [max_cp_string_len];
564   int c;
565 
566   memset (cp_string, 0, max_cp_string_len);
567   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
568      but is unavailable on Win95.  */
569   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
570 		      LOCALE_IDEFAULTCODEPAGE,
571 		      cp_string, max_cp_string_len);
572   /* If codepage data for an LCID is not installed on users's system,
573      GetLocaleInfo returns an empty string.  Fall back to system ANSI
574      default. */
575   if (c == 0)
576     return CP_OEM;
577   return strtoul (cp_string, 0, 10);
578 }
579 
580 static char *
581 lang_from_langid (unsigned short langid)
582 {
583   char cp_string[261];
584   int c;
585 
586   memset (cp_string, 0, 261);
587   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
588   		      LOCALE_SENGLANGUAGE,
589   		      cp_string, 260);
590   /* If codepage data for an LCID is not installed on users's system,
591      GetLocaleInfo returns an empty string.  Fall back to system ANSI
592      default. */
593   if (c == 0)
594     strcpy (cp_string, "Neutral");
595   return xstrdup (cp_string);
596 }
597 
598 static char *
599 country_from_langid (unsigned short langid)
600 {
601   char cp_string[261];
602   int c;
603 
604   memset (cp_string, 0, 261);
605   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
606   		      LOCALE_SENGCOUNTRY,
607   		      cp_string, 260);
608   /* If codepage data for an LCID is not installed on users's system,
609      GetLocaleInfo returns an empty string.  Fall back to system ANSI
610      default. */
611   if (c == 0)
612     strcpy (cp_string, "Neutral");
613   return xstrdup (cp_string);
614 }
615 
616 #endif
617 
618 const wind_language_t *
619 wind_find_language_by_id (unsigned id)
620 {
621 #if !defined (_WIN32) && !defined (__CYGWIN__)
622   int i;
623 
624   if (! id)
625     return NULL;
626   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
627     ;
628   if (languages[i].id == id)
629     return &languages[i];
630   return NULL;
631 #else
632   static wind_language_t wl;
633 
634   wl.id = id;
635   wl.doscp = codepage_from_langid ((unsigned short) id);
636   wl.wincp = wincodepage_from_langid ((unsigned short) id);
637   wl.name = lang_from_langid ((unsigned short) id);
638   wl.country = country_from_langid ((unsigned short) id);
639 
640   return & wl;
641 #endif
642 }
643 
644 const local_iconv_map *
645 wind_find_codepage_info (unsigned cp)
646 {
647 #if !defined (_WIN32) && !defined (__CYGWIN__)
648   int i;
649 
650   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
651     ;
652   if (codepages[i].codepage == (rc_uint_type) -1)
653     return NULL;
654   return &codepages[i];
655 #else
656   static local_iconv_map lim;
657   if (!unicode_is_valid_codepage (cp))
658   	return NULL;
659   lim.codepage = cp;
660   lim.iconv_name = "";
661   return & lim;
662 #endif
663 }
664 
665 /* Convert an Codepage string to a unicode string.  */
666 
667 void
668 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
669 {
670   rc_uint_type len;
671 
672   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
673   if (len)
674     {
675       *u = ((unichar *) res_alloc (len));
676       wind_MultiByteToWideChar (cp, src, *u, len);
677     }
678   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
679      this will set *length to -1.  */
680   len -= sizeof (unichar);
681 
682   if (length != NULL)
683     *length = len / sizeof (unichar);
684 }
685 
686 /* Convert an unicode string to an codepage string.  */
687 
688 void
689 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
690 {
691   rc_uint_type len;
692 
693   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
694   if (len)
695     {
696       *ascii = (char *) res_alloc (len * sizeof (char));
697       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
698     }
699   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
700      this will set *length to -1.  */
701   len--;
702 
703   if (length != NULL)
704     *length = len;
705 }
706 
707 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
708 static int
709 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
710 {
711   int i;
712 
713   for (i = 1; i <= 32; i++)
714     {
715       char *tmp_d = d;
716       ICONV_CONST char *tmp_s = s;
717       size_t ret;
718       size_t s_left = (size_t) i;
719       size_t d_left = (size_t) d_len;
720 
721       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
722 
723       if (ret != (size_t) -1)
724 	{
725 	  *n_s = tmp_s;
726 	  *n_d = tmp_d;
727 	  return 0;
728 	}
729     }
730 
731   return 1;
732 }
733 
734 static const char *
735 wind_iconv_cp (rc_uint_type cp)
736 {
737   const local_iconv_map *lim = wind_find_codepage_info (cp);
738 
739   if (!lim)
740     return NULL;
741   return lim->iconv_name;
742 }
743 #endif /* HAVE_ICONV */
744 
745 static rc_uint_type
746 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
747 			  unichar *u, rc_uint_type u_len)
748 {
749   rc_uint_type ret = 0;
750 
751 #if defined (_WIN32) || defined (__CYGWIN__)
752   rc_uint_type conv_flags = MB_PRECOMPOSED;
753 
754   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
755      MultiByteToWideChar will set the last error to
756      ERROR_INVALID_FLAGS if we do. */
757   if (cp == CP_UTF8 || cp == CP_UTF7)
758     conv_flags = 0;
759 
760   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
761 					    mb, -1, u, u_len);
762   /* Convert to bytes. */
763   ret *= sizeof (unichar);
764 
765 #elif defined (HAVE_ICONV)
766   int first = 1;
767   char tmp[32];
768   char *p_tmp;
769   const char *iconv_name = wind_iconv_cp (cp);
770 
771   if (!mb || !iconv_name)
772     return 0;
773   iconv_t cd = iconv_open ("UTF-16", iconv_name);
774 
775   while (1)
776     {
777       int iret;
778       const char *n_mb = "";
779       char *n_tmp = "";
780 
781       p_tmp = tmp;
782       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
783       if (first)
784 	{
785 	  first = 0;
786 	  continue;
787 	}
788       if (!iret)
789 	{
790 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
791 
792 	  if (u)
793 	    {
794 	      if ((size_t) u_len < l_tmp)
795 		break;
796 	      memcpy (u, tmp, l_tmp);
797 	      u += l_tmp/2;
798 	      u_len -= l_tmp;
799 	    }
800 	  ret += l_tmp;
801 	}
802       else
803 	break;
804       if (tmp[0] == 0 && tmp[1] == 0)
805 	break;
806       mb = n_mb;
807     }
808   iconv_close (cd);
809 #else
810   if (cp)
811     ret = 0;
812   ret = strlen (mb) + 1;
813   ret *= sizeof (unichar);
814   if (u != NULL && u_len != 0)
815     {
816       do
817 	{
818 	  *u++ = ((unichar) *mb) & 0xff;
819 	  --u_len; mb++;
820 	}
821       while (u_len != 0 && mb[-1] != 0);
822     }
823   if (u != NULL && u_len != 0)
824     *u = 0;
825 #endif
826   return ret;
827 }
828 
829 static rc_uint_type
830 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
831 {
832   rc_uint_type ret = 0;
833 #if defined (_WIN32) || defined (__CYGWIN__)
834   WINBOOL used_def = FALSE;
835 
836   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
837 				      	    NULL, & used_def);
838 #elif defined (HAVE_ICONV)
839   int first = 1;
840   char tmp[32];
841   char *p_tmp;
842   const char *iconv_name = wind_iconv_cp (cp);
843 
844   if (!u || !iconv_name)
845     return 0;
846   iconv_t cd = iconv_open (iconv_name, "UTF-16");
847 
848   while (1)
849     {
850       int iret;
851       const char *n_u = "";
852       char *n_tmp = "";
853 
854       p_tmp = tmp;
855       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
856       if (first)
857 	{
858 	  first = 0;
859 	  continue;
860 	}
861       if (!iret)
862 	{
863 	  size_t l_tmp = (size_t) (n_tmp - p_tmp);
864 
865 	  if (mb)
866 	    {
867 	      if ((size_t) mb_len < l_tmp)
868 		break;
869 	      memcpy (mb, tmp, l_tmp);
870 	      mb += l_tmp;
871 	      mb_len -= l_tmp;
872 	    }
873 	  ret += l_tmp;
874 	}
875       else
876 	break;
877       if (u[0] == 0)
878 	break;
879       u = (const unichar *) n_u;
880     }
881   iconv_close (cd);
882 #else
883   if (cp)
884     ret = 0;
885 
886   while (u[ret] != 0)
887     ++ret;
888 
889   ++ret;
890 
891   if (mb)
892     {
893       while (*u != 0 && mb_len != 0)
894 	{
895 	  if (u[0] == (u[0] & 0x7f))
896 	    *mb++ = (char) u[0];
897 	  else
898 	    *mb++ = '_';
899 	  ++u; --mb_len;
900 	}
901       if (mb_len != 0)
902 	*mb = 0;
903     }
904 #endif
905   return ret;
906 }
907