Lines Matching +full:ascii +full:. +full:r
1 /* Determine a canonical name for the current locale's character encoding.
3 Copyright (C) 2000-2006, 2008-2020 Free Software Foundation, Inc.
8 any later version.
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
16 with this program; if not, see <https://www.gnu.org/licenses/>. */
18 /* Written by Bruno Haible <bruno@clisp.org>. */
20 #include <config.h>
22 /* Specification. */
23 #include "localcharset.h"
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <stdlib.h>
31 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
36 # include <locale.h>
40 /* Assume EMX program runs on OS/2, even if compiled under DOS. */
48 # include <langinfo.h>
51 # include <locale.h>
56 # include <windows.h>
60 # include <windows.h>
61 /* For the use of setlocale() below, the Gnulib override in setlocale.c is
62 not needed; see the platform lists in setlocale_null.m4. */
67 # include <os2.h>
72 # include <xlocale.h>
79 to GNU canonical encoding name. */
83 GNU canonical names directly. */
92 /* Table of platform-dependent mappings, sorted in ascending order. */
98 { "C", "ASCII" },
113 /*{ "KOI8-R", "KOI8-R" },*/
116 { "US-ASCII", "ASCII" },
123 { "646", "ASCII" },
138 /*{ "KOI8-R", "KOI8-R" },*/
149 { "646", "ASCII" },
157 { "US-ASCII", "ASCII" }
165 LC_CTYPE file.
167 the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
169 "... all code that calls BSD system routines should ensure
171 encoding. All BSD system functions expect their string
172 parameters to be in UTF-8 encoding and nothing else."
176 UTF-8. In a canonical UTF-8 Unicode string, all decomposable
177 characters are decomposed ..."
180 them to decomposed UTF-8 before accessing the file system.
181 - The Apple Terminal application displays UTF-8 by default.
183 - xterm uses ISO-8859-1 by default.
184 - TextEdit uses MacRoman by default.
186 minimize the use of decomposed Unicode. Unfortunately, through the
188 space nevertheless.
189 Then there are also the locales with encodings other than US-ASCII
190 and UTF-8. These locales can be occasionally useful to users (e.g.
192 file names are in US-ASCII.
213 { "KOI8-R", "KOI8-R" },
275 { "koi8r", "KOI8-R" },
323 { "646", "ASCII" },
341 { "TIS620.2533", "TIS-620" },
347 { "koi8-r", "KOI8-R" }
351 { "646", "ASCII" }
356 { "CP20127", "ASCII" },
357 { "CP20866", "KOI8-R" },
383 <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
385 <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
405 { "CP878", "KOI8-R" },
463 { "IBM-367", "ASCII" },
488 { "IBM-878", "KOI8-R" },
547 section 10.7 "Handling Different Character Sets". */
565 /* Just a dummy entry, to avoid a C syntax error. */
575 encoding name. */
583 /* Table of platform-dependent mappings, sorted in ascending order. */
587 { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
588 { "da_DK.DIS_8859-15", "ISO-8859-15" },
589 { "da_DK.ISO_8859-1", "ISO-8859-1" },
590 { "de_AT.DIS_8859-15", "ISO-8859-15" },
591 { "de_AT.ISO_8859-1", "ISO-8859-1" },
592 { "de_CH.DIS_8859-15", "ISO-8859-15" },
593 { "de_CH.ISO_8859-1", "ISO-8859-1" },
594 { "de_DE.DIS_8859-15", "ISO-8859-15" },
595 { "de_DE.ISO_8859-1", "ISO-8859-1" },
596 { "en_AU.DIS_8859-15", "ISO-8859-15" },
597 { "en_AU.ISO_8859-1", "ISO-8859-1" },
598 { "en_CA.DIS_8859-15", "ISO-8859-15" },
599 { "en_CA.ISO_8859-1", "ISO-8859-1" },
600 { "en_GB.DIS_8859-15", "ISO-8859-15" },
601 { "en_GB.ISO_8859-1", "ISO-8859-1" },
602 { "en_US.DIS_8859-15", "ISO-8859-15" },
603 { "en_US.ISO_8859-1", "ISO-8859-1" },
604 { "es_ES.DIS_8859-15", "ISO-8859-15" },
605 { "es_ES.ISO_8859-1", "ISO-8859-1" },
606 { "fi_FI.DIS_8859-15", "ISO-8859-15" },
607 { "fi_FI.ISO_8859-1", "ISO-8859-1" },
608 { "fr_BE.DIS_8859-15", "ISO-8859-15" },
609 { "fr_BE.ISO_8859-1", "ISO-8859-1" },
610 { "fr_CA.DIS_8859-15", "ISO-8859-15" },
611 { "fr_CA.ISO_8859-1", "ISO-8859-1" },
612 { "fr_CH.DIS_8859-15", "ISO-8859-15" },
613 { "fr_CH.ISO_8859-1", "ISO-8859-1" },
614 { "fr_FR.DIS_8859-15", "ISO-8859-15" },
615 { "fr_FR.ISO_8859-1", "ISO-8859-1" },
616 { "hr_HR.ISO_8859-2", "ISO-8859-2" },
617 { "hu_HU.ISO_8859-2", "ISO-8859-2" },
618 { "is_IS.DIS_8859-15", "ISO-8859-15" },
619 { "is_IS.ISO_8859-1", "ISO-8859-1" },
620 { "it_CH.DIS_8859-15", "ISO-8859-15" },
621 { "it_CH.ISO_8859-1", "ISO-8859-1" },
622 { "it_IT.DIS_8859-15", "ISO-8859-15" },
623 { "it_IT.ISO_8859-1", "ISO-8859-1" },
624 { "ja_JP.EUC", "EUC-JP" },
625 { "ja_JP.SJIS", "SHIFT_JIS" },
626 { "ja_JP.Shift_JIS", "SHIFT_JIS" },
627 { "ko_KR.EUC", "EUC-KR" },
628 { "la_LN.ASCII", "ASCII" },
629 { "la_LN.DIS_8859-15", "ISO-8859-15" },
630 { "la_LN.ISO_8859-1", "ISO-8859-1" },
631 { "la_LN.ISO_8859-2", "ISO-8859-2" },
632 { "la_LN.ISO_8859-4", "ISO-8859-4" },
633 { "lt_LN.ASCII", "ASCII" },
634 { "lt_LN.DIS_8859-15", "ISO-8859-15" },
635 { "lt_LN.ISO_8859-1", "ISO-8859-1" },
636 { "lt_LN.ISO_8859-2", "ISO-8859-2" },
637 { "lt_LT.ISO_8859-4", "ISO-8859-4" },
638 { "nl_BE.DIS_8859-15", "ISO-8859-15" },
639 { "nl_BE.ISO_8859-1", "ISO-8859-1" },
640 { "nl_NL.DIS_8859-15", "ISO-8859-15" },
641 { "nl_NL.ISO_8859-1", "ISO-8859-1" },
642 { "no_NO.DIS_8859-15", "ISO-8859-15" },
643 { "no_NO.ISO_8859-1", "ISO-8859-1" },
644 { "pl_PL.ISO_8859-2", "ISO-8859-2" },
645 { "pt_PT.DIS_8859-15", "ISO-8859-15" },
646 { "pt_PT.ISO_8859-1", "ISO-8859-1" },
647 { "ru_RU.CP866", "CP866" },
648 { "ru_RU.ISO_8859-5", "ISO-8859-5" },
649 { "ru_RU.KOI8-R", "KOI8-R" },
650 { "ru_SU.CP866", "CP866" },
651 { "ru_SU.ISO_8859-5", "ISO-8859-5" },
652 { "ru_SU.KOI8-R", "KOI8-R" },
653 { "sl_SI.ISO_8859-2", "ISO-8859-2" },
654 { "sv_SE.DIS_8859-15", "ISO-8859-15" },
655 { "sv_SE.ISO_8859-1", "ISO-8859-1" },
656 { "uk_UA.KOI8-U", "KOI8-U" },
657 { "zh_CN.EUC", "GB2312" },
658 { "zh_TW.BIG5", "BIG5" },
659 { "zh_TW.Big5", "BIG5" }
663 /* The encodings given here may not all be correct.
667 Juan Manuel Guerrero <juan.guerrero@gmx.de>
668 and <bug-gnulib@gnu.org>. */
669 { "C", "ASCII" },
810 /* Just a dummy entry, to avoid a C syntax error. */
819 into one of the canonical names listed below.
820 The result must not be freed; it is statically allocated. The result
823 is changed; threads in multithreaded programs should not do this.
825 name. */
835 /* This function must be multithread-safe. To achieve this without using in locale_charset()
837 buffer. Filling it through, for example, strcpy + strcat would not be in locale_charset()
839 currently accessing it. If necessary, the contents is first assembled in in locale_charset()
840 a stack-allocated buffer. */ in locale_charset()
846 /* Most systems support nl_langinfo (CODESET) nowadays. */ in locale_charset()
850 /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always in locale_charset()
851 returns "US-ASCII". Return the suffix of the locale name from the in locale_charset()
852 environment variables (if present) or the codepage as a number. */ in locale_charset()
853 if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) in locale_charset()
868 it. */ in locale_charset()
869 const char *dot = strchr (locale, '.'); in locale_charset()
876 /* Look for the possible @... trailer and remove it, if any. */ in locale_charset()
882 /* This way of filling resultbuf is multithread-safe. */ in locale_charset()
891 number: GetACP(). This encoding is used by Cygwin, unless the user in locale_charset()
893 people do). in locale_charset()
896 GetConsoleOutputCP() if it is using a TrueType font). Cygwin does in locale_charset()
897 this conversion transparently (see winsup/cygwin/fhandler_console.cc), in locale_charset()
898 converting to GetConsoleOutputCP(). This leads to correct results, in locale_charset()
900 in use. */ in locale_charset()
912 /* The canonical name cannot be determined. */ in locale_charset()
922 'setlocale' call specified. So we use it as a last resort, in in locale_charset()
924 codepage. */ in locale_charset()
926 char *pdot = strrchr (current_locale, '.'); in locale_charset()
933 number: GetACP(). in locale_charset()
936 GetConsoleOutputCP() encoding if it is using a TrueType font. in locale_charset()
938 encoding is the best bet. */ in locale_charset()
941 /* For a locale name such as "French_France.65001", in Windows 10, in locale_charset()
942 setlocale now returns "French_France.utf8" instead. */ in locale_charset()
961 with standard language environment variables. */ in locale_charset()
971 /* If the locale name contains an encoding after the dot, return it. */ in locale_charset()
972 const char *dot = strchr (locale, '.'); in locale_charset()
979 /* Look for the possible @... trailer and remove it, if any. */ in locale_charset()
985 /* This way of filling resultbuf is multithread-safe. */ in locale_charset()
992 /* For the POSIX locale, don't use the system's codepage. */ in locale_charset()
999 /* OS/2 has a function returning the locale's codepage as a number. */ in locale_charset()
1014 # error "Add code for other platforms here." in locale_charset()
1018 /* Resolve alias. */ in locale_charset()
1021 /* On some platforms, UTF-8 locales are the most frequently used ones. in locale_charset()
1023 testing for this case first. */ in locale_charset()
1033 /* The table is sorted. Perform a binary search. */ in locale_charset()
1039 for i < lo, strcmp (table[i].alias, codeset) < 0, in locale_charset()
1040 for i >= hi, strcmp (table[i].alias, codeset) > 0. */ in locale_charset()
1042 int cmp = strcmp (table[mid].alias, codeset); in locale_charset()
1050 strcmp (table[i].alias, codeset) == 0. */ in locale_charset()
1051 codeset = table[mid].canonical; in locale_charset()
1061 /* Did not find it in the table. */ in locale_charset()
1062 /* On Mac OS X, all modern locales use the UTF-8 encoding. in locale_charset()
1063 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */ in locale_charset()
1067 /* Don't return an empty string. GNU libc and GNU libiconv interpret in locale_charset()
1069 thus GNU libiconv would call this function a second time. */ in locale_charset()
1071 codeset = "ASCII"; in locale_charset()
1078 /* On old systems which lack it, use setlocale or getenv. */ in locale_charset()
1081 /* But most old systems don't have a complete set of locales. Some in locale_charset()
1082 (like DJGPP) have only the C locale. Therefore we don't use setlocale in locale_charset()
1084 user has set. */ in locale_charset()
1101 /* Map locale name to canonical encoding name. */ in locale_charset()
1107 /* The table is sorted. Perform a binary search. */ in locale_charset()
1113 for i < lo, strcmp (table[i].locale, locale) < 0, in locale_charset()
1114 for i >= hi, strcmp (table[i].locale, locale) > 0. */ in locale_charset()
1116 int cmp = strcmp (table[mid].locale, locale); in locale_charset()
1124 strcmp (table[i].locale, locale) == 0. */ in locale_charset()
1125 codeset = table[mid].canonical; in locale_charset()
1134 /* Did not find it in the table. */ in locale_charset()
1135 /* On Mac OS X, all modern locales use the UTF-8 encoding. in locale_charset()
1136 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */ in locale_charset()
1140 /* The canonical name cannot be determined. */ in locale_charset()
1141 /* Don't return an empty string. GNU libc and GNU libiconv interpret in locale_charset()
1143 thus GNU libiconv would call this function a second time. */ in locale_charset()
1144 codeset = "ASCII"; in locale_charset()
1153 (the default codeset) does not work when MB_CUR_MAX is 1. */ in locale_charset()
1155 codeset = "ASCII"; in locale_charset()