xref: /openbsd-src/gnu/usr.bin/perl/t/loc_tools.pl (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1# Common tools for test files files to find the locales which exist on the
2# system.  Caller should have defined ok() for the unlikely event that setup
3# here fails, and should have verified that this isn't miniperl before calling
4# the functions.
5
6# Note that it's okay that some languages have their native names
7# capitalized here even though that's not "right".  They are lowercased
8# anyway later during the scanning process (and besides, some clueless
9# vendor might have them capitalized erroneously anyway).
10
11sub _trylocale {    # Adds the locale given by the first parameter to the list
12                    # given by the 3rd iff the platform supports the locale in
13                    # each of the categories given by the 2nd parameter, which
14                    # is either a single category or a reference to a list of
15                    # categories
16    my $locale = shift;
17    my $categories = shift;
18    my $list = shift;
19    return if grep { $locale eq $_ } @$list;
20
21    $categories = [ $categories ] unless ref $categories;
22
23    foreach my $category (@$categories) {
24        return unless setlocale($category, $locale);
25    }
26
27    my $badutf8;
28    {
29        local $SIG{__WARN__} = sub {
30            $badutf8 = $_[0] =~ /Malformed UTF-8/;
31        };
32    }
33
34    if ($badutf8) {
35        ok(0, "Verify locale name doesn't contain malformed utf8");
36        return;
37    }
38    push @$list, $locale;
39}
40
41sub _decode_encodings {
42    my @enc;
43
44    foreach (split(/ /, shift)) {
45	if (/^(\d+)$/) {
46	    push @enc, "ISO8859-$1";
47	    push @enc, "iso8859$1";	# HP
48	    if ($1 eq '1') {
49		 push @enc, "roman8";	# HP
50	    }
51	    push @enc, $_;
52            push @enc, "$_.UTF-8";
53            push @enc, "$_.65001"; # Windows UTF-8
54            push @enc, "$_.ACP"; # Windows ANSI code page
55            push @enc, "$_.OCP"; # Windows OEM code page
56	}
57    }
58    if ($^O eq 'os390') {
59	push @enc, qw(IBM-037 IBM-819 IBM-1047);
60    }
61    push @enc, "UTF-8";
62    push @enc, "65001"; # Windows UTF-8
63
64    return @enc;
65}
66
67sub find_locales ($) {  # Returns an array of all the locales we found on the
68                        # system.  The parameter is either a single locale
69                        # category or a reference to a list of categories to
70                        # find valid locales for it or them
71    my $categories = shift;
72
73    use Config;;
74    my $have_setlocale = $Config{d_setlocale};
75
76    # Visual C's CRT goes silly on strings of the form "en_US.ISO8859-1"
77    # and mingw32 uses said silly CRT
78    # This doesn't seem to be an issue any more, at least on Windows XP,
79    # so re-enable the tests for Windows XP onwards.
80    my $winxp = ($^O eq 'MSWin32' && defined &Win32::GetOSVersion &&
81                    join('.', (Win32::GetOSVersion())[1..2]) >= 5.1);
82    $have_setlocale = 0 if ((($^O eq 'MSWin32' && !$winxp) || $^O eq 'NetWare') &&
83                    $Config{cc} =~ /^(cl|gcc|g\+\+|ici)/i);
84
85    # UWIN seems to loop after taint tests, just skip for now
86    $have_setlocale = 0 if ($^O =~ /^uwin/);
87
88    return unless $have_setlocale;
89
90    # Done this way in case this is 'required' in the caller before seeing if
91    # this is miniperl.
92    require POSIX; import POSIX 'locale_h';
93
94    _trylocale("C", $categories, \@Locale);
95    _trylocale("POSIX", $categories, \@Locale);
96    foreach (0..15) {
97        _trylocale("ISO8859-$_", $categories, \@Locale);
98        _trylocale("iso8859$_", $categories, \@Locale);
99        _trylocale("iso8859-$_", $categories, \@Locale);
100        _trylocale("iso_8859_$_", $categories, \@Locale);
101        _trylocale("isolatin$_", $categories, \@Locale);
102        _trylocale("isolatin-$_", $categories, \@Locale);
103        _trylocale("iso_latin_$_", $categories, \@Locale);
104    }
105
106    # Sanitize the environment so that we can run the external 'locale'
107    # program without the taint mode getting grumpy.
108
109    # $ENV{PATH} is special in VMS.
110    delete local $ENV{PATH} if $^O ne 'VMS' or $Config{d_setenv};
111
112    # Other subversive stuff.
113    delete local @ENV{qw(IFS CDPATH ENV BASH_ENV)};
114
115    if (-x "/usr/bin/locale"
116        && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|"))
117    {
118        while (<LOCALES>) {
119            # It seems that /usr/bin/locale steadfastly outputs 8 bit data, which
120            # ain't great when we're running this testPERL_UNICODE= so that utf8
121            # locales will cause all IO hadles to default to (assume) utf8
122            next unless utf8::valid($_);
123            chomp;
124            _trylocale($_, $categories, \@Locale);
125        }
126        close(LOCALES);
127    } elsif ($^O eq 'VMS'
128             && defined($ENV{'SYS$I18N_LOCALE'})
129             && -d 'SYS$I18N_LOCALE')
130    {
131    # The SYS$I18N_LOCALE logical name search list was not present on
132    # VAX VMS V5.5-12, but was on AXP && VAX VMS V6.2 as well as later versions.
133        opendir(LOCALES, "SYS\$I18N_LOCALE:");
134        while ($_ = readdir(LOCALES)) {
135            chomp;
136            _trylocale($_, $categories, \@Locale);
137        }
138        close(LOCALES);
139    } elsif (($^O eq 'openbsd' || $^O eq 'bitrig' ) && -e '/usr/share/locale') {
140
141    # OpenBSD doesn't have a locale executable, so reading /usr/share/locale
142    # is much easier and faster than the last resort method.
143
144        opendir(LOCALES, '/usr/share/locale');
145        while ($_ = readdir(LOCALES)) {
146            chomp;
147            _trylocale($_, $categories, \@Locale);
148        }
149        close(LOCALES);
150    } else { # Final fallback.  Try our list of locales hard-coded here
151
152        # This is going to be slow.
153        my @Data;
154
155
156        # Locales whose name differs if the utf8 bit is on are stored in these two
157        # files with appropriate encodings.
158        if ($^H & 0x08 || (${^OPEN} || "") =~ /:utf8/) {
159            @Data = do "lib/locale/utf8";
160        } else {
161            @Data = do "lib/locale/latin1";
162        }
163
164        # The rest of the locales are in this file.
165        push @Data, <DATA>;
166
167        foreach my $line (@Data) {
168            my ($locale_name, $language_codes, $country_codes, $encodings) =
169                split /:/, $line;
170            my @enc = _decode_encodings($encodings);
171            foreach my $loc (split(/ /, $locale_name)) {
172                _trylocale($loc, $categories, \@Locale);
173                foreach my $enc (@enc) {
174                    _trylocale("$loc.$enc", $categories, \@Locale);
175                }
176                $loc = lc $loc;
177                foreach my $enc (@enc) {
178                    _trylocale("$loc.$enc", $categories, \@Locale);
179                }
180            }
181            foreach my $lang (split(/ /, $language_codes)) {
182                _trylocale($lang, $categories, \@Locale);
183                foreach my $country (split(/ /, $country_codes)) {
184                    my $lc = "${lang}_${country}";
185                    _trylocale($lc, $categories, \@Locale);
186                    foreach my $enc (@enc) {
187                        _trylocale("$lc.$enc", $categories, \@Locale);
188                    }
189                    my $lC = "${lang}_\U${country}";
190                    _trylocale($lC, $categories, \@Locale);
191                    foreach my $enc (@enc) {
192                        _trylocale("$lC.$enc", $categories, \@Locale);
193                    }
194                }
195            }
196        }
197    }
198
199    @Locale = sort @Locale;
200
201    return @Locale;
202
203
204}
205
206sub is_locale_utf8 ($) { # Return a boolean as to if core Perl thinks the input
207                         # is a UTF-8 locale
208    my $locale = shift;
209
210    use locale;
211
212    my $save_locale = setlocale(&POSIX::LC_CTYPE());
213    if (! $save_locale) {
214        ok(0, "Verify could save previous locale");
215        return 0;
216    }
217
218    if (! setlocale(&POSIX::LC_CTYPE(), $locale)) {
219        ok(0, "Verify could setlocale to $locale");
220        return 0;
221    }
222
223    my $ret = 0;
224
225    # Use an op that gives different results for UTF-8 than any other locale.
226    # If a platform has UTF-8 locales, there should be at least one locale on
227    # most platforms with UTF-8 in its name, so if there is a bug in the op
228    # giving a false negative, we should get a failure for those locales as we
229    # go through testing all the locales on the platform.
230    if (CORE::fc(chr utf8::unicode_to_native(0xdf)) ne "ss") {
231        if ($locale =~ /UTF-?8/i) {
232            diag("Cannot verify $locale with UTF-8 in name is a UTF-8 locale");
233            #ok (0, "Verify $locale with UTF-8 in name is a UTF-8 locale");
234        }
235    }
236    else {
237        $ret = 1;
238    }
239
240    die "Couldn't restore locale '$save_locale'"
241        unless setlocale(&POSIX::LC_CTYPE(), $save_locale);
242
243    return $ret;
244}
245
246sub find_utf8_ctype_locale (;$) { # Return the name of locale that core Perl
247                                  # thinks is a UTF-8 LC_CTYPE locale.
248                                  # Optional parameter is a reference to a
249                                  # list of locales to try; if omitted, this
250                                  # tries all locales it can find on the
251                                  # platform
252    my $locales_ref = shift;
253    return if !defined &POSIX::LC_CTYPE;
254    if (! defined $locales_ref) {
255        my @locales = find_locales(&POSIX::LC_CTYPE());
256        $locales_ref = \@locales;
257    }
258
259    foreach my $locale (@$locales_ref) {
260        return $locale if is_locale_utf8($locale);
261    }
262
263    return;
264}
265
2661
267
268# Format of data is: locale_name, language_codes, country_codes, encodings
269__DATA__
270Afrikaans:af:za:1 15
271Arabic:ar:dz eg sa:6 arabic8
272Brezhoneg Breton:br:fr:1 15
273Bulgarski Bulgarian:bg:bg:5
274Chinese:zh:cn tw:cn.EUC eucCN eucTW euc.CN euc.TW Big5 GB2312 tw.EUC
275Hrvatski Croatian:hr:hr:2
276Cymraeg Welsh:cy:cy:1 14 15
277Czech:cs:cz:2
278Dansk Danish:da:dk:1 15
279Nederlands Dutch:nl:be nl:1 15
280English American British:en:au ca gb ie nz us uk zw:1 15 cp850
281Esperanto:eo:eo:3
282Eesti Estonian:et:ee:4 6 13
283Suomi Finnish:fi:fi:1 15
284Flamish::fl:1 15
285Deutsch German:de:at be ch de lu:1 15
286Euskaraz Basque:eu:es fr:1 15
287Galego Galician:gl:es:1 15
288Ellada Greek:el:gr:7 g8
289Frysk:fy:nl:1 15
290Greenlandic:kl:gl:4 6
291Hebrew:iw:il:8 hebrew8
292Hungarian:hu:hu:2
293Indonesian:id:id:1 15
294Gaeilge Irish:ga:IE:1 14 15
295Italiano Italian:it:ch it:1 15
296Nihongo Japanese:ja:jp:euc eucJP jp.EUC sjis
297Korean:ko:kr:
298Latine Latin:la:va:1 15
299Latvian:lv:lv:4 6 13
300Lithuanian:lt:lt:4 6 13
301Macedonian:mk:mk:1 15
302Maltese:mt:mt:3
303Moldovan:mo:mo:2
304Norsk Norwegian:no no\@nynorsk nb nn:no:1 15
305Occitan:oc:es:1 15
306Polski Polish:pl:pl:2
307Rumanian:ro:ro:2
308Russki Russian:ru:ru su ua:5 koi8 koi8r KOI8-R koi8u cp1251 cp866
309Serbski Serbian:sr:yu:5
310Slovak:sk:sk:2
311Slovene Slovenian:sl:si:2
312Sqhip Albanian:sq:sq:1 15
313Svenska Swedish:sv:fi se:1 15
314Thai:th:th:11 tis620
315Turkish:tr:tr:9 turkish8
316Yiddish:yi::1 15
317