xref: /openbsd-src/gnu/usr.bin/perl/ext/I18N-Langinfo/t/Langinfo.t (revision 5486feefcc8cb79b19e014ab332cc5dfd05b3b33)
1#!perl -T
2use strict;
3use Config;
4use Test::More;
5require "../../t/loc_tools.pl";
6
7plan skip_all => "I18N::Langinfo or POSIX unavailable"
8    if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!;
9
10my @times  = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7
11                 MON_8 MON_9 MON_10 MON_11 MON_12
12                 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7);
13my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT
14                   D_FMT T_FMT);
15push @constants, @times;
16
17# The values a C locale should return
18my %want = (    RADIXCHAR => qr/ ^ \. $ /x,
19                THOUSEP	  => qr/ ^$ /x,
20
21                # Can be empty; otherwise first character must be one of
22                # these.  In the C locale, there is nothing after the first
23                # character.
24                CRNCYSTR  => qr/ ^ [+-.]? $ /x,
25
26                _NL_ADDRESS_COUNTRY_NUM => qr/^ 0 $/x,
27                _NL_IDENTIFICATION_TERRITORY => qr/ ^ ISO $/x,
28                _NL_MEASUREMENT_MEASUREMENT => qr/ ^ [01] $/x,
29                _NL_PAPER_HEIGHT => qr/^ \d+ $/x,
30                _NL_NAME_NAME_GEN => qr/ .* /x,
31                _NL_TELEPHONE_INT_SELECT => qr/ .* /x,
32           );
33
34# Abbreviated and full are swapped in many locales in early netbsd.  Skip
35# them.
36if (   $Config{osname} !~ / netbsd /ix
37    || $Config{osvers} !~ / ^ [1-6] \. /x)
38{
39    $want{ABDAY_1} = qr/ ^ Sun $ /x;
40    $want{DAY_1}   = qr/ ^ Sunday $ /x;
41    $want{ABMON_1} = qr/ ^ Jan $ /x;
42    $want{MON_1}   = qr/ ^ January $ /x;
43}
44
45sub disp_str ($) {
46    my $string = shift;
47
48    # Displays the string unambiguously.  ASCII printables are always output
49    # as-is, though perhaps separated by blanks from other characters.  If
50    # entirely printable ASCII, just returns the string.  Otherwise if valid
51    # UTF-8 it uses the character names for non-printable-ASCII.  Otherwise it
52    # outputs hex for each non-ASCII-printable byte.
53
54    return $string if $string =~ / ^ [[:print:]]* $/xa;
55
56    my $result = "";
57    my $prev_was_punct = 1; # Beginning is considered punct
58    if (utf8::valid($string) && utf8::is_utf8($string)) {
59        use charnames ();
60        foreach my $char (split "", $string) {
61
62            # Keep punctuation adjacent to other characters; otherwise
63            # separate them with a blank
64            if ($char =~ /[[:punct:]]/a) {
65                $result .= $char;
66                $prev_was_punct = 1;
67            }
68            elsif ($char =~ /[[:print:]]/a) {
69                $result .= "  " unless $prev_was_punct;
70                $result .= $char;
71                $prev_was_punct = 0;
72            }
73            else {
74                $result .= "  " unless $prev_was_punct;
75                my $name = charnames::viacode(ord $char);
76                $result .= (defined $name) ? $name : ':unknown:';
77                $prev_was_punct = 0;
78            }
79        }
80    }
81    else {
82        use bytes;
83        foreach my $char (split "", $string) {
84            if ($char =~ /[[:punct:]]/a) {
85                $result .= $char;
86                $prev_was_punct = 1;
87            }
88            elsif ($char =~ /[[:print:]]/a) {
89                $result .= " " unless $prev_was_punct;
90                $result .= $char;
91                $prev_was_punct = 0;
92            }
93            else {
94                $result .= " " unless $prev_was_punct;
95                $result .= sprintf("%02X", ord $char);
96                $prev_was_punct = 0;
97            }
98        }
99    }
100
101    return $result;
102}
103
104sub check_utf8_validity($$$) {
105
106    # Looks for a definitive result for testing perl code on UTF-8 locales.
107    # Returns 1 if definitive (one way or another).
108    # Returns 0 if the input is all ASCII.
109    # Returns -1 if it looks to be a system error
110
111    my ($string, $item, $locale) = @_;
112    my $msg_details = "The name for '$item' in $locale";
113
114    return 0 unless $string =~ /\P{ASCII}/;
115
116    if (utf8::is_utf8($string)) {
117        if (utf8::valid($string )) {
118            pass("$msg_details is a UTF8 string.  Got:\n" . disp_str($string));
119            return 1;
120        }
121
122        # Here, marked as UTF-8, but is malformed, so shouldn't have been
123        # marked thus
124        fail("$msg_details is marked as UTF8 but is malformed.  Got:\n"
125           . disp_str($string));
126        return 1;
127    }
128
129    # Here, not marked as UTF-8.  Since this is a UTF-8 locale, and contains
130    # non-ASCII, something is wrong.  It may be us, or it may be libc.  Use
131    # decode to see if the bytes form legal UTF-8.  If they did, it means
132    # perl wrongly returned the string as not UTF-8.
133    my $copy = $string;
134    my $is_valid_utf8;
135    {
136        use bytes;
137        $is_valid_utf8 = utf8::decode($copy);
138    }
139
140    if ($is_valid_utf8) {
141        fail("$msg_details should have been marked as a UTF8 string.  Got:\n"
142           . disp_str($string));
143        return 1;
144    }
145
146    # Here, the string returned wasn't marked as UTF-8 and isn't valid UTF-8.
147    # This means perl did its job and kept malformed text from being marked
148    # UTF-8.  And it means a system bug since the locale was UTF-8.
149    return -1;
150}
151
152my @want = sort keys %want;
153my @illegal_utf8;
154
155my %extra_items = (
156                    _NL_ADDRESS_POSTAL_FMT => 'LC_ADDRESS',
157                    _NL_ADDRESS_COUNTRY_NAME => 'LC_ADDRESS',
158                    _NL_ADDRESS_COUNTRY_POST => 'LC_ADDRESS',
159                    _NL_ADDRESS_COUNTRY_AB2 => 'LC_ADDRESS',
160                    _NL_ADDRESS_COUNTRY_AB3 => 'LC_ADDRESS',
161                    _NL_ADDRESS_COUNTRY_CAR => 'LC_ADDRESS',
162                    _NL_ADDRESS_COUNTRY_NUM => 'LC_ADDRESS',
163                    _NL_ADDRESS_COUNTRY_ISBN => 'LC_ADDRESS',
164                    _NL_ADDRESS_LANG_NAME => 'LC_ADDRESS',
165                    _NL_ADDRESS_LANG_AB => 'LC_ADDRESS',
166                    _NL_ADDRESS_LANG_TERM => 'LC_ADDRESS',
167                    _NL_ADDRESS_LANG_LIB => 'LC_ADDRESS',
168                    _NL_IDENTIFICATION_TITLE => 'LC_IDENTIFICATION',
169                    _NL_IDENTIFICATION_SOURCE => 'LC_IDENTIFICATION',
170                    _NL_IDENTIFICATION_ADDRESS => 'LC_IDENTIFICATION',
171                    _NL_IDENTIFICATION_CONTACT => 'LC_IDENTIFICATION',
172                    _NL_IDENTIFICATION_EMAIL => 'LC_IDENTIFICATION',
173                    _NL_IDENTIFICATION_TEL => 'LC_IDENTIFICATION',
174                    _NL_IDENTIFICATION_FAX => 'LC_IDENTIFICATION',
175                    _NL_IDENTIFICATION_LANGUAGE => 'LC_IDENTIFICATION',
176                    _NL_IDENTIFICATION_TERRITORY => 'LC_IDENTIFICATION',
177                    _NL_IDENTIFICATION_AUDIENCE => 'LC_IDENTIFICATION',
178                    _NL_IDENTIFICATION_APPLICATION => 'LC_IDENTIFICATION',
179                    _NL_IDENTIFICATION_ABBREVIATION => 'LC_IDENTIFICATION',
180                    _NL_IDENTIFICATION_REVISION => 'LC_IDENTIFICATION',
181                    _NL_IDENTIFICATION_DATE => 'LC_IDENTIFICATION',
182                    _NL_IDENTIFICATION_CATEGORY => 'LC_IDENTIFICATION',
183                    _NL_MEASUREMENT_MEASUREMENT => 'LC_MEASUREMENT',
184                    _NL_NAME_NAME_FMT => 'LC_NAME',
185                    _NL_NAME_NAME_GEN => 'LC_NAME',
186                    _NL_NAME_NAME_MR => 'LC_NAME',
187                    _NL_NAME_NAME_MRS => 'LC_NAME',
188                    _NL_NAME_NAME_MISS => 'LC_NAME',
189                    _NL_NAME_NAME_MS => 'LC_NAME',
190                    _NL_PAPER_HEIGHT => 'LC_PAPER',
191                    _NL_PAPER_WIDTH => 'LC_PAPER',
192                    _NL_TELEPHONE_TEL_INT_FMT => 'LC_TELEPHONE',
193                    _NL_TELEPHONE_TEL_DOM_FMT => 'LC_TELEPHONE',
194                    _NL_TELEPHONE_INT_SELECT => 'LC_TELEPHONE',
195                    _NL_TELEPHONE_INT_PREFIX => 'LC_TELEPHONE',
196                );
197
198use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR',
199                          keys %extra_items);
200
201use POSIX;
202
203if (locales_enabled('LC_ALL')) {
204    setlocale(LC_ALL, "C");
205}
206else { # If no LC_ALL, make sure the categories used in Langinfo are in C
207    setlocale(LC_CTYPE, "C")          if locales_enabled('LC_CTYPE');
208    setlocale(LC_MONETARY, "C")       if locales_enabled('LC_MONETARY');
209    setlocale(LC_NUMERIC, "C")        if locales_enabled('LC_NUMERIC');
210    setlocale(LC_TIME, "C")           if locales_enabled('LC_TIME');
211    setlocale(LC_ADDRESS, "C")        if locales_enabled('LC_ADDRESS');
212    setlocale(LC_IDENTIFICATION, "C") if locales_enabled('LC_IDENTIFICATION');
213    setlocale(LC_MEASUREMENT, "C")    if locales_enabled('LC_MEASUREMENT');
214    setlocale(LC_NAME, "C")           if locales_enabled('LC_NAME');
215    setlocale(LC_PAPER, "C")          if locales_enabled('LC_PAPER');
216    setlocale(LC_TELEPHONE, "C")      if locales_enabled('LC_TELEPHONE');
217}
218
219for my $constant (@constants) {
220    SKIP: {
221        my $string = eval { langinfo(eval "$constant()") };
222        is( $@, '', "calling langinfo() with $constant" );
223        skip "returned string was empty, skipping next two tests", 2 unless $string;
224        ok( defined $string, "checking if the returned string is defined" );
225        cmp_ok( length($string), '>=', 1, "checking if the returned string has a positive length" );
226    }
227}
228
229for my $i (1..@want) {
230    my $try = $want[$i-1];
231    eval { I18N::Langinfo->import($try) };
232    SKIP: {
233        skip "$try not defined", 1, if $@;
234        no strict 'refs';
235        like (langinfo(&$try), $want{$try}, "$try => '$want{$try}'");
236    }
237}
238
239my $comma_locale;
240for my $locale (find_locales( 'LC_NUMERIC' )) {
241    use POSIX;
242    use locale;
243    setlocale(LC_NUMERIC, $locale) or next;
244    my $in = 4.2; # avoid any constant folding bugs
245    my $s = sprintf("%g", $in);
246    if ($s eq "4,2")  {
247        $comma_locale = $locale;
248        last;
249    }
250}
251
252SKIP: {
253    skip "Couldn't find a locale with a comma decimal pt", 1
254                                                        unless $comma_locale;
255
256    no strict 'refs';
257    is (langinfo(&RADIXCHAR), ",",
258        "Returns ',' for decimal pt for locale '$comma_locale'");
259}
260
261SKIP: {
262
263    my $found_time = 0;
264    my $found_monetary = 0;
265
266    my %time_locales;
267    map { $time_locales{$_} = 1 } find_locales("LC_TIME");
268    my %monetary_locales;
269    map { $monetary_locales{$_} = 1 } find_locales("LC_MONETARY");
270
271    foreach my $utf8_locale (find_utf8_ctype_locales()) {
272        if ($time_locales{$utf8_locale} && ! $found_time) {
273            setlocale(&LC_TIME, $utf8_locale);
274            foreach my $time_item (@times) {
275                my $eval_string = "langinfo(&$time_item)";
276                my $time_name = eval $eval_string;
277                if ($@) {
278                    fail("'$eval_string' failed: $@");
279
280                    # If this or the next two tests fail, any other items or
281                    # locales will likely fail too, so skip testing them.
282                    $found_time = 1;
283                    last;
284                }
285                if (! defined $time_name) {
286                    fail("'$eval_string' returned undef");
287                    $found_time = 1;
288                    last;
289                }
290                if ($time_name eq "") {
291                    fail("'$eval_string' returned an empty name");
292                    $found_time = 1;
293                    last;
294                }
295
296                my $ret = check_utf8_validity($time_name, $time_item, $utf8_locale);
297                if ($ret > 0) {
298                    $found_time = 1;
299                    last;
300                }
301
302                if ($ret < 0) { # < 0 means a system error
303                    push @illegal_utf8, "$utf8_locale: $time_item:"
304                                     .  disp_str($time_name);
305                }
306            }
307        }
308
309        if ($monetary_locales{$utf8_locale} && ! $found_monetary) {
310            setlocale(&LC_MONETARY, $utf8_locale);
311            my $eval_string = "langinfo(&CRNCYSTR)";
312            my $symbol = eval $eval_string;
313            if ($@) {
314                fail("'$eval_string' failed: $@");
315                $found_monetary = 1;
316                next;
317            }
318            if (! defined $symbol) {
319                fail("'$eval_string' returned undef");
320                next;
321            }
322
323            my $ret = check_utf8_validity($symbol, 'CRNCY', $utf8_locale);
324            if ($ret > 0) {
325                $found_monetary = 1;
326            }
327            elsif ($ret < 0) { # < 0 means a system error
328                push @illegal_utf8, "$utf8_locale: CRNCY:"
329                                 .  disp_str($symbol);
330            }
331        }
332
333        last if $found_monetary && $found_time;
334    }
335
336    if ($found_time + $found_monetary < 2) {
337        my $message = "";
338        $message .= "time name" unless $found_time;
339        if (! $found_monetary) {
340            $message .= " nor" if $message;
341            "monetary name";
342        }
343        skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary);
344    }
345}
346
347if (@illegal_utf8) {
348    diag join "\n", "The following are illegal UTF-8", @illegal_utf8;
349}
350
351done_testing();
352