1#!perl -T 2use strict; 3use Config; 4use Test::More; 5require "../../t/loc_tools.pl"; 6 7plan skip_all => "I18N::Langinfo or POSIX unavailable" 8 if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!; 9 10my @times = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 11 MON_8 MON_9 MON_10 MON_11 MON_12 12 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7); 13my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT 14 D_FMT T_FMT); 15push @constants, @times; 16 17# The values a C locale should return 18my %want = ( RADIXCHAR => qr/ ^ \. $ /x, 19 THOUSEP => qr/ ^$ /x, 20 21 # Can be empty; otherwise first character must be one of 22 # these. In the C locale, there is nothing after the first 23 # character. 24 CRNCYSTR => qr/ ^ [+-.]? $ /x, 25 26 _NL_ADDRESS_COUNTRY_NUM => qr/^ 0 $/x, 27 _NL_IDENTIFICATION_TERRITORY => qr/ ^ ISO $/x, 28 _NL_MEASUREMENT_MEASUREMENT => qr/ ^ [01] $/x, 29 _NL_PAPER_HEIGHT => qr/^ \d+ $/x, 30 _NL_NAME_NAME_GEN => qr/ .* /x, 31 _NL_TELEPHONE_INT_SELECT => qr/ .* /x, 32 ); 33 34# Abbreviated and full are swapped in many locales in early netbsd. Skip 35# them. 36if ( $Config{osname} !~ / netbsd /ix 37 || $Config{osvers} !~ / ^ [1-6] \. /x) 38{ 39 $want{ABDAY_1} = qr/ ^ Sun $ /x; 40 $want{DAY_1} = qr/ ^ Sunday $ /x; 41 $want{ABMON_1} = qr/ ^ Jan $ /x; 42 $want{MON_1} = qr/ ^ January $ /x; 43} 44 45sub disp_str ($) { 46 my $string = shift; 47 48 # Displays the string unambiguously. ASCII printables are always output 49 # as-is, though perhaps separated by blanks from other characters. If 50 # entirely printable ASCII, just returns the string. Otherwise if valid 51 # UTF-8 it uses the character names for non-printable-ASCII. Otherwise it 52 # outputs hex for each non-ASCII-printable byte. 53 54 return $string if $string =~ / ^ [[:print:]]* $/xa; 55 56 my $result = ""; 57 my $prev_was_punct = 1; # Beginning is considered punct 58 if (utf8::valid($string) && utf8::is_utf8($string)) { 59 use charnames (); 60 foreach my $char (split "", $string) { 61 62 # Keep punctuation adjacent to other characters; otherwise 63 # separate them with a blank 64 if ($char =~ /[[:punct:]]/a) { 65 $result .= $char; 66 $prev_was_punct = 1; 67 } 68 elsif ($char =~ /[[:print:]]/a) { 69 $result .= " " unless $prev_was_punct; 70 $result .= $char; 71 $prev_was_punct = 0; 72 } 73 else { 74 $result .= " " unless $prev_was_punct; 75 my $name = charnames::viacode(ord $char); 76 $result .= (defined $name) ? $name : ':unknown:'; 77 $prev_was_punct = 0; 78 } 79 } 80 } 81 else { 82 use bytes; 83 foreach my $char (split "", $string) { 84 if ($char =~ /[[:punct:]]/a) { 85 $result .= $char; 86 $prev_was_punct = 1; 87 } 88 elsif ($char =~ /[[:print:]]/a) { 89 $result .= " " unless $prev_was_punct; 90 $result .= $char; 91 $prev_was_punct = 0; 92 } 93 else { 94 $result .= " " unless $prev_was_punct; 95 $result .= sprintf("%02X", ord $char); 96 $prev_was_punct = 0; 97 } 98 } 99 } 100 101 return $result; 102} 103 104sub check_utf8_validity($$$) { 105 106 # Looks for a definitive result for testing perl code on UTF-8 locales. 107 # Returns 1 if definitive (one way or another). 108 # Returns 0 if the input is all ASCII. 109 # Returns -1 if it looks to be a system error 110 111 my ($string, $item, $locale) = @_; 112 my $msg_details = "The name for '$item' in $locale"; 113 114 return 0 unless $string =~ /\P{ASCII}/; 115 116 if (utf8::is_utf8($string)) { 117 if (utf8::valid($string )) { 118 pass("$msg_details is a UTF8 string. Got:\n" . disp_str($string)); 119 return 1; 120 } 121 122 # Here, marked as UTF-8, but is malformed, so shouldn't have been 123 # marked thus 124 fail("$msg_details is marked as UTF8 but is malformed. Got:\n" 125 . disp_str($string)); 126 return 1; 127 } 128 129 # Here, not marked as UTF-8. Since this is a UTF-8 locale, and contains 130 # non-ASCII, something is wrong. It may be us, or it may be libc. Use 131 # decode to see if the bytes form legal UTF-8. If they did, it means 132 # perl wrongly returned the string as not UTF-8. 133 my $copy = $string; 134 my $is_valid_utf8; 135 { 136 use bytes; 137 $is_valid_utf8 = utf8::decode($copy); 138 } 139 140 if ($is_valid_utf8) { 141 fail("$msg_details should have been marked as a UTF8 string. Got:\n" 142 . disp_str($string)); 143 return 1; 144 } 145 146 # Here, the string returned wasn't marked as UTF-8 and isn't valid UTF-8. 147 # This means perl did its job and kept malformed text from being marked 148 # UTF-8. And it means a system bug since the locale was UTF-8. 149 return -1; 150} 151 152my @want = sort keys %want; 153my @illegal_utf8; 154 155my %extra_items = ( 156 _NL_ADDRESS_POSTAL_FMT => 'LC_ADDRESS', 157 _NL_ADDRESS_COUNTRY_NAME => 'LC_ADDRESS', 158 _NL_ADDRESS_COUNTRY_POST => 'LC_ADDRESS', 159 _NL_ADDRESS_COUNTRY_AB2 => 'LC_ADDRESS', 160 _NL_ADDRESS_COUNTRY_AB3 => 'LC_ADDRESS', 161 _NL_ADDRESS_COUNTRY_CAR => 'LC_ADDRESS', 162 _NL_ADDRESS_COUNTRY_NUM => 'LC_ADDRESS', 163 _NL_ADDRESS_COUNTRY_ISBN => 'LC_ADDRESS', 164 _NL_ADDRESS_LANG_NAME => 'LC_ADDRESS', 165 _NL_ADDRESS_LANG_AB => 'LC_ADDRESS', 166 _NL_ADDRESS_LANG_TERM => 'LC_ADDRESS', 167 _NL_ADDRESS_LANG_LIB => 'LC_ADDRESS', 168 _NL_IDENTIFICATION_TITLE => 'LC_IDENTIFICATION', 169 _NL_IDENTIFICATION_SOURCE => 'LC_IDENTIFICATION', 170 _NL_IDENTIFICATION_ADDRESS => 'LC_IDENTIFICATION', 171 _NL_IDENTIFICATION_CONTACT => 'LC_IDENTIFICATION', 172 _NL_IDENTIFICATION_EMAIL => 'LC_IDENTIFICATION', 173 _NL_IDENTIFICATION_TEL => 'LC_IDENTIFICATION', 174 _NL_IDENTIFICATION_FAX => 'LC_IDENTIFICATION', 175 _NL_IDENTIFICATION_LANGUAGE => 'LC_IDENTIFICATION', 176 _NL_IDENTIFICATION_TERRITORY => 'LC_IDENTIFICATION', 177 _NL_IDENTIFICATION_AUDIENCE => 'LC_IDENTIFICATION', 178 _NL_IDENTIFICATION_APPLICATION => 'LC_IDENTIFICATION', 179 _NL_IDENTIFICATION_ABBREVIATION => 'LC_IDENTIFICATION', 180 _NL_IDENTIFICATION_REVISION => 'LC_IDENTIFICATION', 181 _NL_IDENTIFICATION_DATE => 'LC_IDENTIFICATION', 182 _NL_IDENTIFICATION_CATEGORY => 'LC_IDENTIFICATION', 183 _NL_MEASUREMENT_MEASUREMENT => 'LC_MEASUREMENT', 184 _NL_NAME_NAME_FMT => 'LC_NAME', 185 _NL_NAME_NAME_GEN => 'LC_NAME', 186 _NL_NAME_NAME_MR => 'LC_NAME', 187 _NL_NAME_NAME_MRS => 'LC_NAME', 188 _NL_NAME_NAME_MISS => 'LC_NAME', 189 _NL_NAME_NAME_MS => 'LC_NAME', 190 _NL_PAPER_HEIGHT => 'LC_PAPER', 191 _NL_PAPER_WIDTH => 'LC_PAPER', 192 _NL_TELEPHONE_TEL_INT_FMT => 'LC_TELEPHONE', 193 _NL_TELEPHONE_TEL_DOM_FMT => 'LC_TELEPHONE', 194 _NL_TELEPHONE_INT_SELECT => 'LC_TELEPHONE', 195 _NL_TELEPHONE_INT_PREFIX => 'LC_TELEPHONE', 196 ); 197 198use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR', 199 keys %extra_items); 200 201use POSIX; 202 203if (locales_enabled('LC_ALL')) { 204 setlocale(LC_ALL, "C"); 205} 206else { # If no LC_ALL, make sure the categories used in Langinfo are in C 207 setlocale(LC_CTYPE, "C") if locales_enabled('LC_CTYPE'); 208 setlocale(LC_MONETARY, "C") if locales_enabled('LC_MONETARY'); 209 setlocale(LC_NUMERIC, "C") if locales_enabled('LC_NUMERIC'); 210 setlocale(LC_TIME, "C") if locales_enabled('LC_TIME'); 211 setlocale(LC_ADDRESS, "C") if locales_enabled('LC_ADDRESS'); 212 setlocale(LC_IDENTIFICATION, "C") if locales_enabled('LC_IDENTIFICATION'); 213 setlocale(LC_MEASUREMENT, "C") if locales_enabled('LC_MEASUREMENT'); 214 setlocale(LC_NAME, "C") if locales_enabled('LC_NAME'); 215 setlocale(LC_PAPER, "C") if locales_enabled('LC_PAPER'); 216 setlocale(LC_TELEPHONE, "C") if locales_enabled('LC_TELEPHONE'); 217} 218 219for my $constant (@constants) { 220 SKIP: { 221 my $string = eval { langinfo(eval "$constant()") }; 222 is( $@, '', "calling langinfo() with $constant" ); 223 skip "returned string was empty, skipping next two tests", 2 unless $string; 224 ok( defined $string, "checking if the returned string is defined" ); 225 cmp_ok( length($string), '>=', 1, "checking if the returned string has a positive length" ); 226 } 227} 228 229for my $i (1..@want) { 230 my $try = $want[$i-1]; 231 eval { I18N::Langinfo->import($try) }; 232 SKIP: { 233 skip "$try not defined", 1, if $@; 234 no strict 'refs'; 235 like (langinfo(&$try), $want{$try}, "$try => '$want{$try}'"); 236 } 237} 238 239my $comma_locale; 240for my $locale (find_locales( 'LC_NUMERIC' )) { 241 use POSIX; 242 use locale; 243 setlocale(LC_NUMERIC, $locale) or next; 244 my $in = 4.2; # avoid any constant folding bugs 245 my $s = sprintf("%g", $in); 246 if ($s eq "4,2") { 247 $comma_locale = $locale; 248 last; 249 } 250} 251 252SKIP: { 253 skip "Couldn't find a locale with a comma decimal pt", 1 254 unless $comma_locale; 255 256 no strict 'refs'; 257 is (langinfo(&RADIXCHAR), ",", 258 "Returns ',' for decimal pt for locale '$comma_locale'"); 259} 260 261SKIP: { 262 263 my $found_time = 0; 264 my $found_monetary = 0; 265 266 my %time_locales; 267 map { $time_locales{$_} = 1 } find_locales("LC_TIME"); 268 my %monetary_locales; 269 map { $monetary_locales{$_} = 1 } find_locales("LC_MONETARY"); 270 271 foreach my $utf8_locale (find_utf8_ctype_locales()) { 272 if ($time_locales{$utf8_locale} && ! $found_time) { 273 setlocale(&LC_TIME, $utf8_locale); 274 foreach my $time_item (@times) { 275 my $eval_string = "langinfo(&$time_item)"; 276 my $time_name = eval $eval_string; 277 if ($@) { 278 fail("'$eval_string' failed: $@"); 279 280 # If this or the next two tests fail, any other items or 281 # locales will likely fail too, so skip testing them. 282 $found_time = 1; 283 last; 284 } 285 if (! defined $time_name) { 286 fail("'$eval_string' returned undef"); 287 $found_time = 1; 288 last; 289 } 290 if ($time_name eq "") { 291 fail("'$eval_string' returned an empty name"); 292 $found_time = 1; 293 last; 294 } 295 296 my $ret = check_utf8_validity($time_name, $time_item, $utf8_locale); 297 if ($ret > 0) { 298 $found_time = 1; 299 last; 300 } 301 302 if ($ret < 0) { # < 0 means a system error 303 push @illegal_utf8, "$utf8_locale: $time_item:" 304 . disp_str($time_name); 305 } 306 } 307 } 308 309 if ($monetary_locales{$utf8_locale} && ! $found_monetary) { 310 setlocale(&LC_MONETARY, $utf8_locale); 311 my $eval_string = "langinfo(&CRNCYSTR)"; 312 my $symbol = eval $eval_string; 313 if ($@) { 314 fail("'$eval_string' failed: $@"); 315 $found_monetary = 1; 316 next; 317 } 318 if (! defined $symbol) { 319 fail("'$eval_string' returned undef"); 320 next; 321 } 322 323 my $ret = check_utf8_validity($symbol, 'CRNCY', $utf8_locale); 324 if ($ret > 0) { 325 $found_monetary = 1; 326 } 327 elsif ($ret < 0) { # < 0 means a system error 328 push @illegal_utf8, "$utf8_locale: CRNCY:" 329 . disp_str($symbol); 330 } 331 } 332 333 last if $found_monetary && $found_time; 334 } 335 336 if ($found_time + $found_monetary < 2) { 337 my $message = ""; 338 $message .= "time name" unless $found_time; 339 if (! $found_monetary) { 340 $message .= " nor" if $message; 341 "monetary name"; 342 } 343 skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary); 344 } 345} 346 347if (@illegal_utf8) { 348 diag join "\n", "The following are illegal UTF-8", @illegal_utf8; 349} 350 351done_testing(); 352