1b8851fccSafresh1package ExtUtils::MakeMaker::Locale; 2b8851fccSafresh1 3b8851fccSafresh1use strict; 4eac174f2Safresh1use warnings; 5*e0680481Safresh1our $VERSION = "7.70"; 656d68f1eSafresh1$VERSION =~ tr/_//d; 7b8851fccSafresh1 8b8851fccSafresh1use base 'Exporter'; 9b8851fccSafresh1our @EXPORT_OK = qw( 10b8851fccSafresh1 decode_argv env 11b8851fccSafresh1 $ENCODING_LOCALE $ENCODING_LOCALE_FS 12b8851fccSafresh1 $ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT 13b8851fccSafresh1); 14b8851fccSafresh1 15b8851fccSafresh1use Encode (); 16b8851fccSafresh1use Encode::Alias (); 17b8851fccSafresh1 18b8851fccSafresh1our $ENCODING_LOCALE; 19b8851fccSafresh1our $ENCODING_LOCALE_FS; 20b8851fccSafresh1our $ENCODING_CONSOLE_IN; 21b8851fccSafresh1our $ENCODING_CONSOLE_OUT; 22b8851fccSafresh1 23b8851fccSafresh1sub DEBUG () { 0 } 24b8851fccSafresh1 25b8851fccSafresh1sub _init { 26b8851fccSafresh1 if ($^O eq "MSWin32") { 27b8851fccSafresh1 unless ($ENCODING_LOCALE) { 28b8851fccSafresh1 # Try to obtain what the Windows ANSI code page is 29b8851fccSafresh1 eval { 30b8851fccSafresh1 unless (defined &GetConsoleCP) { 31b8851fccSafresh1 require Win32; 32b8851fccSafresh1 # manually "import" it since Win32->import refuses 335759b3d2Safresh1 *GetConsoleCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; 34b8851fccSafresh1 } 35b8851fccSafresh1 unless (defined &GetConsoleCP) { 36b8851fccSafresh1 require Win32::API; 37b8851fccSafresh1 Win32::API->Import('kernel32', 'int GetConsoleCP()'); 38b8851fccSafresh1 } 39b8851fccSafresh1 if (defined &GetConsoleCP) { 40b8851fccSafresh1 my $cp = GetConsoleCP(); 41b8851fccSafresh1 $ENCODING_LOCALE = "cp$cp" if $cp; 42b8851fccSafresh1 } 43b8851fccSafresh1 }; 44b8851fccSafresh1 } 45b8851fccSafresh1 46b8851fccSafresh1 unless ($ENCODING_CONSOLE_IN) { 47b8851fccSafresh1 # only test one since set together 48b8851fccSafresh1 unless (defined &GetInputCP) { 49b8851fccSafresh1 eval { 50b8851fccSafresh1 require Win32; 5156d68f1eSafresh1 eval { 5256d68f1eSafresh1 local $SIG{__WARN__} = sub {} if ( "$]" < 5.014 ); # suppress deprecation warning for inherited AUTOLOAD of Win32::GetConsoleCP() 5356d68f1eSafresh1 Win32::GetConsoleCP(); 5456d68f1eSafresh1 }; 55b8851fccSafresh1 # manually "import" it since Win32->import refuses 565759b3d2Safresh1 *GetInputCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; 575759b3d2Safresh1 *GetOutputCP = sub { &Win32::GetConsoleOutputCP } if defined &Win32::GetConsoleOutputCP; 58b8851fccSafresh1 }; 59b8851fccSafresh1 unless (defined &GetInputCP) { 60b8851fccSafresh1 eval { 61b8851fccSafresh1 # try Win32::Console module for codepage to use 62b8851fccSafresh1 require Win32::Console; 63b8851fccSafresh1 *GetInputCP = sub { &Win32::Console::InputCP } 645759b3d2Safresh1 if defined &Win32::Console::InputCP; 65b8851fccSafresh1 *GetOutputCP = sub { &Win32::Console::OutputCP } 665759b3d2Safresh1 if defined &Win32::Console::OutputCP; 67b8851fccSafresh1 }; 68b8851fccSafresh1 } 69b8851fccSafresh1 unless (defined &GetInputCP) { 70b8851fccSafresh1 # final fallback 71b8851fccSafresh1 *GetInputCP = *GetOutputCP = sub { 72b8851fccSafresh1 # another fallback that could work is: 73b8851fccSafresh1 # reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP 74b8851fccSafresh1 ((qx(chcp) || '') =~ /^Active code page: (\d+)/) 75b8851fccSafresh1 ? $1 : (); 76b8851fccSafresh1 }; 77b8851fccSafresh1 } 78b8851fccSafresh1 } 79b8851fccSafresh1 my $cp = GetInputCP(); 80b8851fccSafresh1 $ENCODING_CONSOLE_IN = "cp$cp" if $cp; 81b8851fccSafresh1 $cp = GetOutputCP(); 82b8851fccSafresh1 $ENCODING_CONSOLE_OUT = "cp$cp" if $cp; 83b8851fccSafresh1 } 84b8851fccSafresh1 } 85b8851fccSafresh1 86b8851fccSafresh1 unless ($ENCODING_LOCALE) { 87b8851fccSafresh1 eval { 88b8851fccSafresh1 require I18N::Langinfo; 89b8851fccSafresh1 $ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); 90b8851fccSafresh1 91b8851fccSafresh1 # Workaround of Encode < v2.25. The "646" encoding alias was 92b8851fccSafresh1 # introduced in Encode-2.25, but we don't want to require that version 93b8851fccSafresh1 # quite yet. Should avoid the CPAN testers failure reported from 94b8851fccSafresh1 # openbsd-4.7/perl-5.10.0 combo. 95b8851fccSafresh1 $ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646"; 96b8851fccSafresh1 97b8851fccSafresh1 # https://rt.cpan.org/Ticket/Display.html?id=66373 98b8851fccSafresh1 $ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8"; 99b8851fccSafresh1 }; 100b8851fccSafresh1 $ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN; 101b8851fccSafresh1 } 102b8851fccSafresh1 10356d68f1eSafresh1 # Workaround of Encode < v2.71 for "cp65000" and "cp65001" 10456d68f1eSafresh1 # The "cp65000" and "cp65001" aliases were added in [Encode v2.71](https://github.com/dankogai/p5-encode/commit/7874bd95aa10967a3b5dbae333d16bcd703ac6c6) 10556d68f1eSafresh1 # via commit <https://github.com/dankogai/p5-encode/commit/84b9c1101d5251d37e226f80d1c6781718779047>. 10656d68f1eSafresh1 # This will avoid test failures for Win32 machines using the UTF-7 or UTF-8 code pages. 10756d68f1eSafresh1 $ENCODING_LOCALE = 'UTF-7' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65000"; 10856d68f1eSafresh1 $ENCODING_LOCALE = 'utf-8-strict' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65001"; 10956d68f1eSafresh1 110b8851fccSafresh1 if ($^O eq "darwin") { 111b8851fccSafresh1 $ENCODING_LOCALE_FS ||= "UTF-8"; 112b8851fccSafresh1 } 113b8851fccSafresh1 114b8851fccSafresh1 # final fallback 115b8851fccSafresh1 $ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8"; 116b8851fccSafresh1 $ENCODING_LOCALE_FS ||= $ENCODING_LOCALE; 117b8851fccSafresh1 $ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE; 118b8851fccSafresh1 $ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN; 119b8851fccSafresh1 120b8851fccSafresh1 unless (Encode::find_encoding($ENCODING_LOCALE)) { 121b8851fccSafresh1 my $foundit; 122b8851fccSafresh1 if (lc($ENCODING_LOCALE) eq "gb18030") { 123b8851fccSafresh1 eval { 124b8851fccSafresh1 require Encode::HanExtra; 125b8851fccSafresh1 }; 126b8851fccSafresh1 if ($@) { 127b8851fccSafresh1 die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped"; 128b8851fccSafresh1 } 129b8851fccSafresh1 $foundit++ if Encode::find_encoding($ENCODING_LOCALE); 130b8851fccSafresh1 } 131b8851fccSafresh1 die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped" 132b8851fccSafresh1 unless $foundit; 133b8851fccSafresh1 134b8851fccSafresh1 } 135b8851fccSafresh1 136b8851fccSafresh1 # use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT; 137b8851fccSafresh1} 138b8851fccSafresh1 139b8851fccSafresh1_init(); 140b8851fccSafresh1Encode::Alias::define_alias(sub { 141b8851fccSafresh1 no strict 'refs'; 142b8851fccSafresh1 no warnings 'once'; 143b8851fccSafresh1 return ${"ENCODING_" . uc(shift)}; 144b8851fccSafresh1}, "locale"); 145b8851fccSafresh1 146b8851fccSafresh1sub _flush_aliases { 147b8851fccSafresh1 no strict 'refs'; 1485759b3d2Safresh1 for my $a (sort keys %Encode::Alias::Alias) { 149b8851fccSafresh1 if (defined ${"ENCODING_" . uc($a)}) { 150b8851fccSafresh1 delete $Encode::Alias::Alias{$a}; 151b8851fccSafresh1 warn "Flushed alias cache for $a" if DEBUG; 152b8851fccSafresh1 } 153b8851fccSafresh1 } 154b8851fccSafresh1} 155b8851fccSafresh1 156b8851fccSafresh1sub reinit { 157b8851fccSafresh1 $ENCODING_LOCALE = shift; 158b8851fccSafresh1 $ENCODING_LOCALE_FS = shift; 159b8851fccSafresh1 $ENCODING_CONSOLE_IN = $ENCODING_LOCALE; 160b8851fccSafresh1 $ENCODING_CONSOLE_OUT = $ENCODING_LOCALE; 161b8851fccSafresh1 _init(); 162b8851fccSafresh1 _flush_aliases(); 163b8851fccSafresh1} 164b8851fccSafresh1 165b8851fccSafresh1sub decode_argv { 166b8851fccSafresh1 die if defined wantarray; 167b8851fccSafresh1 for (@ARGV) { 168b8851fccSafresh1 $_ = Encode::decode(locale => $_, @_); 169b8851fccSafresh1 } 170b8851fccSafresh1} 171b8851fccSafresh1 172b8851fccSafresh1sub env { 173b8851fccSafresh1 my $k = Encode::encode(locale => shift); 174b8851fccSafresh1 my $old = $ENV{$k}; 175b8851fccSafresh1 if (@_) { 176b8851fccSafresh1 my $v = shift; 177b8851fccSafresh1 if (defined $v) { 178b8851fccSafresh1 $ENV{$k} = Encode::encode(locale => $v); 179b8851fccSafresh1 } 180b8851fccSafresh1 else { 181b8851fccSafresh1 delete $ENV{$k}; 182b8851fccSafresh1 } 183b8851fccSafresh1 } 184b8851fccSafresh1 return Encode::decode(locale => $old) if defined wantarray; 185b8851fccSafresh1} 186b8851fccSafresh1 187b8851fccSafresh11; 188b8851fccSafresh1 189b8851fccSafresh1__END__ 190b8851fccSafresh1 191b8851fccSafresh1=head1 NAME 192b8851fccSafresh1 193b8851fccSafresh1ExtUtils::MakeMaker::Locale - bundled Encode::Locale 194b8851fccSafresh1 195b8851fccSafresh1=head1 SYNOPSIS 196b8851fccSafresh1 197b8851fccSafresh1 use Encode::Locale; 198b8851fccSafresh1 use Encode; 199b8851fccSafresh1 200b8851fccSafresh1 $string = decode(locale => $bytes); 201b8851fccSafresh1 $bytes = encode(locale => $string); 202b8851fccSafresh1 203b8851fccSafresh1 if (-t) { 204b8851fccSafresh1 binmode(STDIN, ":encoding(console_in)"); 205b8851fccSafresh1 binmode(STDOUT, ":encoding(console_out)"); 206b8851fccSafresh1 binmode(STDERR, ":encoding(console_out)"); 207b8851fccSafresh1 } 208b8851fccSafresh1 209b8851fccSafresh1 # Processing file names passed in as arguments 210b8851fccSafresh1 my $uni_filename = decode(locale => $ARGV[0]); 211b8851fccSafresh1 open(my $fh, "<", encode(locale_fs => $uni_filename)) 212b8851fccSafresh1 || die "Can't open '$uni_filename': $!"; 213b8851fccSafresh1 binmode($fh, ":encoding(locale)"); 214b8851fccSafresh1 ... 215b8851fccSafresh1 216b8851fccSafresh1=head1 DESCRIPTION 217b8851fccSafresh1 218b8851fccSafresh1In many applications it's wise to let Perl use Unicode for the strings it 219b8851fccSafresh1processes. Most of the interfaces Perl has to the outside world are still byte 220b8851fccSafresh1based. Programs therefore need to decode byte strings that enter the program 221b8851fccSafresh1from the outside and encode them again on the way out. 222b8851fccSafresh1 223b8851fccSafresh1The POSIX locale system is used to specify both the language conventions 224b8851fccSafresh1requested by the user and the preferred character set to consume and 225b8851fccSafresh1output. The C<Encode::Locale> module looks up the charset and encoding (called 226b8851fccSafresh1a CODESET in the locale jargon) and arranges for the L<Encode> module to know 227b8851fccSafresh1this encoding under the name "locale". It means bytes obtained from the 228b8851fccSafresh1environment can be converted to Unicode strings by calling C<< 229b8851fccSafresh1Encode::encode(locale => $bytes) >> and converted back again with C<< 230b8851fccSafresh1Encode::decode(locale => $string) >>. 231b8851fccSafresh1 232b8851fccSafresh1Where file systems interfaces pass file names in and out of the program we also 233b8851fccSafresh1need care. The trend is for operating systems to use a fixed file encoding 234b8851fccSafresh1that don't actually depend on the locale; and this module determines the most 235b8851fccSafresh1appropriate encoding for file names. The L<Encode> module will know this 236b8851fccSafresh1encoding under the name "locale_fs". For traditional Unix systems this will 237b8851fccSafresh1be an alias to the same encoding as "locale". 238b8851fccSafresh1 239b8851fccSafresh1For programs running in a terminal window (called a "Console" on some systems) 240b8851fccSafresh1the "locale" encoding is usually a good choice for what to expect as input and 241b8851fccSafresh1output. Some systems allows us to query the encoding set for the terminal and 242b8851fccSafresh1C<Encode::Locale> will do that if available and make these encodings known 243b8851fccSafresh1under the C<Encode> aliases "console_in" and "console_out". For systems where 244b8851fccSafresh1we can't determine the terminal encoding these will be aliased as the same 245b8851fccSafresh1encoding as "locale". The advice is to use "console_in" for input known to 246b8851fccSafresh1come from the terminal and "console_out" for output to the terminal. 247b8851fccSafresh1 248b8851fccSafresh1In addition to arranging for various Encode aliases the following functions and 249b8851fccSafresh1variables are provided: 250b8851fccSafresh1 251b8851fccSafresh1=over 252b8851fccSafresh1 253b8851fccSafresh1=item decode_argv( ) 254b8851fccSafresh1 255b8851fccSafresh1=item decode_argv( Encode::FB_CROAK ) 256b8851fccSafresh1 257b8851fccSafresh1This will decode the command line arguments to perl (the C<@ARGV> array) in-place. 258b8851fccSafresh1 259b8851fccSafresh1The function will by default replace characters that can't be decoded by 260b8851fccSafresh1"\x{FFFD}", the Unicode replacement character. 261b8851fccSafresh1 262b8851fccSafresh1Any argument provided is passed as CHECK to underlying Encode::decode() call. 263b8851fccSafresh1Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the 264b8851fccSafresh1command line arguments can be decoded. See L<Encode/"Handling Malformed Data"> 265b8851fccSafresh1for details on other options for CHECK. 266b8851fccSafresh1 267b8851fccSafresh1=item env( $uni_key ) 268b8851fccSafresh1 269b8851fccSafresh1=item env( $uni_key => $uni_value ) 270b8851fccSafresh1 271b8851fccSafresh1Interface to get/set environment variables. Returns the current value as a 272b8851fccSafresh1Unicode string. The $uni_key and $uni_value arguments are expected to be 273b8851fccSafresh1Unicode strings as well. Passing C<undef> as $uni_value deletes the 274b8851fccSafresh1environment variable named $uni_key. 275b8851fccSafresh1 276b8851fccSafresh1The returned value will have the characters that can't be decoded replaced by 277b8851fccSafresh1"\x{FFFD}", the Unicode replacement character. 278b8851fccSafresh1 279b8851fccSafresh1There is no interface to request alternative CHECK behavior as for 280b8851fccSafresh1decode_argv(). If you need that you need to call encode/decode yourself. 281b8851fccSafresh1For example: 282b8851fccSafresh1 283b8851fccSafresh1 my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK); 284b8851fccSafresh1 my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK); 285b8851fccSafresh1 286b8851fccSafresh1=item reinit( ) 287b8851fccSafresh1 288b8851fccSafresh1=item reinit( $encoding ) 289b8851fccSafresh1 290b8851fccSafresh1Reinitialize the encodings from the locale. You want to call this function if 291b8851fccSafresh1you changed anything in the environment that might influence the locale. 292b8851fccSafresh1 293b8851fccSafresh1This function will croak if the determined encoding isn't recognized by 294b8851fccSafresh1the Encode module. 295b8851fccSafresh1 296b8851fccSafresh1With argument force $ENCODING_... variables to set to the given value. 297b8851fccSafresh1 298b8851fccSafresh1=item $ENCODING_LOCALE 299b8851fccSafresh1 300b8851fccSafresh1The encoding name determined to be suitable for the current locale. 301b8851fccSafresh1L<Encode> know this encoding as "locale". 302b8851fccSafresh1 303b8851fccSafresh1=item $ENCODING_LOCALE_FS 304b8851fccSafresh1 305b8851fccSafresh1The encoding name determined to be suitable for file system interfaces 306b8851fccSafresh1involving file names. 307b8851fccSafresh1L<Encode> know this encoding as "locale_fs". 308b8851fccSafresh1 309b8851fccSafresh1=item $ENCODING_CONSOLE_IN 310b8851fccSafresh1 311b8851fccSafresh1=item $ENCODING_CONSOLE_OUT 312b8851fccSafresh1 313b8851fccSafresh1The encodings to be used for reading and writing output to the a console. 314b8851fccSafresh1L<Encode> know these encodings as "console_in" and "console_out". 315b8851fccSafresh1 316b8851fccSafresh1=back 317b8851fccSafresh1 318b8851fccSafresh1=head1 NOTES 319b8851fccSafresh1 320b8851fccSafresh1This table summarizes the mapping of the encodings set up 321b8851fccSafresh1by the C<Encode::Locale> module: 322b8851fccSafresh1 323b8851fccSafresh1 Encode | | | 324b8851fccSafresh1 Alias | Windows | Mac OS X | POSIX 325b8851fccSafresh1 ------------+---------+--------------+------------ 326b8851fccSafresh1 locale | ANSI | nl_langinfo | nl_langinfo 327b8851fccSafresh1 locale_fs | ANSI | UTF-8 | nl_langinfo 328b8851fccSafresh1 console_in | OEM | nl_langinfo | nl_langinfo 329b8851fccSafresh1 console_out | OEM | nl_langinfo | nl_langinfo 330b8851fccSafresh1 331b8851fccSafresh1=head2 Windows 332b8851fccSafresh1 333b8851fccSafresh1Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16 334b8851fccSafresh1strings) and a byte based API based a character set called ANSI. The 335b8851fccSafresh1regular Perl interfaces to the OS currently only uses the ANSI APIs. 336b8851fccSafresh1Unfortunately ANSI is not a single character set. 337b8851fccSafresh1 338b8851fccSafresh1The encoding that corresponds to ANSI varies between different editions of 339b8851fccSafresh1Windows. For many western editions of Windows ANSI corresponds to CP-1252 340b8851fccSafresh1which is a character set similar to ISO-8859-1. Conceptually the ANSI 341b8851fccSafresh1character set is a similar concept to the POSIX locale CODESET so this module 342b8851fccSafresh1figures out what the ANSI code page is and make this available as 343b8851fccSafresh1$ENCODING_LOCALE and the "locale" Encoding alias. 344b8851fccSafresh1 345b8851fccSafresh1Windows systems also operate with another byte based character set. 346b8851fccSafresh1It's called the OEM code page. This is the encoding that the Console 347b8851fccSafresh1takes as input and output. It's common for the OEM code page to 348b8851fccSafresh1differ from the ANSI code page. 349b8851fccSafresh1 350b8851fccSafresh1=head2 Mac OS X 351b8851fccSafresh1 352b8851fccSafresh1On Mac OS X the file system encoding is always UTF-8 while the locale 353b8851fccSafresh1can otherwise be set up as normal for POSIX systems. 354b8851fccSafresh1 355b8851fccSafresh1File names on Mac OS X will at the OS-level be converted to 356b8851fccSafresh1NFD-form. A file created by passing a NFC-filename will come 357b8851fccSafresh1in NFD-form from readdir(). See L<Unicode::Normalize> for details 358b8851fccSafresh1of NFD/NFC. 359b8851fccSafresh1 360b8851fccSafresh1Actually, Apple does not follow the Unicode NFD standard since not all 361b8851fccSafresh1character ranges are decomposed. The claim is that this avoids problems with 362b8851fccSafresh1round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for 363b8851fccSafresh1details. 364b8851fccSafresh1 365b8851fccSafresh1=head2 POSIX (Linux and other Unixes) 366b8851fccSafresh1 367b8851fccSafresh1File systems might vary in what encoding is to be used for 368b8851fccSafresh1filenames. Since this module has no way to actually figure out 369b8851fccSafresh1what the is correct it goes with the best guess which is to 370b8851fccSafresh1assume filenames are encoding according to the current locale. 371b8851fccSafresh1Users are advised to always specify UTF-8 as the locale charset. 372b8851fccSafresh1 373b8851fccSafresh1=head1 SEE ALSO 374b8851fccSafresh1 375b8851fccSafresh1L<I18N::Langinfo>, L<Encode>, L<Term::Encoding> 376b8851fccSafresh1 377b8851fccSafresh1=head1 AUTHOR 378b8851fccSafresh1 379b8851fccSafresh1Copyright 2010 Gisle Aas <gisle@aas.no>. 380b8851fccSafresh1 381b8851fccSafresh1This library is free software; you can redistribute it and/or 382b8851fccSafresh1modify it under the same terms as Perl itself. 383b8851fccSafresh1 384b8851fccSafresh1=cut 385