# $Id: encoding.pm,v 2.17 2015/09/15 13:53:27 dankogai Exp dankogai $
package encoding;
our $VERSION = sprintf "%d.%02d", q$Revision: 2.17 $ =~ /(\d+)/g;

use Encode;
use strict;
use warnings;

# Compile-time switches:
#   DEBUG       - true when $ENV{PERL_ENCODE_DEBUG} is set; enables warn() traces
#   HAS_PERLIO  - true when PerlIO::encoding 0.02 or later can be loaded
#   PERL_5_21_7 - true on perl v5.21.7 and later, where ${^E_NCODING} is used
#                 instead of ${^ENCODING}
use constant {
    DEBUG       => !!$ENV{PERL_ENCODE_DEBUG},
    HAS_PERLIO  => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) },
    PERL_5_21_7 => $^V && $^V ge v5.21.7,
};

# _exception($name)
#
# Returns 1 when the script-encoding variable must NOT be set for the
# (canonical) encoding $name, 0 otherwise.  It only ever returns 1 on a
# perl where $] is not greater than 5.008, for one of the listed UTF
# encodings, and when $Config{perl_patchlevel} is false.
sub _exception {
    my $name = shift;
    $] > 5.008 and return 0;    # 5.8.1 or higher then no
    my %utfs = map { $_ => 1 }
      qw(utf8 UCS-2BE UCS-2LE UTF-16 UTF-16BE UTF-16LE
      UTF-32 UTF-32BE UTF-32LE);
    $utfs{$name} or return 0;    # UTFs or no
    require Config;
    Config->import();
    our %Config;
    return $Config{perl_patchlevel} ? 0 : 1;    # maintperl then no
}

# in_locale()
#
# Returns $^H masked with $locale::hint_bits (0 when that variable is unset).
sub in_locale { $^H & ( $locale::hint_bits || 0 ) }

# _get_locale_encoding()
#
# Works out an encoding name from the environment and returns it, or undef
# when nothing could be determined.  Sources are tried in this order:
#   1. on MSWin32, the console output code page, then the ANSI code page;
#   2. I18N::Langinfo's CODESET, canonicalised through find_encoding();
#   3. the part after "." in the LC_CTYPE locale name reported by POSIX.
# Also exported to lib/open.pm via import(':_get_locale_encoding').
sub _get_locale_encoding {
    my $locale_encoding;

    if ( $^O eq 'MSWin32' ) {
        my @probes = (

            # First try to get the OutputCP. This will work only if we
            # are attached to a console
            [ 'Win32.pm'         => 'Win32::GetConsoleOutputCP' ],
            [ 'Win32/Console.pm' => 'Win32::Console::OutputCP' ],

            # If above failed, this means that we are a GUI app
            # Let's assume that the ANSI codepage is what matters
            [ 'Win32.pm' => 'Win32::GetACP' ],
        );
        for my $probe (@probes) {
            my ( $file, $function ) = @{$probe};
            my $cp = eval {
                require $file;
                no strict 'refs';    # $function is a sub name, not a ref
                &{$function}();
            };
            next unless $cp;

            # 65001 is the code page for UTF-8
            return $cp == 65001 ? 'UTF-8' : 'cp' . $cp;
        }
    }

    # I18N::Langinfo isn't available everywhere
    $locale_encoding = eval {
        require I18N::Langinfo;
        find_encoding(
            I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() )
        )->name;
    };
    return $locale_encoding if defined $locale_encoding;

    eval {
        require POSIX;

        # Get the current locale
        # Remember that MSVCRT impl is quite different from Unixes
        my $locale = POSIX::setlocale( POSIX::LC_CTYPE() );
        if ( defined $locale
            and $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ )
        {
            my $country_language;
            ( $country_language, $locale_encoding ) = ( $1, $2 );

            # Could do more heuristics based on the country and language
            # since we have Locale::Country and Locale::Language available.
            # TODO: get a database of Language -> Encoding mappings
            # (the Estonian database at http://www.eki.ee/letter/
            # would be excellent!) --jhi
            #
            # In each pattern below "^" binds only to the first alternative
            # and "$" only to the second, so names that merely END in the
            # language/country word (e.g. "Japanese_Japan") match as well.
            # That looseness is kept as is.
            if ( lc($locale_encoding) eq 'euc' ) {
                if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) {
                    $locale_encoding = 'euc-jp';
                }
                elsif ( $country_language =~ /^ko_KR|korean?$/i ) {
                    $locale_encoding = 'euc-kr';
                }
                elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) {
                    $locale_encoding = 'euc-cn';
                }
                elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) {
                    $locale_encoding = 'euc-tw';
                }
                else {
                    # NOTE: this croak happens inside the enclosing eval, so
                    # it does not propagate; the bare 'euc' is what gets
                    # returned to the caller.
                    require Carp;
                    Carp::croak(
                        "encoding: Locale encoding '$locale_encoding' too ambiguous"
                    );
                }
            }
        }
    };

    return $locale_encoding;
}

# import($class, $name, %arg)
#
# Implements "use encoding ENCNAME [, Filter => 1] [, STDIN => ENC]
# [, STDOUT => ENC]".
#
#   $name  - encoding name; ':locale' asks _get_locale_encoding() (falling
#            back to $ENV{PERL_ENCODING}); ':_get_locale_encoding' only
#            exports that helper into the caller and returns.  When $name is
#            omitted, $ENV{PERL_ENCODING} is used.
#   Filter - when true, installs a source filter that decodes the whole
#            script and turns on the utf8 hint bits; otherwise the script
#            encoding variable is set.
#   STDIN / STDOUT - per-handle encoding; a key that exists with a false
#            value leaves that handle untouched.
#
# Croaks on EBCDIC platforms, when no encoding can be determined, when an
# encoding is unknown, and when binmode() throws.  Returns 1 otherwise.
sub import {

    if ( ord("A") == 193 ) {
        require Carp;
        Carp::croak("encoding: pragma does not support EBCDIC platforms");
    }

    if ( $] >= 5.017 ) {
        warnings::warnif( "deprecated",
            "Use of the encoding pragma is deprecated" );
    }
    my $class = shift;
    my $name  = shift;

    # ENCNAME is documented as optional, with PERL_ENCODING as the fallback.
    # Consult it before giving up so that the fallback is actually reachable.
    $name = $ENV{PERL_ENCODING} if !$name;
    if ( !$name ) {
        require Carp;
        Carp::croak("encoding: no encoding specified.");
    }
    if ( $name eq ':_get_locale_encoding' ) {    # used by lib/open.pm
        my $caller = caller();
        {
            no strict 'refs';
            *{"${caller}::_get_locale_encoding"} = \&_get_locale_encoding;
        }
        return;
    }
    $name = _get_locale_encoding() if $name eq ':locale';
    my %arg = @_;

    # Only reachable with an undefined $name when locale detection failed.
    $name = $ENV{PERL_ENCODING} unless defined $name;

    # Do not hand undef to find_encoding() or interpolate it into the
    # message: both would only add "uninitialized" noise to the real error.
    my $enc = defined $name ? find_encoding($name) : undef;
    unless ( defined $enc ) {
        require Carp;
        my $shown = defined $name ? $name : '';
        Carp::croak("encoding: Unknown encoding '$shown'");
    }
    $name = $enc->name;    # canonize
    unless ( $arg{Filter} ) {
        DEBUG and warn "_exception($name) = ", _exception($name);
        if ( !_exception($name) ) {
            if ( !PERL_5_21_7 ) {
                ${^ENCODING} = $enc;
            }
            else {
                # Starting with 5.21.7, this pragma uses a shadow variable
                # designed explicitly for it, ${^E_NCODING}, to enforce
                # lexical scope; instead of ${^ENCODING}.
                $^H{'encoding'} = 1;
                ${^E_NCODING} = $enc;
            }
        }
        HAS_PERLIO or return 1;
    }
    else {
        defined( ${^ENCODING} ) and undef ${^ENCODING};
        undef ${^E_NCODING} if PERL_5_21_7;

        # implicitly 'use utf8'
        require utf8;    # to fetch $utf8::hint_bits;
        $^H |= $utf8::hint_bits;

        # Best effort: a failure to install the filter is not fatal.
        eval {
            require Filter::Util::Call;
            Filter::Util::Call->import;
            filter_add(
                sub {
                    my $status = filter_read();
                    if ( $status > 0 ) {
                        $_ = $enc->decode( $_, 1 );
                        DEBUG and warn $_;
                    }
                    $status;
                }
            );
        };
        DEBUG
          and warn $@ eq ''
          ? "Filter installed"
          : "Filter not installed: $@";
    }
    defined ${^UNICODE} and ${^UNICODE} != 0 and return 1;
    for my $h (qw(STDIN STDOUT)) {
        my $layer_encoding;
        if ( $arg{$h} ) {
            unless ( defined find_encoding( $arg{$h} ) ) {
                require Carp;
                Carp::croak(
                    "encoding: Unknown encoding for $h, '$arg{$h}'");
            }
            $layer_encoding = $arg{$h};
        }
        elsif ( !exists $arg{$h} ) {
            $layer_encoding = $name;
        }

        # The handle was explicitly switched off (key present, false value):
        # nothing is attempted, so there is no error to look at.
        next unless defined $layer_encoding;

        # Inspect $@ only right after the eval that belongs to this handle.
        # Checking it unconditionally would pick up a stale error left by
        # the filter eval above or by unrelated code.
        eval { binmode( $h, ":raw :encoding($layer_encoding)" ) };
        if ( my $error = $@ ) {
            require Carp;
            Carp::croak($error);
        }
    }
    return 1;    # I doubt if we need it, though
}

# unimport()
#
# Implements "no encoding": clears the script-encoding variables, resets
# STDIN and STDOUT (to ":raw" when PerlIO::encoding is available, plain
# binmode otherwise) and, if Filter::Util::Call has been loaded, tries to
# remove the source filter.
sub unimport {
    no warnings;
    undef ${^ENCODING};
    undef ${^E_NCODING} if PERL_5_21_7;
    if (HAS_PERLIO) {
        binmode( STDIN,  ":raw" );
        binmode( STDOUT, ":raw" );
    }
    else {
        binmode(STDIN);
        binmode(STDOUT);
    }
    if ( $INC{"Filter/Util/Call.pm"} ) {
        eval { filter_del() };
    }
}

1;
__END__

=pod

=head1 NAME

encoding - allows you to write your script in non-ASCII and non-UTF-8

=head1 WARNING

This module has been deprecated since perl v5.18. See L</DESCRIPTION> and
L</BUGS>.

=head1 SYNOPSIS

  use encoding "greek";  # Perl like Greek to you?
  use encoding "euc-jp"; # Jperl!

  # or you can even do this if your shell supports your native encoding

  perl -Mencoding=latin2 -e'...' # Feeling centrally European?
  perl -Mencoding=euc-kr -e'...' # Or Korean?

  # more control

  # A simple euc-cn => utf-8 converter
  use encoding "euc-cn", STDOUT => "utf8"; while(<>){print};

  # "no encoding;" supported
  no encoding;

  # an alternate way, Filter
  use encoding "euc-jp", Filter=>1;
  # now you can use kanji identifiers -- in euc-jp!

  # encode based on the current locale - specialized purposes only;
  # fraught with danger!!
  use encoding ':locale';

=head1 DESCRIPTION

This pragma is used to enable a Perl script to be written in encodings that
aren't strictly ASCII nor UTF-8. It translates all or portions of the Perl
program script from a given encoding into UTF-8, and changes the PerlIO layers
of C<STDIN> and C<STDOUT> to the encoding specified.

This pragma dates from the days when UTF-8-enabled editors were uncommon. But
that was long ago, and the need for it is greatly diminished. That, coupled
with the fact that it doesn't work with threads, along with other problems,
(see L</BUGS>) have led to its being deprecated.
It is planned to remove this
pragma in a future Perl version. New code should be written in UTF-8, and the
C<use utf8> pragma used instead (see L<perluniintro> and L<utf8> for details).
Old code should be converted to UTF-8, via something like the recipe in the
L</SYNOPSIS> (though this simple approach may require manual adjustments
afterwards).

The only legitimate use of this pragma is almost certainly just one per file,
near the top, with file scope, as the file is likely going to only be written
in one encoding. Further restrictions apply in Perls before v5.22 (see
L</Prior to Perl v5.22>).

There are two basic modes of operation (plus turning it off):

=over 4

=item C<use encoding ['I<ENCNAME>'] ;>

This is the normal operation. It translates various literals encountered in
the Perl source file from the encoding I<ENCNAME> into UTF-8, and similarly
converts character code points. This is used when the script is a combination
of ASCII (for the variable names and punctuation, I<etc>), but the literal
data is in the specified encoding.

I<ENCNAME> is optional. If omitted, the encoding specified in the environment
variable L<C<PERL_ENCODING>|perlrun/PERL_ENCODING> is used. If this isn't
set, or the resolved-to encoding is not known to C<L<Encode>>, the error
C<Unknown encoding 'I<ENCNAME>'> will be thrown.

Starting in Perl v5.8.6 (C<Encode> version 2.0.1), I<ENCNAME> may be the
name C<:locale>. This is for very specialized applications, and is documented
in L</The C<:locale> sub-pragma> below.

The literals that are converted are C<q//, qq//, qr//, qw///, qx//>, and
starting in v5.8.1, C<tr///>. Operations that do conversions include C<chr>,
C<ord>, C<utf8::upgrade> (but not C<utf8::downgrade>), and C<chomp>.

Also starting in v5.8.1, the C<DATA> pseudo-filehandle is translated from the
encoding into UTF-8.

For example, you can write code in EUC-JP as follows:

  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
               #<-char-><-char->   # 4 octets
  s/\bCamel\b/$Rakuda/;

And with C<use encoding "euc-jp"> in effect, it is the same thing as
that code in UTF-8:

  my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters
  s/\bCamel\b/$Rakuda/;

See L</EXAMPLE - Greekperl> below for a more complete example.

Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the
PerlIO layers of C<STDIN> and C<STDOUT> are set to "C<:encoding(I<ENCNAME>)>".
Therefore,

  use encoding "euc-jp";
  my $message = "Camel is the symbol of perl.\n";
  my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji
  $message =~ s/\bCamel\b/$Rakuda/;
  print $message;

will print

  "\xF1\xD1\xF1\xCC is the symbol of perl.\n"

not

  "\x{99F1}\x{99DD} is the symbol of perl.\n"

You can override this by giving extra arguments; see below.

Note that C<STDERR> WILL NOT be changed, regardless.

Also note that non-STD file handles remain unaffected. Use C<use
open> or C<binmode> to change the layers of those.

=item C<use encoding I<ENCNAME> Filter=E<gt>1;>

This operates as above, but the C<Filter> argument with a non-zero
value causes the entire script, and not just literals, to be translated from
the encoding into UTF-8. This allows identifiers in the source to be in that
encoding as well. (Problems may occur if the encoding is not a superset of
ASCII; imagine all your semi-colons being translated into something
different.) One can use this form to make

  ${"\x{4eba}"}++

work. (This is equivalent to C<$I<human>++>, where I<human> is a single Han
ideograph.)

This effectively means that your source code behaves as if it were written in
UTF-8 with C<use utf8> in effect. So even if your editor only supports
Shift_JIS, for example, you can still try examples in Chapter 15 of
C<Programming Perl, 3rd Ed.>.

This option is significantly slower than the other one.

=item C<no encoding;>

Unsets the script encoding. The layers of C<STDIN> and C<STDOUT> are
reset to "C<:raw>" (the default unprocessed raw stream of bytes).

=back

=head1 OPTIONS

=head2 Setting C<STDIN> and/or C<STDOUT> individually

The encodings of C<STDIN> and C<STDOUT> are individually settable by parameters to
the pragma:

  use encoding 'euc-tw', STDIN => 'greek' ...;

In this case, you cannot omit the first I<ENCNAME>. C<< STDIN => undef >>
turns the I/O transcoding completely off for that filehandle.

When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero,
these options will be completely ignored. See L<perlvar/C<${^UNICODE}>> and
L<"C<-C>" in perlrun|perlrun/-C [numberE<sol>list]> for details.

=head2 The C<:locale> sub-pragma

Starting in v5.8.6, the encoding name may be C<:locale>. This means that the
encoding is taken from the current locale, and not hard-coded by the pragma.
Since a script really can only be encoded in exactly one encoding, this option
is dangerous. It makes sense only if the script itself is written in ASCII,
and all the possible locales that will be in use when the script is executed
are supersets of ASCII. That means that the script itself doesn't get
changed, but the I/O handles have the specified encoding added, and the
operations like C<chr> and C<ord> use that encoding.

The logic of finding which locale C<:locale> uses is as follows:

=over 4

=item 1.

If the platform supports the C<langinfo(CODESET)> interface, the codeset
returned is used as the default encoding for the open pragma.

=item 2.

If 1. didn't work but we are under the locale pragma, the environment
variables C<LC_ALL> and C<LANG> (in that order) are matched for encodings
(the part after "C<.>", if any), and if any is found, that is used
as the default encoding for the open pragma.

=item 3.

If 1. and 2. didn't work, the environment variables C<LC_ALL> and C<LANG>
(in that order) are matched for anything looking like UTF-8, and if
any is found, C<:utf8> is used as the default encoding for the open
pragma.

=back

If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
the default encoding of your C<STDIN>, C<STDOUT>, and C<STDERR>, and of
B<any subsequent file open>, is UTF-8.

=head1 CAVEATS

=head2 SIDE EFFECTS

=over

=item *

If the C<encoding> pragma is in scope then the lengths returned are
calculated from the length of C<$/> in Unicode characters, which is not
always the same as the length of C<$/> in the native encoding.

=item *

Without this pragma, if strings operating under byte semantics and strings
with Unicode character data are concatenated, the new string will
be created by decoding the byte strings as I<ISO 8859-1 (Latin-1)>.

The B<encoding> pragma changes this to use the specified encoding
instead. For example:

    use encoding 'utf8';
    my $string = chr(20000); # a Unicode string
    utf8::encode($string);   # now it's a UTF-8 encoded byte string
    # concatenate with another Unicode string
    print length($string . chr(20000));

Will print C<2>, because C<$string> is upgraded as UTF-8. Without
C<use encoding 'utf8';>, it will print C<4> instead, since C<$string>
is three octets when interpreted as Latin-1.

=back

=head2 DO NOT MIX MULTIPLE ENCODINGS

Notice that only literals (string or regular expression) having only
legacy code points are affected: if you mix data like this

    \x{100}\xDF
    \xDF\x{100}

the data is assumed to be in (Latin 1 and) Unicode, not in your native
encoding. In other words, this will match in "greek":

    "\xDF" =~ /\x{3af}/

but this will not

    "\xDF\x{100}" =~ /\x{3af}\x{100}/

since the C<\xDF> (ISO 8859-7 GREEK SMALL LETTER IOTA WITH TONOS) on
the left will B<not> be upgraded to C<\x{3af}> (Unicode GREEK SMALL
LETTER IOTA WITH TONOS) because of the C<\x{100}> on the left. You
should not be mixing your legacy data and Unicode in the same string.

This pragma also affects encoding of the 0x80..0xFF code point range:
normally characters in that range are left as eight-bit bytes (unless
they are combined with characters with code points 0x100 or larger,
in which case all characters need to become UTF-8 encoded), but if
the C<encoding> pragma is present, even the 0x80..0xFF range always
gets UTF-8 encoded.

After all, the best thing about this pragma is that you don't have to
resort to \x{....} just to spell your name in a native encoding.
So feel free to put your strings in your encoding in quotes and
regexes.

=head2 Prior to Perl v5.22

The pragma was a per script, not a per block lexical. Only the last
C<use encoding> or C<no encoding> mattered, and it affected
B<the whole script>. However, the C<no encoding> pragma was supported and
C<use encoding> could appear as many times as you want in a given script
(though only the last was effective).

Since the scope wasn't lexical, other modules' use of C<chr>, C<ord>, I<etc.>
were affected. This leads to spooky, incorrect action at a distance that is
hard to debug.

This means you would have to be very careful of the load order:

  # called module
  package Module_IN_BAR;
  use encoding "bar";
  # stuff in "bar" encoding here
  1;

  # caller script
  use encoding "foo";
  use Module_IN_BAR;
  # surprise! use encoding "bar" is in effect.

The best way to avoid this oddity is to use this pragma RIGHT AFTER
other modules are loaded. i.e.

  use Module_IN_BAR;
  use encoding "foo";

=head2 Prior to Encode version 1.87

=over

=item *

C<STDIN> and C<STDOUT> were not set under the filter option.
And C<< STDIN=>I<ENCODING> >> and C<< STDOUT=>I<ENCODING> >> didn't work like
the non-filter version.

=item *

C<use utf8> wasn't implicitly declared so you had to C<use utf8> to do

  ${"\x{4eba}"}++

=back

=head2 Prior to Perl v5.8.1

=over

=item "NON-EUC" doublebyte encodings

Because perl needs to parse the script before applying this pragma, such
encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH;
C<\x5c>) in the second byte fail because the second byte may
accidentally escape the quoting character that follows.

=item C<tr///>

The B<encoding> pragma works by decoding string literals in
C<q//,qq//,qr//,qw///, qx//> and so forth. In perl v5.8.0, this
does not apply to C<tr///>. Therefore,

    use encoding 'euc-jp';
    #....
    $kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/;
    #           -------- -------- -------- --------

Does not work as

    $kana =~ tr/\x{3041}-\x{3093}/\x{30a1}-\x{30f3}/;

=over

=item Legend of characters above

  utf8     euc-jp   charnames::viacode()
  -----------------------------------------
  \x{3041} \xA4\xA1 HIRAGANA LETTER SMALL A
  \x{3093} \xA4\xF3 HIRAGANA LETTER N
  \x{30a1} \xA5\xA1 KATAKANA LETTER SMALL A
  \x{30f3} \xA5\xF3 KATAKANA LETTER N

=back

This counterintuitive behavior has been fixed in perl v5.8.1.

In perl v5.8.0, you can work around this as follows:

    use encoding 'euc-jp';
    # ....
    eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ };

Note the C<tr//> expression is surrounded by C<qq{}>. The idea behind
this is the same as the classic idiom that makes C<tr///> 'interpolate':

    tr/$from/$to/;            # wrong!
    eval qq{ tr/$from/$to/ }; # workaround.

=back

=head1 EXAMPLE - Greekperl

    use encoding "iso 8859-7";

    # \xDF in ISO 8859-7 (Greek) is \x{3af} in Unicode.

    $a = "\xDF";
    $b = "\x{100}";

    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf

    $c = $a . $b;

    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".

    # chr() is affected, and ...

    print "mega\n" if ord(chr(0xdf)) == 0x3af;

    # ... ord() is affected by the encoding pragma ...

    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;

    # ... as are eq and cmp ...

    print "peta\n" if "\x{3af}" eq pack("C", 0xdf);
    print "exa\n" if ("\x{3af}" cmp pack("C", 0xdf)) == 0;

    # ... but pack/unpack C are not affected, in case you still
    # want to go back to your native encoding

    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;

=head1 BUGS

=over

=item Thread safety

C<use encoding ...> is not thread-safe (i.e., do not use in threaded
applications).

=item Can't be used by more than one module in a single program.

Only one encoding is allowed. If you combine modules in a program that have
different encodings, only one will be actually used.

=item Other modules using C<STDIN> and C<STDOUT> get the encoded stream

They may be expecting something completely different.

=item literals in regex that are longer than 127 bytes

For native multibyte encodings (either fixed or variable length),
the current implementation of the regular expressions may introduce
recoding errors for regular expression literals longer than 127 bytes.

=item EBCDIC

The encoding pragma is not supported on EBCDIC platforms.

=item C<format>

This pragma doesn't work well with C<format> because PerlIO does not
get along very well with it. When C<format> contains non-ASCII
characters it prints funny or gets "wide character warnings".
To understand it, try the code below.

  # Save this one in utf8
  # replace *non-ascii* with a non-ascii string
  my $camel;
  format STDOUT =
  *non-ascii*@>>>>>>>
  $camel
  .
  $camel = "*non-ascii*";
  binmode(STDOUT=>':encoding(utf8)'); # bang!
  write;              # funny
  print $camel, "\n"; # fine

Without the binmode, write() happens to work, but then print()
fails instead of write().

At any rate, the very use of C<format> is questionable when it comes to
unicode characters since you have to consider such things as character
width (e.g. double-width for ideographs) and directions (e.g. BIDI for
Arabic and Hebrew).

=item See also L</CAVEATS>

=back

=head1 HISTORY

This pragma first appeared in Perl v5.8.0. It has been enhanced in later
releases as specified above.

=head1 SEE ALSO

L<perlunicode>, L<Encode>, L<open>, L<Filter::Util::Call>

Ch. 15 of C<Programming Perl (3rd Edition)>
by Larry Wall, Tom Christiansen, Jon Orwant;
O'Reilly & Associates; ISBN 0-596-00027-8

=cut