143003dfeSmillert#!./perl -w 243003dfeSmillert 343003dfeSmillertBEGIN { 443003dfeSmillert unless (eval { require Encode } ) { 543003dfeSmillert print "1..0 # Skip: not Encode\n"; 643003dfeSmillert exit 0; 743003dfeSmillert } 8b8851fccSafresh1 if (ord("A") == 193) { 9b8851fccSafresh1 print "1..0 # Skip: EBCDIC\n"; 10b8851fccSafresh1 exit 0; 11b8851fccSafresh1 } 12b8851fccSafresh1 require "../../t/charset_tools.pl"; 1343003dfeSmillert} 1443003dfeSmillert 159f11ffb7Safresh1use Test::More tests => 27; 1643003dfeSmillert 1743003dfeSmillertmy $grk = "grk$$"; 1843003dfeSmillertmy $utf = "utf$$"; 1943003dfeSmillertmy $fail1 = "fa$$"; 2043003dfeSmillertmy $fail2 = "fb$$"; 2143003dfeSmillertmy $russki = "koi8r$$"; 2243003dfeSmillertmy $threebyte = "3byte$$"; 2343003dfeSmillert 249f11ffb7Safresh1if (open(GRK, '>', $grk)) { 2543003dfeSmillert binmode(GRK, ":bytes"); 2643003dfeSmillert # alpha beta gamma in ISO 8859-7 2743003dfeSmillert print GRK "\xe1\xe2\xe3"; 2843003dfeSmillert close GRK or die "Could not close: $!"; 2943003dfeSmillert} 3043003dfeSmillert 3143003dfeSmillert{ 32898184e3Ssthen is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1); 33898184e3Ssthen is(open(my $o,'>:utf8',$utf), 1); 34898184e3Ssthen is((print $o readline $i), 1); 3543003dfeSmillert close($o) or die "Could not close: $!"; 3643003dfeSmillert close($i); 3743003dfeSmillert} 3843003dfeSmillert 399f11ffb7Safresh1if (open(UTF, '<', $utf)) { 4043003dfeSmillert binmode(UTF, ":bytes"); 41b8851fccSafresh1 4243003dfeSmillert # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3) 43b8851fccSafresh1 is(scalar <UTF>, byte_utf8a_to_utf8n("\xce\xb1\xce\xb2\xce\xb3")); 4443003dfeSmillert close UTF; 4543003dfeSmillert} 4643003dfeSmillert 4743003dfeSmillert{ 4843003dfeSmillert use Encode; 49898184e3Ssthen is (open(my $i,'<:utf8',$utf), 1); 50898184e3Ssthen is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1); 51898184e3Ssthen is ((scalar print $o readline $i), 1); 5243003dfeSmillert close($o) or die "Could not close: $!"; 5343003dfeSmillert close($i); 5443003dfeSmillert} 5543003dfeSmillert 569f11ffb7Safresh1if (open(GRK, '<', $grk)) { 5743003dfeSmillert binmode(GRK, ":bytes"); 58898184e3Ssthen is(scalar <GRK>, "\xe1\xe2\xe3"); 5943003dfeSmillert close GRK; 6043003dfeSmillert} 6143003dfeSmillert 6243003dfeSmillert$SIG{__WARN__} = sub {$warn .= $_[0]}; 6343003dfeSmillert 64898184e3Ssthenis (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail'); 65898184e3Ssthenlike($warn, qr/^Cannot find encoding "NoneSuch" at/); 6643003dfeSmillert 679f11ffb7Safresh1is(open(RUSSKI, '>', $russki), 1); 6843003dfeSmillertprint RUSSKI "\x3c\x3f\x78"; 6943003dfeSmillertclose RUSSKI or die "Could not close: $!"; 709f11ffb7Safresh1open(RUSSKI, '<', $russki); 7143003dfeSmillertbinmode(RUSSKI, ":raw"); 7243003dfeSmillertmy $buf1; 7343003dfeSmillertread(RUSSKI, $buf1, 1); 7443003dfeSmillert# eof(RUSSKI); 7543003dfeSmillertbinmode(RUSSKI, ":encoding(koi8-r)"); 7643003dfeSmillertmy $buf2; 7743003dfeSmillertread(RUSSKI, $buf2, 1); 7843003dfeSmillertmy $offset = tell(RUSSKI); 79898184e3Ssthenis(ord $buf1, 0x3c); 80898184e3Ssthenis(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f); 81898184e3Ssthenis($offset, 2); 82898184e3Ssthenclose RUSSKI; 8343003dfeSmillert 8443003dfeSmillertundef $warn; 8543003dfeSmillert 8643003dfeSmillert# Check there is no Use of uninitialized value in concatenation (.) warning 8743003dfeSmillert# due to the way @latin2iso_num was used to make aliases. 88898184e3Ssthenis(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail'); 89898184e3Ssthen 90898184e3Ssthenlike($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/); 9143003dfeSmillert 9243003dfeSmillert# Create a string of chars that are 3 bytes in UTF-8 9343003dfeSmillertmy $str = "\x{1f80}" x 2048; 9443003dfeSmillert 9543003dfeSmillert# Write them to a file 9643003dfeSmillertopen(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!"; 9743003dfeSmillertprint F $str; 9843003dfeSmillertclose(F); 9943003dfeSmillert 10043003dfeSmillert# Read file back as UTF-8 10143003dfeSmillertopen(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!"; 10243003dfeSmillertmy $dstr = <F>; 10343003dfeSmillertclose(F); 104898184e3Ssthenis($dstr, $str); 10543003dfeSmillert 10643003dfeSmillert# Try decoding some bad stuff 10743003dfeSmillertopen(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!"; 10843003dfeSmillertif (ord('A') == 193) { # EBCDIC 10943003dfeSmillert print F "foo\x8c\x80\x80\x80bar\n\x80foo\n"; 11043003dfeSmillert} else { 11143003dfeSmillert print F "foo\xF0\x80\x80\x80bar\n\x80foo\n"; 11243003dfeSmillert} 11343003dfeSmillertclose(F); 11443003dfeSmillert 11543003dfeSmillertopen(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!"; 11643003dfeSmillert$dstr = join(":", <F>); 11743003dfeSmillertclose(F); 11843003dfeSmillertif (ord('A') == 193) { # EBCDIC 119898184e3Ssthen is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n"); 12043003dfeSmillert} else { 121898184e3Ssthen is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n"); 12243003dfeSmillert} 12343003dfeSmillert 12491f110e0Safresh1# Check that PerlIO::encoding can handle custom encodings that do funny 12591f110e0Safresh1# things with the buffer. 12691f110e0Safresh1use Encode::Encoding; 12791f110e0Safresh1package Extensive { 12891f110e0Safresh1 @ISA = Encode::Encoding; 12991f110e0Safresh1 __PACKAGE__->Define('extensive'); 13091f110e0Safresh1 sub encode($$;$) { 13191f110e0Safresh1 my ($self,$buf,$chk) = @_; 13291f110e0Safresh1 my $leftovers = ''; 13391f110e0Safresh1 if ($buf =~ /(.*\n)(?!\z)/) { 13491f110e0Safresh1 $buf = $1; 13591f110e0Safresh1 $leftovers = $'; 13691f110e0Safresh1 } 13791f110e0Safresh1 if ($chk) { 13891f110e0Safresh1 undef $_[1]; 13991f110e0Safresh1 my @x = (' ') x 8000; # reuse the just-freed buffer 14091f110e0Safresh1 $_[1] = $leftovers; # SvPVX now points elsewhere and is shorter 14191f110e0Safresh1 } # than bufsiz 14291f110e0Safresh1 $buf; 14391f110e0Safresh1 } 14491f110e0Safresh1 no warnings 'once'; 14591f110e0Safresh1 *decode = *encode; 14691f110e0Safresh1} 14791f110e0Safresh1open my $fh, ">:encoding(extensive)", \$buf; 14891f110e0Safresh1$fh->autoflush; 14991f110e0Safresh1print $fh "doughnut\n"; 15091f110e0Safresh1print $fh "quaffee\n"; 15191f110e0Safresh1# Print something longer than the buffer that encode() shrunk: 15291f110e0Safresh1print $fh "The beech leaves beech leaves on the beach by the beech.\n"; 15391f110e0Safresh1close $fh; 15491f110e0Safresh1is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by" 15591f110e0Safresh1 ." the beech.\n", 'buffer realloc during encoding'; 15691f110e0Safresh1$buf = "Sheila surely shod Sean\nin shoddy shoes.\n"; 15791f110e0Safresh1open $fh, "<:encoding(extensive)", \$buf; 15891f110e0Safresh1is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n", 15991f110e0Safresh1 'buffer realloc during decoding'; 16091f110e0Safresh1 16191f110e0Safresh1package Cower { 16291f110e0Safresh1 @ISA = Encode::Encoding; 16391f110e0Safresh1 __PACKAGE__->Define('cower'); 16491f110e0Safresh1 sub encode($$;$) { 16591f110e0Safresh1 my ($self,$buf,$chk) = @_; 16691f110e0Safresh1 my $leftovers = ''; 16791f110e0Safresh1 if ($buf =~ /(.*\n)(?!\z)/) { 16891f110e0Safresh1 $buf = $1; 16991f110e0Safresh1 $leftovers = $'; 17091f110e0Safresh1 } 17191f110e0Safresh1 if ($chk) { 17291f110e0Safresh1 no warnings; # stupid @_[1] warning 17391f110e0Safresh1 @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write) 17491f110e0Safresh1 } 17591f110e0Safresh1 $buf; 17691f110e0Safresh1 } 17791f110e0Safresh1 no warnings 'once'; 17891f110e0Safresh1 *decode = *encode; 17991f110e0Safresh1} 18091f110e0Safresh1open $fh, ">:encoding(cower)", \$buf; 18191f110e0Safresh1$fh->autoflush; 18291f110e0Safresh1print $fh $_ for qw "pumping plum pits"; 18391f110e0Safresh1close $fh; 18491f110e0Safresh1is $buf, "pumpingplumpits", 'cowing buffer during encoding'; 18591f110e0Safresh1$buf = "pumping\nplum\npits\n"; 18691f110e0Safresh1open $fh, "<:encoding(cower)", \$buf; 18791f110e0Safresh1is join("", <$fh>), "pumping\nplum\npits\n", 18891f110e0Safresh1 'cowing buffer during decoding'; 18991f110e0Safresh1 19091f110e0Safresh1package Globber { 19191f110e0Safresh1 no warnings 'once'; 19291f110e0Safresh1 @ISA = Encode::Encoding; 19391f110e0Safresh1 __PACKAGE__->Define('globber'); 19491f110e0Safresh1 sub encode($$;$) { 19591f110e0Safresh1 my ($self,$buf,$chk) = @_; 19691f110e0Safresh1 $_[1] = *foo if $chk; 19791f110e0Safresh1 $buf; 19891f110e0Safresh1 } 19991f110e0Safresh1 *decode = *encode; 20091f110e0Safresh1} 20191f110e0Safresh1 20291f110e0Safresh1# Here we just want to test there is no crash. The actual output is not so 20391f110e0Safresh1# important. 20491f110e0Safresh1# We need a double eval, as scope unwinding will close the handle, 20591f110e0Safresh1# which croaks. 206*56d68f1eSafresh1# With PERL_DESTRUCT_LEVEL set, we have to skip this 20791f110e0Safresh1# test, as it triggers bug #115692, resulting in string table warnings. 20891f110e0Safresh1SKIP: { 209*56d68f1eSafresh1skip "produces string table warnings", 2 if $ENV{PERL_DESTRUCT_LEVEL}; 21091f110e0Safresh1 21191f110e0Safresh1eval { eval { 21291f110e0Safresh1 open my $fh, ">:encoding(globber)", \$buf; 21391f110e0Safresh1 print $fh "Agathopous Goodfoot\n"; 21491f110e0Safresh1 close $fh; 21591f110e0Safresh1}; $e = $@}; 21691f110e0Safresh1like $@||$e, qr/Close with partial character/, 21791f110e0Safresh1 'no crash when assigning glob to buffer in encode'; 21891f110e0Safresh1$buf = "To hymn him who heard her herd herd\n"; 21991f110e0Safresh1open $fh, "<:encoding(globber)", \$buf; 22091f110e0Safresh1my $x = <$fh>; 22191f110e0Safresh1close $fh; 22291f110e0Safresh1is $x, "To hymn him who heard her herd herd\n", 22391f110e0Safresh1 'no crash when assigning glob to buffer in decode'; 22491f110e0Safresh1 22591f110e0Safresh1} # SKIP 22691f110e0Safresh1 2279f11ffb7Safresh1# decoding shouldn't mutate the original bytes [perl #132833] 2289f11ffb7Safresh1{ 2299f11ffb7Safresh1 my $b = "a\0b\0\n\0"; 2309f11ffb7Safresh1 open my $fh, "<:encoding(UTF16-LE)", \$b or die; 2319f11ffb7Safresh1 is scalar(<$fh>), "ab\n"; 2329f11ffb7Safresh1 is $b, "a\0b\0\n\0"; 2339f11ffb7Safresh1 close $fh or die; 2349f11ffb7Safresh1 is $b, "a\0b\0\n\0"; 2359f11ffb7Safresh1} 2369f11ffb7Safresh1 23743003dfeSmillertEND { 23843003dfeSmillert 1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte); 23943003dfeSmillert} 240