1#!./perl -w 2 3BEGIN { 4 unless (eval { require Encode } ) { 5 print "1..0 # Skip: not Encode\n"; 6 exit 0; 7 } 8 if (ord("A") == 193) { 9 print "1..0 # Skip: EBCDIC\n"; 10 exit 0; 11 } 12 require "../../t/charset_tools.pl"; 13} 14 15use Test::More tests => 27; 16 17my $grk = "grk$$"; 18my $utf = "utf$$"; 19my $fail1 = "fa$$"; 20my $fail2 = "fb$$"; 21my $russki = "koi8r$$"; 22my $threebyte = "3byte$$"; 23 24if (open(GRK, '>', $grk)) { 25 binmode(GRK, ":bytes"); 26 # alpha beta gamma in ISO 8859-7 27 print GRK "\xe1\xe2\xe3"; 28 close GRK or die "Could not close: $!"; 29} 30 31{ 32 is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1); 33 is(open(my $o,'>:utf8',$utf), 1); 34 is((print $o readline $i), 1); 35 close($o) or die "Could not close: $!"; 36 close($i); 37} 38 39if (open(UTF, '<', $utf)) { 40 binmode(UTF, ":bytes"); 41 42 # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3) 43 is(scalar <UTF>, byte_utf8a_to_utf8n("\xce\xb1\xce\xb2\xce\xb3")); 44 close UTF; 45} 46 47{ 48 use Encode; 49 is (open(my $i,'<:utf8',$utf), 1); 50 is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1); 51 is ((scalar print $o readline $i), 1); 52 close($o) or die "Could not close: $!"; 53 close($i); 54} 55 56if (open(GRK, '<', $grk)) { 57 binmode(GRK, ":bytes"); 58 is(scalar <GRK>, "\xe1\xe2\xe3"); 59 close GRK; 60} 61 62$SIG{__WARN__} = sub {$warn .= $_[0]}; 63 64is (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail'); 65like($warn, qr/^Cannot find encoding "NoneSuch" at/); 66 67is(open(RUSSKI, '>', $russki), 1); 68print RUSSKI "\x3c\x3f\x78"; 69close RUSSKI or die "Could not close: $!"; 70open(RUSSKI, '<', $russki); 71binmode(RUSSKI, ":raw"); 72my $buf1; 73read(RUSSKI, $buf1, 1); 74# eof(RUSSKI); 75binmode(RUSSKI, ":encoding(koi8-r)"); 76my $buf2; 77read(RUSSKI, $buf2, 1); 78my $offset = tell(RUSSKI); 79is(ord $buf1, 0x3c); 80is(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f); 81is($offset, 2); 82close RUSSKI; 83 84undef $warn; 85 86# Check there is no Use of uninitialized value in concatenation (.) warning 87# due to the way @latin2iso_num was used to make aliases. 88is(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail'); 89 90like($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/); 91 92# Create a string of chars that are 3 bytes in UTF-8 93my $str = "\x{1f80}" x 2048; 94 95# Write them to a file 96open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!"; 97print F $str; 98close(F); 99 100# Read file back as UTF-8 101open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!"; 102my $dstr = <F>; 103close(F); 104is($dstr, $str); 105 106# Try decoding some bad stuff 107open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!"; 108if (ord('A') == 193) { # EBCDIC 109 print F "foo\x8c\x80\x80\x80bar\n\x80foo\n"; 110} else { 111 print F "foo\xF0\x80\x80\x80bar\n\x80foo\n"; 112} 113close(F); 114 115open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!"; 116$dstr = join(":", <F>); 117close(F); 118if (ord('A') == 193) { # EBCDIC 119 is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n"); 120} else { 121 is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n"); 122} 123 124# Check that PerlIO::encoding can handle custom encodings that do funny 125# things with the buffer. 126use Encode::Encoding; 127package Extensive { 128 @ISA = Encode::Encoding; 129 __PACKAGE__->Define('extensive'); 130 sub encode($$;$) { 131 my ($self,$buf,$chk) = @_; 132 my $leftovers = ''; 133 if ($buf =~ /(.*\n)(?!\z)/) { 134 $buf = $1; 135 $leftovers = $'; 136 } 137 if ($chk) { 138 undef $_[1]; 139 my @x = (' ') x 8000; # reuse the just-freed buffer 140 $_[1] = $leftovers; # SvPVX now points elsewhere and is shorter 141 } # than bufsiz 142 $buf; 143 } 144 no warnings 'once'; 145 *decode = *encode; 146} 147open my $fh, ">:encoding(extensive)", \$buf; 148$fh->autoflush; 149print $fh "doughnut\n"; 150print $fh "quaffee\n"; 151# Print something longer than the buffer that encode() shrunk: 152print $fh "The beech leaves beech leaves on the beach by the beech.\n"; 153close $fh; 154is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by" 155 ." the beech.\n", 'buffer realloc during encoding'; 156$buf = "Sheila surely shod Sean\nin shoddy shoes.\n"; 157open $fh, "<:encoding(extensive)", \$buf; 158is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n", 159 'buffer realloc during decoding'; 160 161package Cower { 162 @ISA = Encode::Encoding; 163 __PACKAGE__->Define('cower'); 164 sub encode($$;$) { 165 my ($self,$buf,$chk) = @_; 166 my $leftovers = ''; 167 if ($buf =~ /(.*\n)(?!\z)/) { 168 $buf = $1; 169 $leftovers = $'; 170 } 171 if ($chk) { 172 no warnings; # stupid @_[1] warning 173 @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write) 174 } 175 $buf; 176 } 177 no warnings 'once'; 178 *decode = *encode; 179} 180open $fh, ">:encoding(cower)", \$buf; 181$fh->autoflush; 182print $fh $_ for qw "pumping plum pits"; 183close $fh; 184is $buf, "pumpingplumpits", 'cowing buffer during encoding'; 185$buf = "pumping\nplum\npits\n"; 186open $fh, "<:encoding(cower)", \$buf; 187is join("", <$fh>), "pumping\nplum\npits\n", 188 'cowing buffer during decoding'; 189 190package Globber { 191 no warnings 'once'; 192 @ISA = Encode::Encoding; 193 __PACKAGE__->Define('globber'); 194 sub encode($$;$) { 195 my ($self,$buf,$chk) = @_; 196 $_[1] = *foo if $chk; 197 $buf; 198 } 199 *decode = *encode; 200} 201 202# Here we just want to test there is no crash. The actual output is not so 203# important. 204# We need a double eval, as scope unwinding will close the handle, 205# which croaks. 206# With PERL_DESTRUCT_LEVEL set, we have to skip this 207# test, as it triggers bug #115692, resulting in string table warnings. 208SKIP: { 209skip "produces string table warnings", 2 if $ENV{PERL_DESTRUCT_LEVEL}; 210 211eval { eval { 212 open my $fh, ">:encoding(globber)", \$buf; 213 print $fh "Agathopous Goodfoot\n"; 214 close $fh; 215}; $e = $@}; 216like $@||$e, qr/Close with partial character/, 217 'no crash when assigning glob to buffer in encode'; 218$buf = "To hymn him who heard her herd herd\n"; 219open $fh, "<:encoding(globber)", \$buf; 220my $x = <$fh>; 221close $fh; 222is $x, "To hymn him who heard her herd herd\n", 223 'no crash when assigning glob to buffer in decode'; 224 225} # SKIP 226 227# decoding shouldn't mutate the original bytes [perl #132833] 228{ 229 my $b = "a\0b\0\n\0"; 230 open my $fh, "<:encoding(UTF16-LE)", \$b or die; 231 is scalar(<$fh>), "ab\n"; 232 is $b, "a\0b\0\n\0"; 233 close $fh or die; 234 is $b, "a\0b\0\n\0"; 235} 236 237END { 238 1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte); 239} 240