xref: /openbsd-src/gnu/usr.bin/perl/ext/PerlIO-encoding/t/encoding.t (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
143003dfeSmillert#!./perl -w
243003dfeSmillert
343003dfeSmillertBEGIN {
443003dfeSmillert    unless (eval { require Encode } ) {
543003dfeSmillert	print "1..0 # Skip: not Encode\n";
643003dfeSmillert	exit 0;
743003dfeSmillert    }
8b8851fccSafresh1    if (ord("A") == 193) {
9b8851fccSafresh1	print "1..0 # Skip: EBCDIC\n";
10b8851fccSafresh1	exit 0;
11b8851fccSafresh1    }
12b8851fccSafresh1    require "../../t/charset_tools.pl";
1343003dfeSmillert}
1443003dfeSmillert
159f11ffb7Safresh1use Test::More tests => 27;
1643003dfeSmillert
1743003dfeSmillertmy $grk = "grk$$";
1843003dfeSmillertmy $utf = "utf$$";
1943003dfeSmillertmy $fail1 = "fa$$";
2043003dfeSmillertmy $fail2 = "fb$$";
2143003dfeSmillertmy $russki = "koi8r$$";
2243003dfeSmillertmy $threebyte = "3byte$$";
2343003dfeSmillert
249f11ffb7Safresh1if (open(GRK, '>', $grk)) {
2543003dfeSmillert    binmode(GRK, ":bytes");
2643003dfeSmillert    # alpha beta gamma in ISO 8859-7
2743003dfeSmillert    print GRK "\xe1\xe2\xe3";
2843003dfeSmillert    close GRK or die "Could not close: $!";
2943003dfeSmillert}
3043003dfeSmillert
3143003dfeSmillert{
32898184e3Ssthen    is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1);
33898184e3Ssthen    is(open(my $o,'>:utf8',$utf), 1);
34898184e3Ssthen    is((print $o readline $i), 1);
3543003dfeSmillert    close($o) or die "Could not close: $!";
3643003dfeSmillert    close($i);
3743003dfeSmillert}
3843003dfeSmillert
399f11ffb7Safresh1if (open(UTF, '<', $utf)) {
4043003dfeSmillert    binmode(UTF, ":bytes");
41b8851fccSafresh1
4243003dfeSmillert    # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
43b8851fccSafresh1    is(scalar <UTF>, byte_utf8a_to_utf8n("\xce\xb1\xce\xb2\xce\xb3"));
4443003dfeSmillert    close UTF;
4543003dfeSmillert}
4643003dfeSmillert
4743003dfeSmillert{
4843003dfeSmillert    use Encode;
49898184e3Ssthen    is (open(my $i,'<:utf8',$utf), 1);
50898184e3Ssthen    is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1);
51898184e3Ssthen    is ((scalar print $o readline $i), 1);
5243003dfeSmillert    close($o) or die "Could not close: $!";
5343003dfeSmillert    close($i);
5443003dfeSmillert}
5543003dfeSmillert
569f11ffb7Safresh1if (open(GRK, '<', $grk)) {
5743003dfeSmillert    binmode(GRK, ":bytes");
58898184e3Ssthen    is(scalar <GRK>, "\xe1\xe2\xe3");
5943003dfeSmillert    close GRK;
6043003dfeSmillert}
6143003dfeSmillert
6243003dfeSmillert$SIG{__WARN__} = sub {$warn .= $_[0]};
6343003dfeSmillert
64898184e3Ssthenis (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail');
65898184e3Ssthenlike($warn, qr/^Cannot find encoding "NoneSuch" at/);
6643003dfeSmillert
679f11ffb7Safresh1is(open(RUSSKI, '>', $russki), 1);
6843003dfeSmillertprint RUSSKI "\x3c\x3f\x78";
6943003dfeSmillertclose RUSSKI or die "Could not close: $!";
709f11ffb7Safresh1open(RUSSKI, '<', $russki);
7143003dfeSmillertbinmode(RUSSKI, ":raw");
7243003dfeSmillertmy $buf1;
7343003dfeSmillertread(RUSSKI, $buf1, 1);
7443003dfeSmillert# eof(RUSSKI);
7543003dfeSmillertbinmode(RUSSKI, ":encoding(koi8-r)");
7643003dfeSmillertmy $buf2;
7743003dfeSmillertread(RUSSKI, $buf2, 1);
7843003dfeSmillertmy $offset = tell(RUSSKI);
79898184e3Ssthenis(ord $buf1, 0x3c);
80898184e3Ssthenis(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f);
81898184e3Ssthenis($offset, 2);
82898184e3Ssthenclose RUSSKI;
8343003dfeSmillert
8443003dfeSmillertundef $warn;
8543003dfeSmillert
8643003dfeSmillert# Check there is no Use of uninitialized value in concatenation (.) warning
8743003dfeSmillert# due to the way @latin2iso_num was used to make aliases.
88898184e3Ssthenis(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail');
89898184e3Ssthen
90898184e3Ssthenlike($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/);
9143003dfeSmillert
9243003dfeSmillert# Create a string of chars that are 3 bytes in UTF-8
9343003dfeSmillertmy $str = "\x{1f80}" x 2048;
9443003dfeSmillert
9543003dfeSmillert# Write them to a file
9643003dfeSmillertopen(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
9743003dfeSmillertprint F $str;
9843003dfeSmillertclose(F);
9943003dfeSmillert
10043003dfeSmillert# Read file back as UTF-8
10143003dfeSmillertopen(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
10243003dfeSmillertmy $dstr = <F>;
10343003dfeSmillertclose(F);
104898184e3Ssthenis($dstr, $str);
10543003dfeSmillert
10643003dfeSmillert# Try decoding some bad stuff
10743003dfeSmillertopen(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
10843003dfeSmillertif (ord('A') == 193) { # EBCDIC
10943003dfeSmillert    print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
11043003dfeSmillert} else {
11143003dfeSmillert    print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
11243003dfeSmillert}
11343003dfeSmillertclose(F);
11443003dfeSmillert
11543003dfeSmillertopen(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
11643003dfeSmillert$dstr = join(":", <F>);
11743003dfeSmillertclose(F);
11843003dfeSmillertif (ord('A') == 193) { # EBCDIC
119898184e3Ssthen    is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n");
12043003dfeSmillert} else {
121898184e3Ssthen    is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
12243003dfeSmillert}
12343003dfeSmillert
12491f110e0Safresh1# Check that PerlIO::encoding can handle custom encodings that do funny
12591f110e0Safresh1# things with the buffer.
12691f110e0Safresh1use Encode::Encoding;
12791f110e0Safresh1package Extensive {
12891f110e0Safresh1 @ISA = Encode::Encoding;
12991f110e0Safresh1 __PACKAGE__->Define('extensive');
13091f110e0Safresh1 sub encode($$;$) {
13191f110e0Safresh1  my ($self,$buf,$chk) = @_;
13291f110e0Safresh1  my $leftovers = '';
13391f110e0Safresh1  if ($buf =~ /(.*\n)(?!\z)/) {
13491f110e0Safresh1    $buf = $1;
13591f110e0Safresh1    $leftovers = $';
13691f110e0Safresh1  }
13791f110e0Safresh1  if ($chk) {
13891f110e0Safresh1   undef $_[1];
13991f110e0Safresh1   my @x = (' ') x 8000; # reuse the just-freed buffer
14091f110e0Safresh1   $_[1] = $leftovers;   # SvPVX now points elsewhere and is shorter
14191f110e0Safresh1  }                      # than bufsiz
14291f110e0Safresh1  $buf;
14391f110e0Safresh1 }
14491f110e0Safresh1 no warnings 'once';
14591f110e0Safresh1 *decode = *encode;
14691f110e0Safresh1}
14791f110e0Safresh1open my $fh, ">:encoding(extensive)", \$buf;
14891f110e0Safresh1$fh->autoflush;
14991f110e0Safresh1print $fh "doughnut\n";
15091f110e0Safresh1print $fh "quaffee\n";
15191f110e0Safresh1# Print something longer than the buffer that encode() shrunk:
15291f110e0Safresh1print $fh "The beech leaves beech leaves on the beach by the beech.\n";
15391f110e0Safresh1close $fh;
15491f110e0Safresh1is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by"
15591f110e0Safresh1        ." the beech.\n", 'buffer realloc during encoding';
15691f110e0Safresh1$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
15791f110e0Safresh1open $fh, "<:encoding(extensive)", \$buf;
15891f110e0Safresh1is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
15991f110e0Safresh1   'buffer realloc during decoding';
16091f110e0Safresh1
16191f110e0Safresh1package Cower {
16291f110e0Safresh1 @ISA = Encode::Encoding;
16391f110e0Safresh1 __PACKAGE__->Define('cower');
16491f110e0Safresh1 sub encode($$;$) {
16591f110e0Safresh1  my ($self,$buf,$chk) = @_;
16691f110e0Safresh1  my $leftovers = '';
16791f110e0Safresh1  if ($buf =~ /(.*\n)(?!\z)/) {
16891f110e0Safresh1    $buf = $1;
16991f110e0Safresh1    $leftovers = $';
17091f110e0Safresh1  }
17191f110e0Safresh1  if ($chk) {
17291f110e0Safresh1   no warnings; # stupid @_[1] warning
17391f110e0Safresh1   @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write)
17491f110e0Safresh1  }
17591f110e0Safresh1  $buf;
17691f110e0Safresh1 }
17791f110e0Safresh1 no warnings 'once';
17891f110e0Safresh1 *decode = *encode;
17991f110e0Safresh1}
18091f110e0Safresh1open $fh, ">:encoding(cower)", \$buf;
18191f110e0Safresh1$fh->autoflush;
18291f110e0Safresh1print $fh $_ for qw "pumping plum pits";
18391f110e0Safresh1close $fh;
18491f110e0Safresh1is $buf, "pumpingplumpits", 'cowing buffer during encoding';
18591f110e0Safresh1$buf = "pumping\nplum\npits\n";
18691f110e0Safresh1open $fh, "<:encoding(cower)", \$buf;
18791f110e0Safresh1is join("", <$fh>), "pumping\nplum\npits\n",
18891f110e0Safresh1  'cowing buffer during decoding';
18991f110e0Safresh1
19091f110e0Safresh1package Globber {
19191f110e0Safresh1 no warnings 'once';
19291f110e0Safresh1 @ISA = Encode::Encoding;
19391f110e0Safresh1 __PACKAGE__->Define('globber');
19491f110e0Safresh1 sub encode($$;$) {
19591f110e0Safresh1  my ($self,$buf,$chk) = @_;
19691f110e0Safresh1  $_[1] = *foo if $chk;
19791f110e0Safresh1  $buf;
19891f110e0Safresh1 }
19991f110e0Safresh1 *decode = *encode;
20091f110e0Safresh1}
20191f110e0Safresh1
20291f110e0Safresh1# Here we just want to test there is no crash.  The actual output is not so
20391f110e0Safresh1# important.
20491f110e0Safresh1# We need a double eval, as scope unwinding will close the handle,
20591f110e0Safresh1# which croaks.
206*56d68f1eSafresh1# With PERL_DESTRUCT_LEVEL set, we have to skip this
20791f110e0Safresh1# test, as it triggers bug #115692, resulting in string table warnings.
20891f110e0Safresh1SKIP: {
209*56d68f1eSafresh1skip "produces string table warnings", 2 if $ENV{PERL_DESTRUCT_LEVEL};
21091f110e0Safresh1
21191f110e0Safresh1eval { eval {
21291f110e0Safresh1    open my $fh, ">:encoding(globber)", \$buf;
21391f110e0Safresh1    print $fh "Agathopous Goodfoot\n";
21491f110e0Safresh1    close $fh;
21591f110e0Safresh1}; $e = $@};
21691f110e0Safresh1like $@||$e, qr/Close with partial character/,
21791f110e0Safresh1     'no crash when assigning glob to buffer in encode';
21891f110e0Safresh1$buf = "To hymn him who heard her herd herd\n";
21991f110e0Safresh1open $fh, "<:encoding(globber)", \$buf;
22091f110e0Safresh1my $x = <$fh>;
22191f110e0Safresh1close $fh;
22291f110e0Safresh1is $x, "To hymn him who heard her herd herd\n",
22391f110e0Safresh1     'no crash when assigning glob to buffer in decode';
22491f110e0Safresh1
22591f110e0Safresh1} # SKIP
22691f110e0Safresh1
2279f11ffb7Safresh1# decoding shouldn't mutate the original bytes [perl #132833]
2289f11ffb7Safresh1{
2299f11ffb7Safresh1    my $b = "a\0b\0\n\0";
2309f11ffb7Safresh1    open my $fh, "<:encoding(UTF16-LE)", \$b or die;
2319f11ffb7Safresh1    is scalar(<$fh>), "ab\n";
2329f11ffb7Safresh1    is $b, "a\0b\0\n\0";
2339f11ffb7Safresh1    close $fh or die;
2349f11ffb7Safresh1    is $b, "a\0b\0\n\0";
2359f11ffb7Safresh1}
2369f11ffb7Safresh1
23743003dfeSmillertEND {
23843003dfeSmillert    1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
23943003dfeSmillert}
240