xref: /openbsd-src/gnu/usr.bin/perl/ext/PerlIO-encoding/t/encoding.t (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
1#!./perl -w
2
3BEGIN {
4    unless (eval { require Encode } ) {
5	print "1..0 # Skip: not Encode\n";
6	exit 0;
7    }
8    if (ord("A") == 193) {
9	print "1..0 # Skip: EBCDIC\n";
10	exit 0;
11    }
12    require "../../t/charset_tools.pl";
13}
14
15use Test::More tests => 27;
16
17my $grk = "grk$$";
18my $utf = "utf$$";
19my $fail1 = "fa$$";
20my $fail2 = "fb$$";
21my $russki = "koi8r$$";
22my $threebyte = "3byte$$";
23
24if (open(GRK, '>', $grk)) {
25    binmode(GRK, ":bytes");
26    # alpha beta gamma in ISO 8859-7
27    print GRK "\xe1\xe2\xe3";
28    close GRK or die "Could not close: $!";
29}
30
31{
32    is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1);
33    is(open(my $o,'>:utf8',$utf), 1);
34    is((print $o readline $i), 1);
35    close($o) or die "Could not close: $!";
36    close($i);
37}
38
39if (open(UTF, '<', $utf)) {
40    binmode(UTF, ":bytes");
41
42    # alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
43    is(scalar <UTF>, byte_utf8a_to_utf8n("\xce\xb1\xce\xb2\xce\xb3"));
44    close UTF;
45}
46
47{
48    use Encode;
49    is (open(my $i,'<:utf8',$utf), 1);
50    is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1);
51    is ((scalar print $o readline $i), 1);
52    close($o) or die "Could not close: $!";
53    close($i);
54}
55
56if (open(GRK, '<', $grk)) {
57    binmode(GRK, ":bytes");
58    is(scalar <GRK>, "\xe1\xe2\xe3");
59    close GRK;
60}
61
62$SIG{__WARN__} = sub {$warn .= $_[0]};
63
64is (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail');
65like($warn, qr/^Cannot find encoding "NoneSuch" at/);
66
67is(open(RUSSKI, '>', $russki), 1);
68print RUSSKI "\x3c\x3f\x78";
69close RUSSKI or die "Could not close: $!";
70open(RUSSKI, '<', $russki);
71binmode(RUSSKI, ":raw");
72my $buf1;
73read(RUSSKI, $buf1, 1);
74# eof(RUSSKI);
75binmode(RUSSKI, ":encoding(koi8-r)");
76my $buf2;
77read(RUSSKI, $buf2, 1);
78my $offset = tell(RUSSKI);
79is(ord $buf1, 0x3c);
80is(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f);
81is($offset, 2);
82close RUSSKI;
83
84undef $warn;
85
86# Check there is no Use of uninitialized value in concatenation (.) warning
87# due to the way @latin2iso_num was used to make aliases.
88is(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail');
89
90like($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/);
91
92# Create a string of chars that are 3 bytes in UTF-8
93my $str = "\x{1f80}" x 2048;
94
95# Write them to a file
96open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
97print F $str;
98close(F);
99
100# Read file back as UTF-8
101open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
102my $dstr = <F>;
103close(F);
104is($dstr, $str);
105
106# Try decoding some bad stuff
107open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
108if (ord('A') == 193) { # EBCDIC
109    print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
110} else {
111    print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
112}
113close(F);
114
115open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
116$dstr = join(":", <F>);
117close(F);
118if (ord('A') == 193) { # EBCDIC
119    is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n");
120} else {
121    is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
122}
123
124# Check that PerlIO::encoding can handle custom encodings that do funny
125# things with the buffer.
126use Encode::Encoding;
127package Extensive {
128 @ISA = Encode::Encoding;
129 __PACKAGE__->Define('extensive');
130 sub encode($$;$) {
131  my ($self,$buf,$chk) = @_;
132  my $leftovers = '';
133  if ($buf =~ /(.*\n)(?!\z)/) {
134    $buf = $1;
135    $leftovers = $';
136  }
137  if ($chk) {
138   undef $_[1];
139   my @x = (' ') x 8000; # reuse the just-freed buffer
140   $_[1] = $leftovers;   # SvPVX now points elsewhere and is shorter
141  }                      # than bufsiz
142  $buf;
143 }
144 no warnings 'once';
145 *decode = *encode;
146}
147open my $fh, ">:encoding(extensive)", \$buf;
148$fh->autoflush;
149print $fh "doughnut\n";
150print $fh "quaffee\n";
151# Print something longer than the buffer that encode() shrunk:
152print $fh "The beech leaves beech leaves on the beach by the beech.\n";
153close $fh;
154is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by"
155        ." the beech.\n", 'buffer realloc during encoding';
156$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
157open $fh, "<:encoding(extensive)", \$buf;
158is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
159   'buffer realloc during decoding';
160
161package Cower {
162 @ISA = Encode::Encoding;
163 __PACKAGE__->Define('cower');
164 sub encode($$;$) {
165  my ($self,$buf,$chk) = @_;
166  my $leftovers = '';
167  if ($buf =~ /(.*\n)(?!\z)/) {
168    $buf = $1;
169    $leftovers = $';
170  }
171  if ($chk) {
172   no warnings; # stupid @_[1] warning
173   @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write)
174  }
175  $buf;
176 }
177 no warnings 'once';
178 *decode = *encode;
179}
180open $fh, ">:encoding(cower)", \$buf;
181$fh->autoflush;
182print $fh $_ for qw "pumping plum pits";
183close $fh;
184is $buf, "pumpingplumpits", 'cowing buffer during encoding';
185$buf = "pumping\nplum\npits\n";
186open $fh, "<:encoding(cower)", \$buf;
187is join("", <$fh>), "pumping\nplum\npits\n",
188  'cowing buffer during decoding';
189
190package Globber {
191 no warnings 'once';
192 @ISA = Encode::Encoding;
193 __PACKAGE__->Define('globber');
194 sub encode($$;$) {
195  my ($self,$buf,$chk) = @_;
196  $_[1] = *foo if $chk;
197  $buf;
198 }
199 *decode = *encode;
200}
201
202# Here we just want to test there is no crash.  The actual output is not so
203# important.
204# We need a double eval, as scope unwinding will close the handle,
205# which croaks.
206# With PERL_DESTRUCT_LEVEL set, we have to skip this
207# test, as it triggers bug #115692, resulting in string table warnings.
208SKIP: {
209skip "produces string table warnings", 2 if $ENV{PERL_DESTRUCT_LEVEL};
210
211eval { eval {
212    open my $fh, ">:encoding(globber)", \$buf;
213    print $fh "Agathopous Goodfoot\n";
214    close $fh;
215}; $e = $@};
216like $@||$e, qr/Close with partial character/,
217     'no crash when assigning glob to buffer in encode';
218$buf = "To hymn him who heard her herd herd\n";
219open $fh, "<:encoding(globber)", \$buf;
220my $x = <$fh>;
221close $fh;
222is $x, "To hymn him who heard her herd herd\n",
223     'no crash when assigning glob to buffer in decode';
224
225} # SKIP
226
227# decoding shouldn't mutate the original bytes [perl #132833]
228{
229    my $b = "a\0b\0\n\0";
230    open my $fh, "<:encoding(UTF16-LE)", \$b or die;
231    is scalar(<$fh>), "ab\n";
232    is $b, "a\0b\0\n\0";
233    close $fh or die;
234    is $b, "a\0b\0\n\0";
235}
236
237END {
238    1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
239}
240