xref: /openbsd-src/gnu/usr.bin/perl/ext/PerlIO-encoding/t/encoding.t (revision 2777ee89d0e541ec819d05abee114837837abbec)
1#!./perl -w
2
3BEGIN {
4    unless (find PerlIO::Layer 'perlio') {
5	print "1..0 # Skip: not perlio\n";
6	exit 0;
7    }
8    unless (eval { require Encode } ) {
9	print "1..0 # Skip: not Encode\n";
10	exit 0;
11    }
12}
13
14use Test::More tests => 24;
15
16my $grk = "grk$$";
17my $utf = "utf$$";
18my $fail1 = "fa$$";
19my $fail2 = "fb$$";
20my $russki = "koi8r$$";
21my $threebyte = "3byte$$";
22
23if (open(GRK, ">$grk")) {
24    binmode(GRK, ":bytes");
25    # alpha beta gamma in ISO 8859-7
26    print GRK "\xe1\xe2\xe3";
27    close GRK or die "Could not close: $!";
28}
29
30{
31    is(open(my $i,'<:encoding(iso-8859-7)',$grk), 1);
32    is(open(my $o,'>:utf8',$utf), 1);
33    is((print $o readline $i), 1);
34    close($o) or die "Could not close: $!";
35    close($i);
36}
37
38if (open(UTF, "<$utf")) {
39    binmode(UTF, ":bytes");
40    if (ord('A') == 193) { # EBCDIC
41	# alpha beta gamma in UTF-EBCDIC Unicode (0x3b1 0x3b2 0x3b3)
42	is(scalar <UTF>, "\xb4\x58\xb4\x59\xb4\x62");
43    } else {
44	# alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
45	is(scalar <UTF>, "\xce\xb1\xce\xb2\xce\xb3");
46    }
47    close UTF;
48}
49
50{
51    use Encode;
52    is (open(my $i,'<:utf8',$utf), 1);
53    is (open(my $o,'>:encoding(iso-8859-7)',$grk), 1);
54    is ((scalar print $o readline $i), 1);
55    close($o) or die "Could not close: $!";
56    close($i);
57}
58
59if (open(GRK, "<$grk")) {
60    binmode(GRK, ":bytes");
61    is(scalar <GRK>, "\xe1\xe2\xe3");
62    close GRK;
63}
64
65$SIG{__WARN__} = sub {$warn .= $_[0]};
66
67is (open(FAIL, ">:encoding(NoneSuch)", $fail1), undef, 'Open should fail');
68like($warn, qr/^Cannot find encoding "NoneSuch" at/);
69
70is(open(RUSSKI, ">$russki"), 1);
71print RUSSKI "\x3c\x3f\x78";
72close RUSSKI or die "Could not close: $!";
73open(RUSSKI, "$russki");
74binmode(RUSSKI, ":raw");
75my $buf1;
76read(RUSSKI, $buf1, 1);
77# eof(RUSSKI);
78binmode(RUSSKI, ":encoding(koi8-r)");
79my $buf2;
80read(RUSSKI, $buf2, 1);
81my $offset = tell(RUSSKI);
82is(ord $buf1, 0x3c);
83is(ord $buf2, (ord('A') == 193) ? 0x6f : 0x3f);
84is($offset, 2);
85close RUSSKI;
86
87undef $warn;
88
89# Check there is no Use of uninitialized value in concatenation (.) warning
90# due to the way @latin2iso_num was used to make aliases.
91is(open(FAIL, ">:encoding(latin42)", $fail2), undef, 'Open should fail');
92
93like($warn, qr/^Cannot find encoding "latin42" at.*line \d+\.$/);
94
95# Create a string of chars that are 3 bytes in UTF-8
96my $str = "\x{1f80}" x 2048;
97
98# Write them to a file
99open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
100print F $str;
101close(F);
102
103# Read file back as UTF-8
104open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
105my $dstr = <F>;
106close(F);
107is($dstr, $str);
108
109# Try decoding some bad stuff
110open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
111if (ord('A') == 193) { # EBCDIC
112    print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
113} else {
114    print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
115}
116close(F);
117
118open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
119$dstr = join(":", <F>);
120close(F);
121if (ord('A') == 193) { # EBCDIC
122    is($dstr, "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n");
123} else {
124    is($dstr, "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n");
125}
126
127# Check that PerlIO::encoding can handle custom encodings that do funny
128# things with the buffer.
129use Encode::Encoding;
130package Extensive {
131 @ISA = Encode::Encoding;
132 __PACKAGE__->Define('extensive');
133 sub encode($$;$) {
134  my ($self,$buf,$chk) = @_;
135  my $leftovers = '';
136  if ($buf =~ /(.*\n)(?!\z)/) {
137    $buf = $1;
138    $leftovers = $';
139  }
140  if ($chk) {
141   undef $_[1];
142   my @x = (' ') x 8000; # reuse the just-freed buffer
143   $_[1] = $leftovers;   # SvPVX now points elsewhere and is shorter
144  }                      # than bufsiz
145  $buf;
146 }
147 no warnings 'once';
148 *decode = *encode;
149}
150open my $fh, ">:encoding(extensive)", \$buf;
151$fh->autoflush;
152print $fh "doughnut\n";
153print $fh "quaffee\n";
154# Print something longer than the buffer that encode() shrunk:
155print $fh "The beech leaves beech leaves on the beach by the beech.\n";
156close $fh;
157is $buf, "doughnut\nquaffee\nThe beech leaves beech leaves on the beach by"
158        ." the beech.\n", 'buffer realloc during encoding';
159$buf = "Sheila surely shod Sean\nin shoddy shoes.\n";
160open $fh, "<:encoding(extensive)", \$buf;
161is join("", <$fh>), "Sheila surely shod Sean\nin shoddy shoes.\n",
162   'buffer realloc during decoding';
163
164package Cower {
165 @ISA = Encode::Encoding;
166 __PACKAGE__->Define('cower');
167 sub encode($$;$) {
168  my ($self,$buf,$chk) = @_;
169  my $leftovers = '';
170  if ($buf =~ /(.*\n)(?!\z)/) {
171    $buf = $1;
172    $leftovers = $';
173  }
174  if ($chk) {
175   no warnings; # stupid @_[1] warning
176   @_[1] = keys %{{$leftovers=>1}}; # shared hash key (copy-on-write)
177  }
178  $buf;
179 }
180 no warnings 'once';
181 *decode = *encode;
182}
183open $fh, ">:encoding(cower)", \$buf;
184$fh->autoflush;
185print $fh $_ for qw "pumping plum pits";
186close $fh;
187is $buf, "pumpingplumpits", 'cowing buffer during encoding';
188$buf = "pumping\nplum\npits\n";
189open $fh, "<:encoding(cower)", \$buf;
190is join("", <$fh>), "pumping\nplum\npits\n",
191  'cowing buffer during decoding';
192
193package Globber {
194 no warnings 'once';
195 @ISA = Encode::Encoding;
196 __PACKAGE__->Define('globber');
197 sub encode($$;$) {
198  my ($self,$buf,$chk) = @_;
199  $_[1] = *foo if $chk;
200  $buf;
201 }
202 *decode = *encode;
203}
204
205# Here we just want to test there is no crash.  The actual output is not so
206# important.
207# We need a double eval, as scope unwinding will close the handle,
208# which croaks.
209# Under debugging builds with PERL_DESTRUCT_LEVEL set, we have to skip this
210# test, as it triggers bug #115692, resulting in string table warnings.
211require Config;
212SKIP: {
213skip "produces string table warnings", 2
214  if "@{[Config::non_bincompat_options()]}" =~ /\bDEBUGGING\b/
215   && $ENV{PERL_DESTRUCT_LEVEL};
216
217eval { eval {
218    open my $fh, ">:encoding(globber)", \$buf;
219    print $fh "Agathopous Goodfoot\n";
220    close $fh;
221}; $e = $@};
222like $@||$e, qr/Close with partial character/,
223     'no crash when assigning glob to buffer in encode';
224$buf = "To hymn him who heard her herd herd\n";
225open $fh, "<:encoding(globber)", \$buf;
226my $x = <$fh>;
227close $fh;
228is $x, "To hymn him who heard her herd herd\n",
229     'no crash when assigning glob to buffer in decode';
230
231} # SKIP
232
233END {
234    1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
235}
236