xref: /openbsd-src/gnu/usr.bin/perl/ext/PerlIO-encoding/t/encoding.t (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1#!./perl -w
2
3BEGIN {
4    unless (find PerlIO::Layer 'perlio') {
5	print "1..0 # Skip: not perlio\n";
6	exit 0;
7    }
8    unless (eval { require Encode } ) {
9	print "1..0 # Skip: not Encode\n";
10	exit 0;
11    }
12}
13
14print "1..15\n";
15
16my $grk = "grk$$";
17my $utf = "utf$$";
18my $fail1 = "fa$$";
19my $fail2 = "fb$$";
20my $russki = "koi8r$$";
21my $threebyte = "3byte$$";
22
23if (open(GRK, ">$grk")) {
24    binmode(GRK, ":bytes");
25    # alpha beta gamma in ISO 8859-7
26    print GRK "\xe1\xe2\xe3";
27    close GRK or die "Could not close: $!";
28}
29
30{
31    open(my $i,'<:encoding(iso-8859-7)',$grk);
32    print "ok 1\n";
33    open(my $o,'>:utf8',$utf);
34    print "ok 2\n";
35    print $o readline($i);
36    print "ok 3\n";
37    close($o) or die "Could not close: $!";
38    close($i);
39}
40
41if (open(UTF, "<$utf")) {
42    binmode(UTF, ":bytes");
43    if (ord('A') == 193) { # EBCDIC
44	# alpha beta gamma in UTF-EBCDIC Unicode (0x3b1 0x3b2 0x3b3)
45	print "not " unless <UTF> eq "\xb4\x58\xb4\x59\xb4\x62";
46    } else {
47	# alpha beta gamma in UTF-8 Unicode (0x3b1 0x3b2 0x3b3)
48	print "not " unless <UTF> eq "\xce\xb1\xce\xb2\xce\xb3";
49    }
50    print "ok 4\n";
51    close UTF;
52}
53
54{
55    use Encode;
56    open(my $i,'<:utf8',$utf);
57    print "ok 5\n";
58    open(my $o,'>:encoding(iso-8859-7)',$grk);
59    print "ok 6\n";
60    print $o readline($i);
61    print "ok 7\n";
62    close($o) or die "Could not close: $!";
63    close($i);
64}
65
66if (open(GRK, "<$grk")) {
67    binmode(GRK, ":bytes");
68    print "not " unless <GRK> eq "\xe1\xe2\xe3";
69    print "ok 8\n";
70    close GRK;
71}
72
73$SIG{__WARN__} = sub {$warn .= $_[0]};
74
75if (open(FAIL, ">:encoding(NoneSuch)", $fail1)) {
76    print "not ok 9 # Open should fail\n";
77} else {
78    print "ok 9\n";
79}
80if (!defined $warn) {
81    print "not ok 10 # warning is undef\n";
82} elsif ($warn =~ /^Cannot find encoding "NoneSuch" at/) {
83    print "ok 10\n";
84} else {
85    print "not ok 10 # warning is '$warn'";
86}
87
88if (open(RUSSKI, ">$russki")) {
89    print RUSSKI "\x3c\x3f\x78";
90    close RUSSKI or die "Could not close: $!";
91    open(RUSSKI, "$russki");
92    binmode(RUSSKI, ":raw");
93    my $buf1;
94    read(RUSSKI, $buf1, 1);
95    # eof(RUSSKI);
96    binmode(RUSSKI, ":encoding(koi8-r)");
97    my $buf2;
98    read(RUSSKI, $buf2, 1);
99    my $offset = tell(RUSSKI);
100    if (ord($buf1) == 0x3c &&
101	ord($buf2) == (ord('A') == 193) ? 0x6f : 0x3f &&
102	$offset == 2) {
103	print "ok 11\n";
104    } else {
105	printf "not ok 11 # [%s] [%s] %d\n",
106	       join(" ", unpack("H*", $buf1)),
107	       join(" ", unpack("H*", $buf2)), $offset;
108    }
109    close(RUSSKI);
110} else {
111    print "not ok 11 # open failed: $!\n";
112}
113
114undef $warn;
115
116# Check there is no Use of uninitialized value in concatenation (.) warning
117# due to the way @latin2iso_num was used to make aliases.
118if (open(FAIL, ">:encoding(latin42)", $fail2)) {
119    print "not ok 12 # Open should fail\n";
120} else {
121    print "ok 12\n";
122}
123if (!defined $warn) {
124    print "not ok 13 # warning is undef\n";
125} elsif ($warn =~ /^Cannot find encoding "latin42" at.*line \d+\.$/) {
126    print "ok 13\n";
127} else {
128    print "not ok 13 # warning is: \n";
129    $warn =~ s/^/# /mg;
130    print "$warn";
131}
132
133# Create a string of chars that are 3 bytes in UTF-8
134my $str = "\x{1f80}" x 2048;
135
136# Write them to a file
137open(F,'>:utf8',$threebyte) || die "Cannot open $threebyte:$!";
138print F $str;
139close(F);
140
141# Read file back as UTF-8
142open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
143my $dstr = <F>;
144close(F);
145print "not " unless ($dstr eq $str);
146print "ok 14\n";
147
148# Try decoding some bad stuff
149open(F,'>:raw',$threebyte) || die "Cannot open $threebyte:$!";
150if (ord('A') == 193) { # EBCDIC
151    print F "foo\x8c\x80\x80\x80bar\n\x80foo\n";
152} else {
153    print F "foo\xF0\x80\x80\x80bar\n\x80foo\n";
154}
155close(F);
156
157open(F,'<:encoding(utf-8)',$threebyte) || die "Cannot open $threebyte:$!";
158$dstr = join(":", <F>);
159close(F);
160if (ord('A') == 193) { # EBCDIC
161    print "not " unless $dstr eq "foo\\x8C\\x80\\x80\\x80bar\n:\\x80foo\n";
162} else {
163    print "not " unless $dstr eq "foo\\xF0\\x80\\x80\\x80bar\n:\\x80foo\n";
164}
165print "ok 15\n";
166
167END {
168    1 while unlink($grk, $utf, $fail1, $fail2, $russki, $threebyte);
169}
170