xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/t/encoding.t (revision c90a81c56dcebd6a1b73fe4aff9b03385b8e63b3)
1BEGIN {
2    require Config; import Config;
3    if ($Config{'extensions'} !~ /\bEncode\b/) {
4      print "1..0 # Skip: Encode was not built\n";
5      exit 0;
6    }
7    unless (find PerlIO::Layer 'perlio') {
8    print "1..0 # Skip: PerlIO was not built\n";
9    exit 0;
10    }
11    if (ord("A") == 193) {
12    print "1..0 # encoding pragma does not support EBCDIC platforms\n";
13    exit(0);
14    }
15}
16
17print "1..33\n";
18
19
20no warnings "deprecated";
21use encoding "latin1"; # ignored (overwritten by the next line)
22use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
23
24# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
25# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
26# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
27
28$a = "\xDF";
29$b = "\x{100}";
30
31print "not " unless ord($a) == 0x3af;
32print "ok 1\n";
33
34print "not " unless ord($b) == 0x100;
35print "ok 2\n";
36
37my $c;
38
39$c = $a . $b;
40
41print "not " unless ord($c) == 0x3af;
42print "ok 3\n";
43
44print "not " unless length($c) == 2;
45print "ok 4\n";
46
47print "not " unless ord(substr($c, 1, 1)) == 0x100;
48print "ok 5\n";
49
50print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
51print "ok 6\n";
52
53print "not " unless ord(pack("C", 0xdf)) == 0x3af;
54print "ok 7\n";
55
56# we didn't break pack/unpack, I hope
57
58print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
59print "ok 8\n";
60
61# the first octet of UTF-8 encoded 0x3af
62print "not " unless unpack("U0 C", chr(0xdf)) == 0xce;
63print "ok 9\n";
64
65print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
66print "ok 10\n";
67
68print "not " unless unpack("U", chr(0xdf)) == 0x3af;
69print "ok 11\n";
70
71# charnames must still work
72use charnames ':full';
73print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
74print "ok 12\n";
75
76# combine
77
78$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
79
80print "not " unless ord($c) == 0x3af;
81print "ok 13\n";
82
83print "not " unless ord(substr($c, 1, 1)) == 0xdf;
84print "ok 14\n";
85
86print "not " unless ord(substr($c, 2, 1)) == 0x3af;
87print "ok 15\n";
88
89# regex literals
90
91print "not " unless "\xDF"    =~ /\x{3AF}/;
92print "ok 16\n";
93
94print "not " unless "\x{3AF}" =~ /\xDF/;
95print "ok 17\n";
96
97print "not " unless "\xDF"    =~ /\xDF/;
98print "ok 18\n";
99
100print "not " unless "\x{3AF}" =~ /\x{3AF}/;
101print "ok 19\n";
102
103# eq, cmp
104
105my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = (
106    pack("C*", 0xDF ),       # byte
107    pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
108    pack("U*", 0x3AF),       # $U eq $byte
109    pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
110    pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
111    pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
112    pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
113);
114
115# all the tests in this section that compare a byte encoded string
116# ato UTF-8 encoded are run in all possible vairants
117# all of the eq, ne, cmp operations tested,
118# $v z $u tested as well as $u z $v
119
120sub alleq($$){
121    my ($a,$b)    =    (shift, shift);
122     $a  eq  $b        &&     $b  eq  $a         &&
123  !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
124   ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
125}
126
127sub anyeq($$){
128    my ($a,$b)    =    (shift, shift);
129     $a  eq  $b        ||     $b  eq  $a         ||
130  !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
131   ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
132}
133
134sub allgt($$){
135    my ($a,$b)    =    (shift, shift);
136    ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
137}
138#match the correct UTF-8 string
139print "not " unless  alleq($byte, $U);
140print "ok 20\n";
141
142#do not match a wrong UTF-8 string
143print "not " if anyeq($byte, $Ub);
144print "ok 21\n";
145
146#string ordering
147print "not " unless allgt ( $g1,    $byte  )  &&
148                    allgt ( $g2,    $byte  )  &&
149                    allgt ( $byte,  $l     )  &&
150                    allgt ( $bytes, $U     );
151print "ok 22\n";
152
153# upgrade, downgrade
154
155my ($u,$v,$v2);
156$u = $v = $v2 = pack("C*", 0xDF);
157utf8::upgrade($v);                   #explicit upgrade
158$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
159
160# implicit upgrade === explicit upgrade
161print "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
162print "ok 23\n";
163
164# utf8::upgrade is transparent and does not break equality
165print "not " unless alleq( $u, $v );
166print "ok 24\n";
167
168$u = $v = pack("C*", 0xDF);
169utf8::upgrade($v);
170#test for a roundtrip, we should get back from where we left
171eval {utf8::downgrade( $v )};
172print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
173print "ok 25\n";
174
175# some more eq, cmp
176
177$byte=pack("C*", 0xDF);
178
179print "not " unless pack("U*", 0x3AF) eq $byte;
180print "ok 26\n";
181
182print "not " if chr(0xDF) cmp $byte;
183print "ok 27\n";
184
185print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
186                    ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
187                    ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
188                ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
189print "ok 28\n";
190
191
192{
193    # Used to core dump in 5.7.3
194    no warnings; # so test goes noiselessly
195    print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n";
196}
197
198{
199    my %h1;
200    my %h2;
201    $h1{"\xdf"}    = 41;
202    $h2{"\x{3af}"} = 42;
203    print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n";
204    print $h2{"\xdf"}    == 42 ? "ok 31\n" : "not ok 31\n";
205}
206
207# Order of finding the above-Latin1 code point should not matter: both should
208# assume Latin1/Unicode encoding
209{
210    use bytes;
211    print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/;
212    print "ok 32\n";
213    print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/;
214    print "ok 33\n";
215}
216