1b39c5158SmillertBEGIN { 2*3d61058aSafresh1 require Config; Config->import(); 3b39c5158Smillert if ($Config{'extensions'} !~ /\bEncode\b/) { 4b39c5158Smillert print "1..0 # Skip: Encode was not built\n"; 5b39c5158Smillert exit 0; 6b39c5158Smillert } 7b39c5158Smillert unless (find PerlIO::Layer 'perlio') { 8b39c5158Smillert print "1..0 # Skip: PerlIO was not built\n"; 9b39c5158Smillert exit 0; 10b39c5158Smillert } 11b39c5158Smillert if (ord("A") == 193) { 129f11ffb7Safresh1 print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n"; 139f11ffb7Safresh1 exit(0); 149f11ffb7Safresh1 } 159f11ffb7Safresh1 if ($] >= 5.025 and !$Config{usecperl}) { 16eac174f2Safresh1 print "1..0 # Skip: encoding pragma not supported in Perl 5.25 or later\n"; 17b39c5158Smillert exit(0); 18b39c5158Smillert } 19b39c5158Smillert} 20b39c5158Smillert 21b8851fccSafresh1print "1..33\n"; 22b8851fccSafresh1 23b39c5158Smillert 2491f110e0Safresh1no warnings "deprecated"; 25b39c5158Smillertuse encoding "latin1"; # ignored (overwritten by the next line) 26b39c5158Smillertuse encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 27b39c5158Smillert 28b39c5158Smillert# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is 29b39c5158Smillert# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), 30b39c5158Smillert# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) 31b39c5158Smillert 32b39c5158Smillert$a = "\xDF"; 33b39c5158Smillert$b = "\x{100}"; 34b39c5158Smillert 35b39c5158Smillertprint "not " unless ord($a) == 0x3af; 36b39c5158Smillertprint "ok 1\n"; 37b39c5158Smillert 38b39c5158Smillertprint "not " unless ord($b) == 0x100; 39b39c5158Smillertprint "ok 2\n"; 40b39c5158Smillert 41b39c5158Smillertmy $c; 42b39c5158Smillert 43b39c5158Smillert$c = $a . $b; 44b39c5158Smillert 45b39c5158Smillertprint "not " unless ord($c) == 0x3af; 46b39c5158Smillertprint "ok 3\n"; 47b39c5158Smillert 48b39c5158Smillertprint "not " unless length($c) == 2; 49b39c5158Smillertprint "ok 4\n"; 50b39c5158Smillert 51b39c5158Smillertprint "not " unless ord(substr($c, 1, 1)) == 0x100; 52b39c5158Smillertprint "ok 5\n"; 53b39c5158Smillert 54b39c5158Smillertprint "not " unless ord(chr(0xdf)) == 0x3af; # spooky 55b39c5158Smillertprint "ok 6\n"; 56b39c5158Smillert 57b39c5158Smillertprint "not " unless ord(pack("C", 0xdf)) == 0x3af; 58b39c5158Smillertprint "ok 7\n"; 59b39c5158Smillert 60b39c5158Smillert# we didn't break pack/unpack, I hope 61b39c5158Smillert 62b39c5158Smillertprint "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; 63b39c5158Smillertprint "ok 8\n"; 64b39c5158Smillert 65b39c5158Smillert# the first octet of UTF-8 encoded 0x3af 66b39c5158Smillertprint "not " unless unpack("U0 C", chr(0xdf)) == 0xce; 67b39c5158Smillertprint "ok 9\n"; 68b39c5158Smillert 69b39c5158Smillertprint "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; 70b39c5158Smillertprint "ok 10\n"; 71b39c5158Smillert 72b39c5158Smillertprint "not " unless unpack("U", chr(0xdf)) == 0x3af; 73b39c5158Smillertprint "ok 11\n"; 74b39c5158Smillert 75b39c5158Smillert# charnames must still work 76b39c5158Smillertuse charnames ':full'; 77b39c5158Smillertprint "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; 78b39c5158Smillertprint "ok 12\n"; 79b39c5158Smillert 80b39c5158Smillert# combine 81b39c5158Smillert 82b39c5158Smillert$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); 83b39c5158Smillert 84b39c5158Smillertprint "not " unless ord($c) == 0x3af; 85b39c5158Smillertprint "ok 13\n"; 86b39c5158Smillert 87b39c5158Smillertprint "not " unless ord(substr($c, 1, 1)) == 0xdf; 88b39c5158Smillertprint "ok 14\n"; 89b39c5158Smillert 90b39c5158Smillertprint "not " unless ord(substr($c, 2, 1)) == 0x3af; 91b39c5158Smillertprint "ok 15\n"; 92b39c5158Smillert 93b39c5158Smillert# regex literals 94b39c5158Smillert 95b39c5158Smillertprint "not " unless "\xDF" =~ /\x{3AF}/; 96b39c5158Smillertprint "ok 16\n"; 97b39c5158Smillert 98b39c5158Smillertprint "not " unless "\x{3AF}" =~ /\xDF/; 99b39c5158Smillertprint "ok 17\n"; 100b39c5158Smillert 101b39c5158Smillertprint "not " unless "\xDF" =~ /\xDF/; 102b39c5158Smillertprint "ok 18\n"; 103b39c5158Smillert 104b39c5158Smillertprint "not " unless "\x{3AF}" =~ /\x{3AF}/; 105b39c5158Smillertprint "ok 19\n"; 106b39c5158Smillert 107b39c5158Smillert# eq, cmp 108b39c5158Smillert 109b39c5158Smillertmy ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 110b39c5158Smillert pack("C*", 0xDF ), # byte 111b39c5158Smillert pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 112b39c5158Smillert pack("U*", 0x3AF), # $U eq $byte 113b39c5158Smillert pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding 114b39c5158Smillert pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) 115b39c5158Smillert pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; 116b39c5158Smillert pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) 117b39c5158Smillert); 118b39c5158Smillert 119b39c5158Smillert# all the tests in this section that compare a byte encoded string 120b39c5158Smillert# ato UTF-8 encoded are run in all possible vairants 121b39c5158Smillert# all of the eq, ne, cmp operations tested, 122b39c5158Smillert# $v z $u tested as well as $u z $v 123b39c5158Smillert 124b39c5158Smillertsub alleq($$){ 125b39c5158Smillert my ($a,$b) = (shift, shift); 126b39c5158Smillert $a eq $b && $b eq $a && 127b39c5158Smillert !( $a ne $b ) && !( $b ne $a ) && 128b39c5158Smillert ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; 129b39c5158Smillert} 130b39c5158Smillert 131b39c5158Smillertsub anyeq($$){ 132b39c5158Smillert my ($a,$b) = (shift, shift); 133b39c5158Smillert $a eq $b || $b eq $a || 134b39c5158Smillert !( $a ne $b ) || !( $b ne $a ) || 135b39c5158Smillert ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; 136b39c5158Smillert} 137b39c5158Smillert 138b39c5158Smillertsub allgt($$){ 139b39c5158Smillert my ($a,$b) = (shift, shift); 140b39c5158Smillert ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; 141b39c5158Smillert} 142b39c5158Smillert#match the correct UTF-8 string 143b39c5158Smillertprint "not " unless alleq($byte, $U); 144b39c5158Smillertprint "ok 20\n"; 145b39c5158Smillert 146b39c5158Smillert#do not match a wrong UTF-8 string 147b39c5158Smillertprint "not " if anyeq($byte, $Ub); 148b39c5158Smillertprint "ok 21\n"; 149b39c5158Smillert 150b39c5158Smillert#string ordering 151b39c5158Smillertprint "not " unless allgt ( $g1, $byte ) && 152b39c5158Smillert allgt ( $g2, $byte ) && 153b39c5158Smillert allgt ( $byte, $l ) && 154b39c5158Smillert allgt ( $bytes, $U ); 155b39c5158Smillertprint "ok 22\n"; 156b39c5158Smillert 157b39c5158Smillert# upgrade, downgrade 158b39c5158Smillert 159b39c5158Smillertmy ($u,$v,$v2); 160b39c5158Smillert$u = $v = $v2 = pack("C*", 0xDF); 161b39c5158Smillertutf8::upgrade($v); #explicit upgrade 162b39c5158Smillert$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade 163b39c5158Smillert 164b39c5158Smillert# implicit upgrade === explicit upgrade 165b39c5158Smillertprint "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; 166b39c5158Smillertprint "ok 23\n"; 167b39c5158Smillert 168b39c5158Smillert# utf8::upgrade is transparent and does not break equality 169b39c5158Smillertprint "not " unless alleq( $u, $v ); 170b39c5158Smillertprint "ok 24\n"; 171b39c5158Smillert 172b39c5158Smillert$u = $v = pack("C*", 0xDF); 173b39c5158Smillertutf8::upgrade($v); 174b39c5158Smillert#test for a roundtrip, we should get back from where we left 175b39c5158Smillerteval {utf8::downgrade( $v )}; 176b39c5158Smillertprint "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; 177b39c5158Smillertprint "ok 25\n"; 178b39c5158Smillert 179b39c5158Smillert# some more eq, cmp 180b39c5158Smillert 181b39c5158Smillert$byte=pack("C*", 0xDF); 182b39c5158Smillert 183b39c5158Smillertprint "not " unless pack("U*", 0x3AF) eq $byte; 184b39c5158Smillertprint "ok 26\n"; 185b39c5158Smillert 186b39c5158Smillertprint "not " if chr(0xDF) cmp $byte; 187b39c5158Smillertprint "ok 27\n"; 188b39c5158Smillert 189b39c5158Smillertprint "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && 190b39c5158Smillert ((pack("U*", 0x3AE) cmp $byte) == -1) && 191b39c5158Smillert ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && 192b39c5158Smillert ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); 193b39c5158Smillertprint "ok 28\n"; 194b39c5158Smillert 195b39c5158Smillert 196b39c5158Smillert{ 197b39c5158Smillert # Used to core dump in 5.7.3 198b39c5158Smillert no warnings; # so test goes noiselessly 199b39c5158Smillert print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; 200b39c5158Smillert} 201b39c5158Smillert 202b39c5158Smillert{ 203b39c5158Smillert my %h1; 204b39c5158Smillert my %h2; 205b39c5158Smillert $h1{"\xdf"} = 41; 206b39c5158Smillert $h2{"\x{3af}"} = 42; 207b39c5158Smillert print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; 208b39c5158Smillert print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; 209b39c5158Smillert} 210b8851fccSafresh1 211b8851fccSafresh1# Order of finding the above-Latin1 code point should not matter: both should 212b8851fccSafresh1# assume Latin1/Unicode encoding 213b8851fccSafresh1{ 214b8851fccSafresh1 use bytes; 215b8851fccSafresh1 print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/; 216b8851fccSafresh1 print "ok 32\n"; 217b8851fccSafresh1 print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/; 218b8851fccSafresh1 print "ok 33\n"; 219b8851fccSafresh1} 220