1BEGIN { 2 require Config; Config->import(); 3 if ($Config{'extensions'} !~ /\bEncode\b/) { 4 print "1..0 # Skip: Encode was not built\n"; 5 exit 0; 6 } 7 unless (find PerlIO::Layer 'perlio') { 8 print "1..0 # Skip: PerlIO was not built\n"; 9 exit 0; 10 } 11 if (ord("A") == 193) { 12 print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n"; 13 exit(0); 14 } 15 if ($] >= 5.025 and !$Config{usecperl}) { 16 print "1..0 # Skip: encoding pragma not supported in Perl 5.25 or later\n"; 17 exit(0); 18 } 19} 20 21print "1..33\n"; 22 23 24no warnings "deprecated"; 25use encoding "latin1"; # ignored (overwritten by the next line) 26use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 27 28# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is 29# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), 30# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) 31 32$a = "\xDF"; 33$b = "\x{100}"; 34 35print "not " unless ord($a) == 0x3af; 36print "ok 1\n"; 37 38print "not " unless ord($b) == 0x100; 39print "ok 2\n"; 40 41my $c; 42 43$c = $a . $b; 44 45print "not " unless ord($c) == 0x3af; 46print "ok 3\n"; 47 48print "not " unless length($c) == 2; 49print "ok 4\n"; 50 51print "not " unless ord(substr($c, 1, 1)) == 0x100; 52print "ok 5\n"; 53 54print "not " unless ord(chr(0xdf)) == 0x3af; # spooky 55print "ok 6\n"; 56 57print "not " unless ord(pack("C", 0xdf)) == 0x3af; 58print "ok 7\n"; 59 60# we didn't break pack/unpack, I hope 61 62print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; 63print "ok 8\n"; 64 65# the first octet of UTF-8 encoded 0x3af 66print "not " unless unpack("U0 C", chr(0xdf)) == 0xce; 67print "ok 9\n"; 68 69print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; 70print "ok 10\n"; 71 72print "not " unless unpack("U", chr(0xdf)) == 0x3af; 73print "ok 11\n"; 74 75# charnames must still work 76use charnames ':full'; 77print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; 78print "ok 12\n"; 79 80# combine 81 82$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); 83 84print "not " unless ord($c) == 0x3af; 85print "ok 13\n"; 86 87print "not " unless ord(substr($c, 1, 1)) == 0xdf; 88print "ok 14\n"; 89 90print "not " unless ord(substr($c, 2, 1)) == 0x3af; 91print "ok 15\n"; 92 93# regex literals 94 95print "not " unless "\xDF" =~ /\x{3AF}/; 96print "ok 16\n"; 97 98print "not " unless "\x{3AF}" =~ /\xDF/; 99print "ok 17\n"; 100 101print "not " unless "\xDF" =~ /\xDF/; 102print "ok 18\n"; 103 104print "not " unless "\x{3AF}" =~ /\x{3AF}/; 105print "ok 19\n"; 106 107# eq, cmp 108 109my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 110 pack("C*", 0xDF ), # byte 111 pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 112 pack("U*", 0x3AF), # $U eq $byte 113 pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding 114 pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) 115 pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; 116 pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) 117); 118 119# all the tests in this section that compare a byte encoded string 120# ato UTF-8 encoded are run in all possible vairants 121# all of the eq, ne, cmp operations tested, 122# $v z $u tested as well as $u z $v 123 124sub alleq($$){ 125 my ($a,$b) = (shift, shift); 126 $a eq $b && $b eq $a && 127 !( $a ne $b ) && !( $b ne $a ) && 128 ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; 129} 130 131sub anyeq($$){ 132 my ($a,$b) = (shift, shift); 133 $a eq $b || $b eq $a || 134 !( $a ne $b ) || !( $b ne $a ) || 135 ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; 136} 137 138sub allgt($$){ 139 my ($a,$b) = (shift, shift); 140 ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; 141} 142#match the correct UTF-8 string 143print "not " unless alleq($byte, $U); 144print "ok 20\n"; 145 146#do not match a wrong UTF-8 string 147print "not " if anyeq($byte, $Ub); 148print "ok 21\n"; 149 150#string ordering 151print "not " unless allgt ( $g1, $byte ) && 152 allgt ( $g2, $byte ) && 153 allgt ( $byte, $l ) && 154 allgt ( $bytes, $U ); 155print "ok 22\n"; 156 157# upgrade, downgrade 158 159my ($u,$v,$v2); 160$u = $v = $v2 = pack("C*", 0xDF); 161utf8::upgrade($v); #explicit upgrade 162$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade 163 164# implicit upgrade === explicit upgrade 165print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; 166print "ok 23\n"; 167 168# utf8::upgrade is transparent and does not break equality 169print "not " unless alleq( $u, $v ); 170print "ok 24\n"; 171 172$u = $v = pack("C*", 0xDF); 173utf8::upgrade($v); 174#test for a roundtrip, we should get back from where we left 175eval {utf8::downgrade( $v )}; 176print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; 177print "ok 25\n"; 178 179# some more eq, cmp 180 181$byte=pack("C*", 0xDF); 182 183print "not " unless pack("U*", 0x3AF) eq $byte; 184print "ok 26\n"; 185 186print "not " if chr(0xDF) cmp $byte; 187print "ok 27\n"; 188 189print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && 190 ((pack("U*", 0x3AE) cmp $byte) == -1) && 191 ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && 192 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); 193print "ok 28\n"; 194 195 196{ 197 # Used to core dump in 5.7.3 198 no warnings; # so test goes noiselessly 199 print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; 200} 201 202{ 203 my %h1; 204 my %h2; 205 $h1{"\xdf"} = 41; 206 $h2{"\x{3af}"} = 42; 207 print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; 208 print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; 209} 210 211# Order of finding the above-Latin1 code point should not matter: both should 212# assume Latin1/Unicode encoding 213{ 214 use bytes; 215 print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/; 216 print "ok 32\n"; 217 print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/; 218 print "ok 33\n"; 219} 220