1BEGIN { 2 require Config; import Config; 3 if ($Config{'extensions'} !~ /\bEncode\b/) { 4 print "1..0 # Skip: Encode was not built\n"; 5 exit 0; 6 } 7 unless (find PerlIO::Layer 'perlio') { 8 print "1..0 # Skip: PerlIO was not built\n"; 9 exit 0; 10 } 11 if (ord("A") == 193) { 12 print "1..0 # encoding pragma does not support EBCDIC platforms\n"; 13 exit(0); 14 } 15} 16 17print "1..31\n"; 18 19no warnings "deprecated"; 20use encoding "latin1"; # ignored (overwritten by the next line) 21use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 22 23# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is 24# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), 25# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) 26 27$a = "\xDF"; 28$b = "\x{100}"; 29 30print "not " unless ord($a) == 0x3af; 31print "ok 1\n"; 32 33print "not " unless ord($b) == 0x100; 34print "ok 2\n"; 35 36my $c; 37 38$c = $a . $b; 39 40print "not " unless ord($c) == 0x3af; 41print "ok 3\n"; 42 43print "not " unless length($c) == 2; 44print "ok 4\n"; 45 46print "not " unless ord(substr($c, 1, 1)) == 0x100; 47print "ok 5\n"; 48 49print "not " unless ord(chr(0xdf)) == 0x3af; # spooky 50print "ok 6\n"; 51 52print "not " unless ord(pack("C", 0xdf)) == 0x3af; 53print "ok 7\n"; 54 55# we didn't break pack/unpack, I hope 56 57print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; 58print "ok 8\n"; 59 60# the first octet of UTF-8 encoded 0x3af 61print "not " unless unpack("U0 C", chr(0xdf)) == 0xce; 62print "ok 9\n"; 63 64print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; 65print "ok 10\n"; 66 67print "not " unless unpack("U", chr(0xdf)) == 0x3af; 68print "ok 11\n"; 69 70# charnames must still work 71use charnames ':full'; 72print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; 73print "ok 12\n"; 74 75# combine 76 77$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); 78 79print "not " unless ord($c) == 0x3af; 80print "ok 13\n"; 81 82print "not " unless ord(substr($c, 1, 1)) == 0xdf; 83print "ok 14\n"; 84 85print "not " unless ord(substr($c, 2, 1)) == 0x3af; 86print "ok 15\n"; 87 88# regex literals 89 90print "not " unless "\xDF" =~ /\x{3AF}/; 91print "ok 16\n"; 92 93print "not " unless "\x{3AF}" =~ /\xDF/; 94print "ok 17\n"; 95 96print "not " unless "\xDF" =~ /\xDF/; 97print "ok 18\n"; 98 99print "not " unless "\x{3AF}" =~ /\x{3AF}/; 100print "ok 19\n"; 101 102# eq, cmp 103 104my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 105 pack("C*", 0xDF ), # byte 106 pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 107 pack("U*", 0x3AF), # $U eq $byte 108 pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding 109 pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) 110 pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; 111 pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) 112); 113 114# all the tests in this section that compare a byte encoded string 115# ato UTF-8 encoded are run in all possible vairants 116# all of the eq, ne, cmp operations tested, 117# $v z $u tested as well as $u z $v 118 119sub alleq($$){ 120 my ($a,$b) = (shift, shift); 121 $a eq $b && $b eq $a && 122 !( $a ne $b ) && !( $b ne $a ) && 123 ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; 124} 125 126sub anyeq($$){ 127 my ($a,$b) = (shift, shift); 128 $a eq $b || $b eq $a || 129 !( $a ne $b ) || !( $b ne $a ) || 130 ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; 131} 132 133sub allgt($$){ 134 my ($a,$b) = (shift, shift); 135 ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; 136} 137#match the correct UTF-8 string 138print "not " unless alleq($byte, $U); 139print "ok 20\n"; 140 141#do not match a wrong UTF-8 string 142print "not " if anyeq($byte, $Ub); 143print "ok 21\n"; 144 145#string ordering 146print "not " unless allgt ( $g1, $byte ) && 147 allgt ( $g2, $byte ) && 148 allgt ( $byte, $l ) && 149 allgt ( $bytes, $U ); 150print "ok 22\n"; 151 152# upgrade, downgrade 153 154my ($u,$v,$v2); 155$u = $v = $v2 = pack("C*", 0xDF); 156utf8::upgrade($v); #explicit upgrade 157$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade 158 159# implicit upgrade === explicit upgrade 160print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; 161print "ok 23\n"; 162 163# utf8::upgrade is transparent and does not break equality 164print "not " unless alleq( $u, $v ); 165print "ok 24\n"; 166 167$u = $v = pack("C*", 0xDF); 168utf8::upgrade($v); 169#test for a roundtrip, we should get back from where we left 170eval {utf8::downgrade( $v )}; 171print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; 172print "ok 25\n"; 173 174# some more eq, cmp 175 176$byte=pack("C*", 0xDF); 177 178print "not " unless pack("U*", 0x3AF) eq $byte; 179print "ok 26\n"; 180 181print "not " if chr(0xDF) cmp $byte; 182print "ok 27\n"; 183 184print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && 185 ((pack("U*", 0x3AE) cmp $byte) == -1) && 186 ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && 187 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); 188print "ok 28\n"; 189 190 191{ 192 # Used to core dump in 5.7.3 193 no warnings; # so test goes noiselessly 194 print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; 195} 196 197{ 198 my %h1; 199 my %h2; 200 $h1{"\xdf"} = 41; 201 $h2{"\x{3af}"} = 42; 202 print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; 203 print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; 204} 205