1BEGIN { 2 require Config; import Config; 3 if ($Config{'extensions'} !~ /\bEncode\b/) { 4 print "1..0 # Skip: Encode was not built\n"; 5 exit 0; 6 } 7 unless (find PerlIO::Layer 'perlio') { 8 print "1..0 # Skip: PerlIO was not built\n"; 9 exit 0; 10 } 11 if (ord("A") == 193) { 12 print "1..0 # encoding pragma does not support EBCDIC platforms\n"; 13 exit(0); 14 } 15} 16 17print "1..33\n"; 18 19 20no warnings "deprecated"; 21use encoding "latin1"; # ignored (overwritten by the next line) 22use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 23 24# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is 25# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), 26# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) 27 28$a = "\xDF"; 29$b = "\x{100}"; 30 31print "not " unless ord($a) == 0x3af; 32print "ok 1\n"; 33 34print "not " unless ord($b) == 0x100; 35print "ok 2\n"; 36 37my $c; 38 39$c = $a . $b; 40 41print "not " unless ord($c) == 0x3af; 42print "ok 3\n"; 43 44print "not " unless length($c) == 2; 45print "ok 4\n"; 46 47print "not " unless ord(substr($c, 1, 1)) == 0x100; 48print "ok 5\n"; 49 50print "not " unless ord(chr(0xdf)) == 0x3af; # spooky 51print "ok 6\n"; 52 53print "not " unless ord(pack("C", 0xdf)) == 0x3af; 54print "ok 7\n"; 55 56# we didn't break pack/unpack, I hope 57 58print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; 59print "ok 8\n"; 60 61# the first octet of UTF-8 encoded 0x3af 62print "not " unless unpack("U0 C", chr(0xdf)) == 0xce; 63print "ok 9\n"; 64 65print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; 66print "ok 10\n"; 67 68print "not " unless unpack("U", chr(0xdf)) == 0x3af; 69print "ok 11\n"; 70 71# charnames must still work 72use charnames ':full'; 73print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; 74print "ok 12\n"; 75 76# combine 77 78$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); 79 80print "not " unless ord($c) == 0x3af; 81print "ok 13\n"; 82 83print "not " unless ord(substr($c, 1, 1)) == 0xdf; 84print "ok 14\n"; 85 86print "not " unless ord(substr($c, 2, 1)) == 0x3af; 87print "ok 15\n"; 88 89# regex literals 90 91print "not " unless "\xDF" =~ /\x{3AF}/; 92print "ok 16\n"; 93 94print "not " unless "\x{3AF}" =~ /\xDF/; 95print "ok 17\n"; 96 97print "not " unless "\xDF" =~ /\xDF/; 98print "ok 18\n"; 99 100print "not " unless "\x{3AF}" =~ /\x{3AF}/; 101print "ok 19\n"; 102 103# eq, cmp 104 105my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 106 pack("C*", 0xDF ), # byte 107 pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 108 pack("U*", 0x3AF), # $U eq $byte 109 pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding 110 pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) 111 pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; 112 pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) 113); 114 115# all the tests in this section that compare a byte encoded string 116# ato UTF-8 encoded are run in all possible vairants 117# all of the eq, ne, cmp operations tested, 118# $v z $u tested as well as $u z $v 119 120sub alleq($$){ 121 my ($a,$b) = (shift, shift); 122 $a eq $b && $b eq $a && 123 !( $a ne $b ) && !( $b ne $a ) && 124 ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; 125} 126 127sub anyeq($$){ 128 my ($a,$b) = (shift, shift); 129 $a eq $b || $b eq $a || 130 !( $a ne $b ) || !( $b ne $a ) || 131 ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; 132} 133 134sub allgt($$){ 135 my ($a,$b) = (shift, shift); 136 ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; 137} 138#match the correct UTF-8 string 139print "not " unless alleq($byte, $U); 140print "ok 20\n"; 141 142#do not match a wrong UTF-8 string 143print "not " if anyeq($byte, $Ub); 144print "ok 21\n"; 145 146#string ordering 147print "not " unless allgt ( $g1, $byte ) && 148 allgt ( $g2, $byte ) && 149 allgt ( $byte, $l ) && 150 allgt ( $bytes, $U ); 151print "ok 22\n"; 152 153# upgrade, downgrade 154 155my ($u,$v,$v2); 156$u = $v = $v2 = pack("C*", 0xDF); 157utf8::upgrade($v); #explicit upgrade 158$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade 159 160# implicit upgrade === explicit upgrade 161print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; 162print "ok 23\n"; 163 164# utf8::upgrade is transparent and does not break equality 165print "not " unless alleq( $u, $v ); 166print "ok 24\n"; 167 168$u = $v = pack("C*", 0xDF); 169utf8::upgrade($v); 170#test for a roundtrip, we should get back from where we left 171eval {utf8::downgrade( $v )}; 172print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; 173print "ok 25\n"; 174 175# some more eq, cmp 176 177$byte=pack("C*", 0xDF); 178 179print "not " unless pack("U*", 0x3AF) eq $byte; 180print "ok 26\n"; 181 182print "not " if chr(0xDF) cmp $byte; 183print "ok 27\n"; 184 185print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && 186 ((pack("U*", 0x3AE) cmp $byte) == -1) && 187 ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && 188 ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); 189print "ok 28\n"; 190 191 192{ 193 # Used to core dump in 5.7.3 194 no warnings; # so test goes noiselessly 195 print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; 196} 197 198{ 199 my %h1; 200 my %h2; 201 $h1{"\xdf"} = 41; 202 $h2{"\x{3af}"} = 42; 203 print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; 204 print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; 205} 206 207# Order of finding the above-Latin1 code point should not matter: both should 208# assume Latin1/Unicode encoding 209{ 210 use bytes; 211 print "not " if "\xDF\x{100}" =~ /\x{3af}\x{100}/; 212 print "ok 32\n"; 213 print "not " if "\x{100}\xDF" =~ /\x{100}\x{3af}/; 214 print "ok 33\n"; 215} 216