1*0Sstevel@tonic-gateBEGIN { 2*0Sstevel@tonic-gate require Config; import Config; 3*0Sstevel@tonic-gate if ($Config{'extensions'} !~ /\bEncode\b/) { 4*0Sstevel@tonic-gate print "1..0 # Skip: Encode was not built\n"; 5*0Sstevel@tonic-gate exit 0; 6*0Sstevel@tonic-gate } 7*0Sstevel@tonic-gate unless (find PerlIO::Layer 'perlio') { 8*0Sstevel@tonic-gate print "1..0 # Skip: PerlIO was not built\n"; 9*0Sstevel@tonic-gate exit 0; 10*0Sstevel@tonic-gate } 11*0Sstevel@tonic-gate if (ord("A") == 193) { 12*0Sstevel@tonic-gate print "1..0 # encoding pragma does not support EBCDIC platforms\n"; 13*0Sstevel@tonic-gate exit(0); 14*0Sstevel@tonic-gate } 15*0Sstevel@tonic-gate} 16*0Sstevel@tonic-gate 17*0Sstevel@tonic-gateprint "1..31\n"; 18*0Sstevel@tonic-gate 19*0Sstevel@tonic-gateuse encoding "latin1"; # ignored (overwritten by the next line) 20*0Sstevel@tonic-gateuse encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) 21*0Sstevel@tonic-gate 22*0Sstevel@tonic-gate# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is 23*0Sstevel@tonic-gate# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), 24*0Sstevel@tonic-gate# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) 25*0Sstevel@tonic-gate 26*0Sstevel@tonic-gate$a = "\xDF"; 27*0Sstevel@tonic-gate$b = "\x{100}"; 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gateprint "not " unless ord($a) == 0x3af; 30*0Sstevel@tonic-gateprint "ok 1\n"; 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gateprint "not " unless ord($b) == 0x100; 33*0Sstevel@tonic-gateprint "ok 2\n"; 34*0Sstevel@tonic-gate 35*0Sstevel@tonic-gatemy $c; 36*0Sstevel@tonic-gate 37*0Sstevel@tonic-gate$c = $a . $b; 38*0Sstevel@tonic-gate 39*0Sstevel@tonic-gateprint "not " unless ord($c) == 0x3af; 40*0Sstevel@tonic-gateprint "ok 3\n"; 41*0Sstevel@tonic-gate 42*0Sstevel@tonic-gateprint "not " unless length($c) == 2; 43*0Sstevel@tonic-gateprint "ok 4\n"; 44*0Sstevel@tonic-gate 45*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 1, 1)) == 0x100; 46*0Sstevel@tonic-gateprint "ok 5\n"; 47*0Sstevel@tonic-gate 48*0Sstevel@tonic-gateprint "not " unless ord(chr(0xdf)) == 0x3af; # spooky 49*0Sstevel@tonic-gateprint "ok 6\n"; 50*0Sstevel@tonic-gate 51*0Sstevel@tonic-gateprint "not " unless ord(pack("C", 0xdf)) == 0x3af; 52*0Sstevel@tonic-gateprint "ok 7\n"; 53*0Sstevel@tonic-gate 54*0Sstevel@tonic-gate# we didn't break pack/unpack, I hope 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gateprint "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; 57*0Sstevel@tonic-gateprint "ok 8\n"; 58*0Sstevel@tonic-gate 59*0Sstevel@tonic-gate# the first octet of UTF-8 encoded 0x3af 60*0Sstevel@tonic-gateprint "not " unless unpack("C", chr(0xdf)) == 0xce; 61*0Sstevel@tonic-gateprint "ok 9\n"; 62*0Sstevel@tonic-gate 63*0Sstevel@tonic-gateprint "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; 64*0Sstevel@tonic-gateprint "ok 10\n"; 65*0Sstevel@tonic-gate 66*0Sstevel@tonic-gateprint "not " unless unpack("U", chr(0xdf)) == 0x3af; 67*0Sstevel@tonic-gateprint "ok 11\n"; 68*0Sstevel@tonic-gate 69*0Sstevel@tonic-gate# charnames must still work 70*0Sstevel@tonic-gateuse charnames ':full'; 71*0Sstevel@tonic-gateprint "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; 72*0Sstevel@tonic-gateprint "ok 12\n"; 73*0Sstevel@tonic-gate 74*0Sstevel@tonic-gate# combine 75*0Sstevel@tonic-gate 76*0Sstevel@tonic-gate$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf); 77*0Sstevel@tonic-gate 78*0Sstevel@tonic-gateprint "not " unless ord($c) == 0x3af; 79*0Sstevel@tonic-gateprint "ok 13\n"; 80*0Sstevel@tonic-gate 81*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 1, 1)) == 0xdf; 82*0Sstevel@tonic-gateprint "ok 14\n"; 83*0Sstevel@tonic-gate 84*0Sstevel@tonic-gateprint "not " unless ord(substr($c, 2, 1)) == 0x3af; 85*0Sstevel@tonic-gateprint "ok 15\n"; 86*0Sstevel@tonic-gate 87*0Sstevel@tonic-gate# regex literals 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gateprint "not " unless "\xDF" =~ /\x{3AF}/; 90*0Sstevel@tonic-gateprint "ok 16\n"; 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gateprint "not " unless "\x{3AF}" =~ /\xDF/; 93*0Sstevel@tonic-gateprint "ok 17\n"; 94*0Sstevel@tonic-gate 95*0Sstevel@tonic-gateprint "not " unless "\xDF" =~ /\xDF/; 96*0Sstevel@tonic-gateprint "ok 18\n"; 97*0Sstevel@tonic-gate 98*0Sstevel@tonic-gateprint "not " unless "\x{3AF}" =~ /\x{3AF}/; 99*0Sstevel@tonic-gateprint "ok 19\n"; 100*0Sstevel@tonic-gate 101*0Sstevel@tonic-gate# eq, cmp 102*0Sstevel@tonic-gate 103*0Sstevel@tonic-gatemy ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 104*0Sstevel@tonic-gate pack("C*", 0xDF ), # byte 105*0Sstevel@tonic-gate pack("C*", 0xDF, 0x20), # ($bytes2 cmp $U) > 0 106*0Sstevel@tonic-gate pack("U*", 0x3AF), # $U eq $byte 107*0Sstevel@tonic-gate pack("U*", 0xDF ), # $Ub would eq $bytev w/o use encoding 108*0Sstevel@tonic-gate pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) 109*0Sstevel@tonic-gate pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; 110*0Sstevel@tonic-gate pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) 111*0Sstevel@tonic-gate); 112*0Sstevel@tonic-gate 113*0Sstevel@tonic-gate# all the tests in this section that compare a byte encoded string 114*0Sstevel@tonic-gate# ato UTF-8 encoded are run in all possible vairants 115*0Sstevel@tonic-gate# all of the eq, ne, cmp operations tested, 116*0Sstevel@tonic-gate# $v z $u tested as well as $u z $v 117*0Sstevel@tonic-gate 118*0Sstevel@tonic-gatesub alleq($$){ 119*0Sstevel@tonic-gate my ($a,$b) = (shift, shift); 120*0Sstevel@tonic-gate $a eq $b && $b eq $a && 121*0Sstevel@tonic-gate !( $a ne $b ) && !( $b ne $a ) && 122*0Sstevel@tonic-gate ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; 123*0Sstevel@tonic-gate} 124*0Sstevel@tonic-gate 125*0Sstevel@tonic-gatesub anyeq($$){ 126*0Sstevel@tonic-gate my ($a,$b) = (shift, shift); 127*0Sstevel@tonic-gate $a eq $b || $b eq $a || 128*0Sstevel@tonic-gate !( $a ne $b ) || !( $b ne $a ) || 129*0Sstevel@tonic-gate ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; 130*0Sstevel@tonic-gate} 131*0Sstevel@tonic-gate 132*0Sstevel@tonic-gatesub allgt($$){ 133*0Sstevel@tonic-gate my ($a,$b) = (shift, shift); 134*0Sstevel@tonic-gate ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; 135*0Sstevel@tonic-gate} 136*0Sstevel@tonic-gate#match the correct UTF-8 string 137*0Sstevel@tonic-gateprint "not " unless alleq($byte, $U); 138*0Sstevel@tonic-gateprint "ok 20\n"; 139*0Sstevel@tonic-gate 140*0Sstevel@tonic-gate#do not match a wrong UTF-8 string 141*0Sstevel@tonic-gateprint "not " if anyeq($byte, $Ub); 142*0Sstevel@tonic-gateprint "ok 21\n"; 143*0Sstevel@tonic-gate 144*0Sstevel@tonic-gate#string ordering 145*0Sstevel@tonic-gateprint "not " unless allgt ( $g1, $byte ) && 146*0Sstevel@tonic-gate allgt ( $g2, $byte ) && 147*0Sstevel@tonic-gate allgt ( $byte, $l ) && 148*0Sstevel@tonic-gate allgt ( $bytes, $U ); 149*0Sstevel@tonic-gateprint "ok 22\n"; 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate# upgrade, downgrade 152*0Sstevel@tonic-gate 153*0Sstevel@tonic-gatemy ($u,$v,$v2); 154*0Sstevel@tonic-gate$u = $v = $v2 = pack("C*", 0xDF); 155*0Sstevel@tonic-gateutf8::upgrade($v); #explicit upgrade 156*0Sstevel@tonic-gate$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade 157*0Sstevel@tonic-gate 158*0Sstevel@tonic-gate# implicit upgrade === explicit upgrade 159*0Sstevel@tonic-gateprint "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; 160*0Sstevel@tonic-gateprint "ok 23\n"; 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate# utf8::upgrade is transparent and does not break equality 163*0Sstevel@tonic-gateprint "not " unless alleq( $u, $v ); 164*0Sstevel@tonic-gateprint "ok 24\n"; 165*0Sstevel@tonic-gate 166*0Sstevel@tonic-gate$u = $v = pack("C*", 0xDF); 167*0Sstevel@tonic-gateutf8::upgrade($v); 168*0Sstevel@tonic-gate#test for a roundtrip, we should get back from where we left 169*0Sstevel@tonic-gateeval {utf8::downgrade( $v )}; 170*0Sstevel@tonic-gateprint "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; 171*0Sstevel@tonic-gateprint "ok 25\n"; 172*0Sstevel@tonic-gate 173*0Sstevel@tonic-gate# some more eq, cmp 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate$byte=pack("C*", 0xDF); 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gateprint "not " unless pack("U*", 0x3AF) eq $byte; 178*0Sstevel@tonic-gateprint "ok 26\n"; 179*0Sstevel@tonic-gate 180*0Sstevel@tonic-gateprint "not " if chr(0xDF) cmp $byte; 181*0Sstevel@tonic-gateprint "ok 27\n"; 182*0Sstevel@tonic-gate 183*0Sstevel@tonic-gateprint "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && 184*0Sstevel@tonic-gate ((pack("U*", 0x3AE) cmp $byte) == -1) && 185*0Sstevel@tonic-gate ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && 186*0Sstevel@tonic-gate ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); 187*0Sstevel@tonic-gateprint "ok 28\n"; 188*0Sstevel@tonic-gate 189*0Sstevel@tonic-gate 190*0Sstevel@tonic-gate{ 191*0Sstevel@tonic-gate # Used to core dump in 5.7.3 192*0Sstevel@tonic-gate no warnings; # so test goes noiselessly 193*0Sstevel@tonic-gate print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; 194*0Sstevel@tonic-gate} 195*0Sstevel@tonic-gate 196*0Sstevel@tonic-gate{ 197*0Sstevel@tonic-gate my %h1; 198*0Sstevel@tonic-gate my %h2; 199*0Sstevel@tonic-gate $h1{"\xdf"} = 41; 200*0Sstevel@tonic-gate $h2{"\x{3af}"} = 42; 201*0Sstevel@tonic-gate print $h1{"\x{3af}"} == 41 ? "ok 30\n" : "not ok 30\n"; 202*0Sstevel@tonic-gate print $h2{"\xdf"} == 42 ? "ok 31\n" : "not ok 31\n"; 203*0Sstevel@tonic-gate} 204