1*0Sstevel@tonic-gate#!./perl -w 2*0Sstevel@tonic-gate 3*0Sstevel@tonic-gateBEGIN { 4*0Sstevel@tonic-gate chdir 't' if -d 't'; 5*0Sstevel@tonic-gate @INC = '../lib'; 6*0Sstevel@tonic-gate require './test.pl'; 7*0Sstevel@tonic-gate 8*0Sstevel@tonic-gate plan(tests => 97); 9*0Sstevel@tonic-gate} 10*0Sstevel@tonic-gate 11*0Sstevel@tonic-gateuse strict; 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate# Two hashes one will all keys 8-bit possible (initially), other 14*0Sstevel@tonic-gate# with a utf8 requiring key from the outset. 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gatemy %hash8 = ( "\xff" => 0xff, 17*0Sstevel@tonic-gate "\x7f" => 0x7f, 18*0Sstevel@tonic-gate ); 19*0Sstevel@tonic-gatemy %hashu = ( "\xff" => 0xff, 20*0Sstevel@tonic-gate "\x7f" => 0x7f, 21*0Sstevel@tonic-gate "\x{1ff}" => 0x1ff, 22*0Sstevel@tonic-gate ); 23*0Sstevel@tonic-gate 24*0Sstevel@tonic-gate# Check that we can find the 8-bit things by various litterals 25*0Sstevel@tonic-gateis($hash8{"\x{00ff}"},0xFF); 26*0Sstevel@tonic-gateis($hash8{"\x{007f}"},0x7F); 27*0Sstevel@tonic-gateis($hash8{"\xff"},0xFF); 28*0Sstevel@tonic-gateis($hash8{"\x7f"},0x7F); 29*0Sstevel@tonic-gateis($hashu{"\x{00ff}"},0xFF); 30*0Sstevel@tonic-gateis($hashu{"\x{007f}"},0x7F); 31*0Sstevel@tonic-gateis($hashu{"\xff"},0xFF); 32*0Sstevel@tonic-gateis($hashu{"\x7f"},0x7F); 33*0Sstevel@tonic-gate 34*0Sstevel@tonic-gate# Now try same thing with variables forced into various forms. 35*0Sstevel@tonic-gateforeach ("\x7f","\xff") 36*0Sstevel@tonic-gate { 37*0Sstevel@tonic-gate my $a = $_; # Force a copy 38*0Sstevel@tonic-gate utf8::upgrade($a); 39*0Sstevel@tonic-gate is($hash8{$a},ord($a)); 40*0Sstevel@tonic-gate is($hashu{$a},ord($a)); 41*0Sstevel@tonic-gate utf8::downgrade($a); 42*0Sstevel@tonic-gate is($hash8{$a},ord($a)); 43*0Sstevel@tonic-gate is($hashu{$a},ord($a)); 44*0Sstevel@tonic-gate my $b = $a.chr(100); 45*0Sstevel@tonic-gate chop($b); 46*0Sstevel@tonic-gate is($hash8{$b},ord($b)); 47*0Sstevel@tonic-gate is($hashu{$b},ord($b)); 48*0Sstevel@tonic-gate } 49*0Sstevel@tonic-gate 50*0Sstevel@tonic-gate# Check we have not got an spurious extra keys 51*0Sstevel@tonic-gateis(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff"); 52*0Sstevel@tonic-gateis(join('',sort { ord $a <=> ord $b } keys %hashu),"\x7f\xff\x{1ff}"); 53*0Sstevel@tonic-gate 54*0Sstevel@tonic-gate# Now add a utf8 key to the 8-bit hash 55*0Sstevel@tonic-gate$hash8{chr(0x1ff)} = 0x1ff; 56*0Sstevel@tonic-gate 57*0Sstevel@tonic-gate# Check we have not got an spurious extra keys 58*0Sstevel@tonic-gateis(join('',sort { ord $a <=> ord $b } keys %hash8),"\x7f\xff\x{1ff}"); 59*0Sstevel@tonic-gate 60*0Sstevel@tonic-gateforeach ("\x7f","\xff","\x{1ff}") 61*0Sstevel@tonic-gate { 62*0Sstevel@tonic-gate my $a = $_; 63*0Sstevel@tonic-gate utf8::upgrade($a); 64*0Sstevel@tonic-gate is($hash8{$a},ord($a)); 65*0Sstevel@tonic-gate my $b = $a.chr(100); 66*0Sstevel@tonic-gate chop($b); 67*0Sstevel@tonic-gate is($hash8{$b},ord($b)); 68*0Sstevel@tonic-gate } 69*0Sstevel@tonic-gate 70*0Sstevel@tonic-gate# and remove utf8 from the other hash 71*0Sstevel@tonic-gateis(delete $hashu{chr(0x1ff)},0x1ff); 72*0Sstevel@tonic-gateis(join('',sort keys %hashu),"\x7f\xff"); 73*0Sstevel@tonic-gate 74*0Sstevel@tonic-gateforeach ("\x7f","\xff") 75*0Sstevel@tonic-gate { 76*0Sstevel@tonic-gate my $a = $_; 77*0Sstevel@tonic-gate utf8::upgrade($a); 78*0Sstevel@tonic-gate is($hashu{$a},ord($a)); 79*0Sstevel@tonic-gate utf8::downgrade($a); 80*0Sstevel@tonic-gate is($hashu{$a},ord($a)); 81*0Sstevel@tonic-gate my $b = $a.chr(100); 82*0Sstevel@tonic-gate chop($b); 83*0Sstevel@tonic-gate is($hashu{$b},ord($b)); 84*0Sstevel@tonic-gate } 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate 87*0Sstevel@tonic-gate 88*0Sstevel@tonic-gate{ 89*0Sstevel@tonic-gate print "# Unicode hash keys and \\w\n"; 90*0Sstevel@tonic-gate # This is not really a regex test but regexes bring 91*0Sstevel@tonic-gate # out the issue nicely. 92*0Sstevel@tonic-gate use strict; 93*0Sstevel@tonic-gate my $u3 = "f\x{df}\x{100}"; 94*0Sstevel@tonic-gate my $u2 = substr($u3,0,2); 95*0Sstevel@tonic-gate my $u1 = substr($u2,0,1); 96*0Sstevel@tonic-gate my $u0 = chr (0xdf)x4; # Make this 4 chars so that all lengths are distinct. 97*0Sstevel@tonic-gate 98*0Sstevel@tonic-gate my @u = ($u0, $u1, $u2, $u3); 99*0Sstevel@tonic-gate 100*0Sstevel@tonic-gate while (@u) { 101*0Sstevel@tonic-gate my %u = (map {( $_, $_)} @u); 102*0Sstevel@tonic-gate my $keys = scalar @u; 103*0Sstevel@tonic-gate $keys .= ($keys == 1) ? " key" : " keys"; 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gate for (keys %u) { 106*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 107*0Sstevel@tonic-gate my $r = 0 + $u{$_} =~ /^\w+$/; 108*0Sstevel@tonic-gate is ($l, $r, "\\w on keys with $keys, key of length " . length $_); 109*0Sstevel@tonic-gate } 110*0Sstevel@tonic-gate 111*0Sstevel@tonic-gate my $more; 112*0Sstevel@tonic-gate do { 113*0Sstevel@tonic-gate $more = 0; 114*0Sstevel@tonic-gate # Want to do this direct, rather than copying to a temporary variable 115*0Sstevel@tonic-gate # The first time each will return key and value at the start of the hash. 116*0Sstevel@tonic-gate # each will return () after we've done the last pair. $more won't get 117*0Sstevel@tonic-gate # set then, and the do will exit. 118*0Sstevel@tonic-gate for (each %u) { 119*0Sstevel@tonic-gate $more = 1; 120*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 121*0Sstevel@tonic-gate my $r = 0 + $u{$_} =~ /^\w+$/; 122*0Sstevel@tonic-gate is ($l, $r, "\\w on each, with $keys, key of length " . length $_); 123*0Sstevel@tonic-gate } 124*0Sstevel@tonic-gate } while ($more); 125*0Sstevel@tonic-gate 126*0Sstevel@tonic-gate for (%u) { 127*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 128*0Sstevel@tonic-gate my $r = 0 + $u{$_} =~ /^\w+$/; 129*0Sstevel@tonic-gate is ($l, $r, "\\w on hash with $keys, key of length " . length $_); 130*0Sstevel@tonic-gate } 131*0Sstevel@tonic-gate pop @u; 132*0Sstevel@tonic-gate undef %u; 133*0Sstevel@tonic-gate } 134*0Sstevel@tonic-gate} 135*0Sstevel@tonic-gate 136*0Sstevel@tonic-gate{ 137*0Sstevel@tonic-gate my $utf8_sz = my $bytes_sz = "\x{df}"; 138*0Sstevel@tonic-gate $utf8_sz .= chr 256; 139*0Sstevel@tonic-gate chop ($utf8_sz); 140*0Sstevel@tonic-gate 141*0Sstevel@tonic-gate my (%bytes_first, %utf8_first); 142*0Sstevel@tonic-gate 143*0Sstevel@tonic-gate $bytes_first{$bytes_sz} = $bytes_sz; 144*0Sstevel@tonic-gate 145*0Sstevel@tonic-gate for (keys %bytes_first) { 146*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 147*0Sstevel@tonic-gate my $r = 0 + $bytes_first{$_} =~ /^\w+$/; 148*0Sstevel@tonic-gate is ($l, $r, "\\w on each, bytes"); 149*0Sstevel@tonic-gate } 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate $bytes_first{$utf8_sz} = $utf8_sz; 152*0Sstevel@tonic-gate 153*0Sstevel@tonic-gate for (keys %bytes_first) { 154*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 155*0Sstevel@tonic-gate my $r = 0 + $bytes_first{$_} =~ /^\w+$/; 156*0Sstevel@tonic-gate is ($l, $r, "\\w on each, bytes now utf8"); 157*0Sstevel@tonic-gate } 158*0Sstevel@tonic-gate 159*0Sstevel@tonic-gate $utf8_first{$utf8_sz} = $utf8_sz; 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate for (keys %utf8_first) { 162*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 163*0Sstevel@tonic-gate my $r = 0 + $utf8_first{$_} =~ /^\w+$/; 164*0Sstevel@tonic-gate is ($l, $r, "\\w on each, utf8"); 165*0Sstevel@tonic-gate } 166*0Sstevel@tonic-gate 167*0Sstevel@tonic-gate $utf8_first{$bytes_sz} = $bytes_sz; 168*0Sstevel@tonic-gate 169*0Sstevel@tonic-gate for (keys %utf8_first) { 170*0Sstevel@tonic-gate my $l = 0 + /^\w+$/; 171*0Sstevel@tonic-gate my $r = 0 + $utf8_first{$_} =~ /^\w+$/; 172*0Sstevel@tonic-gate is ($l, $r, "\\w on each, utf8 now bytes"); 173*0Sstevel@tonic-gate } 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate} 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gate{ 178*0Sstevel@tonic-gate # See if utf8 barewords work [perl #22969] 179*0Sstevel@tonic-gate use utf8; 180*0Sstevel@tonic-gate my %hash = (тест => 123); 181*0Sstevel@tonic-gate is($hash{тест}, $hash{'тест'}); 182*0Sstevel@tonic-gate is($hash{тест}, 123); 183*0Sstevel@tonic-gate is($hash{'тест'}, 123); 184*0Sstevel@tonic-gate %hash = (тест => 123); 185*0Sstevel@tonic-gate is($hash{тест}, $hash{'тест'}); 186*0Sstevel@tonic-gate is($hash{тест}, 123); 187*0Sstevel@tonic-gate is($hash{'тест'}, 123); 188*0Sstevel@tonic-gate} 189