1898184e3Ssthen#!./perl -w 2b39c5158Smillert# Test for malfunctions of utf8 cache 3b39c5158Smillert 4b39c5158SmillertBEGIN { 5b39c5158Smillert chdir 't' if -d 't'; 6898184e3Ssthen require './test.pl'; 7*9f11ffb7Safresh1 set_up_inc('../lib'); 8b39c5158Smillert} 9b39c5158Smillert 10898184e3Ssthenuse strict; 11b8851fccSafresh1use Config (); 12898184e3Ssthen 13b8851fccSafresh1plan(tests => 16); 1491f110e0Safresh1 1591f110e0Safresh1SKIP: { 16b8851fccSafresh1skip_without_dynamic_extension("Devel::Peek", 2); 17b39c5158Smillert 18b8851fccSafresh1my $out = runperl(stderr => 1, 19b8851fccSafresh1 progs => [ split /\n/, <<'EOS' ]); 20b8851fccSafresh1 require Devel::Peek; 21b8851fccSafresh1 $a = qq(hello \x{1234}); 22b39c5158Smillert for (1..2) { 23b39c5158Smillert bar(substr($a, $_, 1)); 24b39c5158Smillert } 25b39c5158Smillert sub bar { 26b8851fccSafresh1 $_[0] = qq(\x{4321}); 27b39c5158Smillert Devel::Peek::Dump($_[0]); 28b39c5158Smillert } 29b8851fccSafresh1EOS 30b39c5158Smillert 31b8851fccSafresh1$out =~ s/^ALLOCATED at .*\n//m 32b8851fccSafresh1 if $Config::Config{ccflags} =~ /-DDEBUG_LEAKING_SCALARS/; 33b8851fccSafresh1like($out, qr/\ASV =/, "check we got dump output"); # [perl #121337] 34b39c5158Smillert 35b39c5158Smillertmy $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n 36b39c5158Smillert \s+ MG_VIRTUAL \s = .* \n 37b39c5158Smillert \s+ MG_TYPE \s = \s PERL_MAGIC_utf8 .* \n 38b39c5158Smillert \s+ MG_LEN \s = .* \n }xm; 39b39c5158Smillert 40b8851fccSafresh1unlike($out, qr{ $utf8magic $utf8magic }x, 41b8851fccSafresh1 "no duplicate utf8 magic"); 4291f110e0Safresh1 4391f110e0Safresh1} # SKIP 4491f110e0Safresh1 4591f110e0Safresh1# With bad caching, this code used to go quadratic and take 10s of minutes. 4691f110e0Safresh1# The 'test' in this case is simply that it doesn't hang. 4791f110e0Safresh1 4891f110e0Safresh1{ 4991f110e0Safresh1 local ${^UTF8CACHE} = 1; # enable cache, disable debugging 5091f110e0Safresh1 my $x = "\x{100}" x 1000000; 5191f110e0Safresh1 while ($x =~ /./g) { 5291f110e0Safresh1 my $p = pos($x); 5391f110e0Safresh1 } 5491f110e0Safresh1 pass("quadratic pos"); 5591f110e0Safresh1} 5691f110e0Safresh1 5791f110e0Safresh1# Get-magic can reallocate the PV. Check that the cache is reset in 5891f110e0Safresh1# such cases. 5991f110e0Safresh1 6091f110e0Safresh1# Regexp vars 6191f110e0Safresh1"\x{100}" =~ /(.+)/; 6291f110e0Safresh1() = substr $1, 0, 1; 6391f110e0Safresh1"a\x{100}" =~ /(.+)/; 6491f110e0Safresh1is ord substr($1, 1, 1), 0x100, 'get-magic resets utf8cache on match vars'; 6591f110e0Safresh1 6691f110e0Safresh1# Substr lvalues 6791f110e0Safresh1my $x = "a\x{100}"; 6891f110e0Safresh1my $l = \substr $x, 0; 6991f110e0Safresh1() = substr $$l, 1, 1; 7091f110e0Safresh1substr $x, 0, 1, = "\x{100}"; 7191f110e0Safresh1is ord substr($$l, 1, 1), 0x100, 'get-magic resets utf8cache on LVALUEs'; 7291f110e0Safresh1 7391f110e0Safresh1# defelem magic 7491f110e0Safresh1my %h; 7591f110e0Safresh1sub { 7691f110e0Safresh1 $_[0] = "a\x{100}"; 7791f110e0Safresh1 () = ord substr $_[0], 1, 1; 7891f110e0Safresh1 $h{k} = "\x{100}"x2; 7991f110e0Safresh1 is ord substr($_[0], 1, 1), 0x100, 8091f110e0Safresh1 'get-magic resets uf8cache on defelems'; 8191f110e0Safresh1}->($h{k}); 8291f110e0Safresh1 8391f110e0Safresh1 8491f110e0Safresh1# Overloading can also reallocate the PV. 8591f110e0Safresh1 8691f110e0Safresh1package UTF8Toggle { 8791f110e0Safresh1 use overload '""' => 'stringify', fallback => 1; 8891f110e0Safresh1 8991f110e0Safresh1 sub new { 9091f110e0Safresh1 my $class = shift; 9191f110e0Safresh1 my $value = shift; 9291f110e0Safresh1 my $state = shift||0; 9391f110e0Safresh1 return bless [$value, $state], $class; 9491f110e0Safresh1 } 9591f110e0Safresh1 9691f110e0Safresh1 sub stringify { 9791f110e0Safresh1 my $self = shift; 9891f110e0Safresh1 $self->[1] = ! $self->[1]; 9991f110e0Safresh1 if ($self->[1]) { 10091f110e0Safresh1 utf8::downgrade($self->[0]); 10191f110e0Safresh1 } else { 10291f110e0Safresh1 utf8::upgrade($self->[0]); 10391f110e0Safresh1 } 10491f110e0Safresh1 $self->[0]; 10591f110e0Safresh1 } 10691f110e0Safresh1} 10791f110e0Safresh1my $u = UTF8Toggle->new(" \x{c2}7 "); 10891f110e0Safresh1 10991f110e0Safresh1pos $u = 2; 11091f110e0Safresh1is pos $u, 2, 'pos on overloaded utf8 toggler'; 11191f110e0Safresh1() = "$u"; # flip flag 11291f110e0Safresh1pos $u = 2; 11391f110e0Safresh1is pos $u, 2, 'pos on overloaded utf8 toggler (again)'; 11491f110e0Safresh1 11591f110e0Safresh1() = ord ${\substr $u, 1}; 11691f110e0Safresh1is ord ${\substr($u, 1)}, 0xc2, 11791f110e0Safresh1 'utf8 cache + overloading does not confuse substr lvalues'; 11891f110e0Safresh1() = "$u"; # flip flag 11991f110e0Safresh1() = ord substr $u, 1; 12091f110e0Safresh1is ord substr($u, 1), 0xc2, 12191f110e0Safresh1 'utf8 cache + overloading does not confuse substr lvalues (again)'; 12291f110e0Safresh1 12391f110e0Safresh1$u = UTF8Toggle->new(" \x{c2}7 "); 12491f110e0Safresh1() = ord ${\substr $u, 2}; 12591f110e0Safresh1{ no warnings; ${\substr($u, 2, 1)} = 0; } 12691f110e0Safresh1is $u, " \x{c2}0 ", 12791f110e0Safresh1 'utf8 cache + overloading does not confuse substr lvalue assignment'; 12891f110e0Safresh1$u = UTF8Toggle->new(" \x{c2}7 "); 12991f110e0Safresh1() = "$u"; # flip flag 13091f110e0Safresh1() = ord ${\substr $u, 2}; 13191f110e0Safresh1{ no warnings; ${\substr($u, 2, 1)} = 0; } 13291f110e0Safresh1is $u, " \x{c2}0 ", 13391f110e0Safresh1 'utf8 cache + overload does not confuse substr lv assignment (again)'; 13491f110e0Safresh1 13591f110e0Safresh1 13691f110e0Safresh1# Typeglobs and references should not get a cache 13791f110e0Safresh1use utf8; 13891f110e0Safresh1 13991f110e0Safresh1#substr 14091f110e0Safresh1my $globref = \*αabcdefg_::_; 14191f110e0Safresh1() = substr($$globref, 2, 3); 14291f110e0Safresh1*_abcdefgα:: = \%αabcdefg_::; 14391f110e0Safresh1undef %αabcdefg_::; 14491f110e0Safresh1{ no strict; () = *{"_abcdefgα::_"} } 14591f110e0Safresh1is substr($$globref, 2, 3), "abc", 'no utf8 pos cache on globs'; 14691f110e0Safresh1 14791f110e0Safresh1my $ref = bless [], "αabcd_"; 14891f110e0Safresh1() = substr($ref, 1, 3); 14991f110e0Safresh1bless $ref, "_abcdα"; 15091f110e0Safresh1is substr($ref, 1, 3), "abc", 'no utf8 pos cache on references'; 15191f110e0Safresh1 15291f110e0Safresh1#length 15391f110e0Safresh1$globref = \*αabcdefg_::_; 15491f110e0Safresh1() = "$$globref"; # turn utf8 flag on 15591f110e0Safresh1() = length($$globref); 15691f110e0Safresh1*_abcdefgα:: = \%αabcdefg_::; 15791f110e0Safresh1undef %αabcdefg_::; 15891f110e0Safresh1{ no strict; () = *{"_abcdefgα::_"} } 15991f110e0Safresh1is length($$globref), length("$$globref"), 'no utf8 length cache on globs'; 16091f110e0Safresh1 16191f110e0Safresh1$ref = bless [], "αabcd_"; 16291f110e0Safresh1() = "$ref"; # turn utf8 flag on 16391f110e0Safresh1() = length $ref; 16491f110e0Safresh1bless $ref, "α"; 16591f110e0Safresh1is length $ref, length "$ref", 'no utf8 length cache on references'; 166