xref: /openbsd-src/gnu/usr.bin/perl/t/op/utf8cache.t (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1898184e3Ssthen#!./perl -w
2b39c5158Smillert# Test for malfunctions of utf8 cache
3b39c5158Smillert
4b39c5158SmillertBEGIN {
5b39c5158Smillert    chdir 't' if -d 't';
6898184e3Ssthen    require './test.pl';
7*9f11ffb7Safresh1    set_up_inc('../lib');
8b39c5158Smillert}
9b39c5158Smillert
10898184e3Ssthenuse strict;
11b8851fccSafresh1use Config ();
12898184e3Ssthen
13b8851fccSafresh1plan(tests => 16);
1491f110e0Safresh1
1591f110e0Safresh1SKIP: {
16b8851fccSafresh1skip_without_dynamic_extension("Devel::Peek", 2);
17b39c5158Smillert
18b8851fccSafresh1my $out = runperl(stderr => 1,
19b8851fccSafresh1		  progs => [ split /\n/, <<'EOS' ]);
20b8851fccSafresh1    require Devel::Peek;
21b8851fccSafresh1    $a = qq(hello \x{1234});
22b39c5158Smillert    for (1..2) {
23b39c5158Smillert        bar(substr($a, $_, 1));
24b39c5158Smillert    }
25b39c5158Smillert    sub bar {
26b8851fccSafresh1        $_[0] = qq(\x{4321});
27b39c5158Smillert        Devel::Peek::Dump($_[0]);
28b39c5158Smillert    }
29b8851fccSafresh1EOS
30b39c5158Smillert
31b8851fccSafresh1$out =~ s/^ALLOCATED at .*\n//m
32b8851fccSafresh1    if $Config::Config{ccflags} =~ /-DDEBUG_LEAKING_SCALARS/;
33b8851fccSafresh1like($out, qr/\ASV =/, "check we got dump output"); # [perl #121337]
34b39c5158Smillert
35b39c5158Smillertmy $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n
36b39c5158Smillert                      \s+ MG_VIRTUAL \s = .* \n
37b39c5158Smillert                      \s+ MG_TYPE \s = \s PERL_MAGIC_utf8 .* \n
38b39c5158Smillert                      \s+ MG_LEN \s = .* \n }xm;
39b39c5158Smillert
40b8851fccSafresh1unlike($out, qr{ $utf8magic $utf8magic }x,
41b8851fccSafresh1       "no duplicate utf8 magic");
4291f110e0Safresh1
4391f110e0Safresh1} # SKIP
4491f110e0Safresh1
4591f110e0Safresh1# With bad caching, this code used to go quadratic and take 10s of minutes.
4691f110e0Safresh1# The 'test' in this case is simply that it doesn't hang.
4791f110e0Safresh1
4891f110e0Safresh1{
4991f110e0Safresh1    local ${^UTF8CACHE} = 1; # enable cache, disable debugging
5091f110e0Safresh1    my $x = "\x{100}" x 1000000;
5191f110e0Safresh1    while ($x =~ /./g) {
5291f110e0Safresh1	my $p = pos($x);
5391f110e0Safresh1    }
5491f110e0Safresh1    pass("quadratic pos");
5591f110e0Safresh1}
5691f110e0Safresh1
5791f110e0Safresh1# Get-magic can reallocate the PV.  Check that the cache is reset in
5891f110e0Safresh1# such cases.
5991f110e0Safresh1
6091f110e0Safresh1# Regexp vars
6191f110e0Safresh1"\x{100}" =~ /(.+)/;
6291f110e0Safresh1() = substr $1, 0, 1;
6391f110e0Safresh1"a\x{100}" =~ /(.+)/;
6491f110e0Safresh1is ord substr($1, 1, 1), 0x100, 'get-magic resets utf8cache on match vars';
6591f110e0Safresh1
6691f110e0Safresh1# Substr lvalues
6791f110e0Safresh1my $x = "a\x{100}";
6891f110e0Safresh1my $l = \substr $x, 0;
6991f110e0Safresh1() = substr $$l, 1, 1;
7091f110e0Safresh1substr $x, 0, 1, = "\x{100}";
7191f110e0Safresh1is ord substr($$l, 1, 1), 0x100, 'get-magic resets utf8cache on LVALUEs';
7291f110e0Safresh1
7391f110e0Safresh1# defelem magic
7491f110e0Safresh1my %h;
7591f110e0Safresh1sub {
7691f110e0Safresh1  $_[0] = "a\x{100}";
7791f110e0Safresh1  () = ord substr $_[0], 1, 1;
7891f110e0Safresh1  $h{k} = "\x{100}"x2;
7991f110e0Safresh1  is ord substr($_[0], 1, 1), 0x100,
8091f110e0Safresh1    'get-magic resets uf8cache on defelems';
8191f110e0Safresh1}->($h{k});
8291f110e0Safresh1
8391f110e0Safresh1
8491f110e0Safresh1# Overloading can also reallocate the PV.
8591f110e0Safresh1
8691f110e0Safresh1package UTF8Toggle {
8791f110e0Safresh1    use overload '""' => 'stringify', fallback => 1;
8891f110e0Safresh1
8991f110e0Safresh1    sub new {
9091f110e0Safresh1	my $class = shift;
9191f110e0Safresh1	my $value = shift;
9291f110e0Safresh1	my $state = shift||0;
9391f110e0Safresh1	return bless [$value, $state], $class;
9491f110e0Safresh1    }
9591f110e0Safresh1
9691f110e0Safresh1    sub stringify {
9791f110e0Safresh1	my $self = shift;
9891f110e0Safresh1	$self->[1] = ! $self->[1];
9991f110e0Safresh1	if ($self->[1]) {
10091f110e0Safresh1	    utf8::downgrade($self->[0]);
10191f110e0Safresh1	} else {
10291f110e0Safresh1	    utf8::upgrade($self->[0]);
10391f110e0Safresh1	}
10491f110e0Safresh1	$self->[0];
10591f110e0Safresh1    }
10691f110e0Safresh1}
10791f110e0Safresh1my $u = UTF8Toggle->new(" \x{c2}7 ");
10891f110e0Safresh1
10991f110e0Safresh1pos $u = 2;
11091f110e0Safresh1is pos $u, 2, 'pos on overloaded utf8 toggler';
11191f110e0Safresh1() = "$u"; # flip flag
11291f110e0Safresh1pos $u = 2;
11391f110e0Safresh1is pos $u, 2, 'pos on overloaded utf8 toggler (again)';
11491f110e0Safresh1
11591f110e0Safresh1() = ord ${\substr $u, 1};
11691f110e0Safresh1is ord ${\substr($u, 1)}, 0xc2,
11791f110e0Safresh1    'utf8 cache + overloading does not confuse substr lvalues';
11891f110e0Safresh1() = "$u"; # flip flag
11991f110e0Safresh1() = ord substr $u, 1;
12091f110e0Safresh1is ord substr($u, 1), 0xc2,
12191f110e0Safresh1    'utf8 cache + overloading does not confuse substr lvalues (again)';
12291f110e0Safresh1
12391f110e0Safresh1$u = UTF8Toggle->new(" \x{c2}7 ");
12491f110e0Safresh1() = ord ${\substr $u, 2};
12591f110e0Safresh1{ no warnings; ${\substr($u, 2, 1)} = 0; }
12691f110e0Safresh1is $u, " \x{c2}0 ",
12791f110e0Safresh1    'utf8 cache + overloading does not confuse substr lvalue assignment';
12891f110e0Safresh1$u = UTF8Toggle->new(" \x{c2}7 ");
12991f110e0Safresh1() = "$u"; # flip flag
13091f110e0Safresh1() = ord ${\substr $u, 2};
13191f110e0Safresh1{ no warnings; ${\substr($u, 2, 1)} = 0; }
13291f110e0Safresh1is $u, " \x{c2}0 ",
13391f110e0Safresh1    'utf8 cache + overload does not confuse substr lv assignment (again)';
13491f110e0Safresh1
13591f110e0Safresh1
13691f110e0Safresh1# Typeglobs and references should not get a cache
13791f110e0Safresh1use utf8;
13891f110e0Safresh1
13991f110e0Safresh1#substr
14091f110e0Safresh1my $globref = \*αabcdefg_::_;
14191f110e0Safresh1() = substr($$globref, 2, 3);
14291f110e0Safresh1*_abcdefgα:: = \%αabcdefg_::;
14391f110e0Safresh1undefabcdefg_::;
14491f110e0Safresh1{ no strict; () = *{"_abcdefgα::_"} }
14591f110e0Safresh1is substr($$globref, 2, 3), "abc", 'no utf8 pos cache on globs';
14691f110e0Safresh1
14791f110e0Safresh1my $ref = bless [], "αabcd_";
14891f110e0Safresh1() = substr($ref, 1, 3);
14991f110e0Safresh1bless $ref, "_abcdα";
15091f110e0Safresh1is substr($ref, 1, 3), "abc", 'no utf8 pos cache on references';
15191f110e0Safresh1
15291f110e0Safresh1#length
15391f110e0Safresh1$globref = \*αabcdefg_::_;
15491f110e0Safresh1() = "$$globref";  # turn utf8 flag on
15591f110e0Safresh1() = length($$globref);
15691f110e0Safresh1*_abcdefgα:: = \%αabcdefg_::;
15791f110e0Safresh1undefabcdefg_::;
15891f110e0Safresh1{ no strict; () = *{"_abcdefgα::_"} }
15991f110e0Safresh1is length($$globref), length("$$globref"), 'no utf8 length cache on globs';
16091f110e0Safresh1
16191f110e0Safresh1$ref = bless [], "αabcd_";
16291f110e0Safresh1() = "$ref"; # turn utf8 flag on
16391f110e0Safresh1() = length $ref;
16491f110e0Safresh1bless $ref, "α";
16591f110e0Safresh1is length $ref, length "$ref", 'no utf8 length cache on references';
166