1*256a93a4Safresh1#!./perl 2*256a93a4Safresh1# 3*256a93a4Safresh1# ex: set ts=8 sts=4 sw=4 et: 4*256a93a4Safresh1# 5*256a93a4Safresh1# Here we test for optimizations in the regexp engine. 6*256a93a4Safresh1# We try to distinguish between "nice to have" optimizations and those 7*256a93a4Safresh1# we consider essential: failure of the latter should be considered bugs, 8*256a93a4Safresh1# while failure of the former should at worst be TODO. 9*256a93a4Safresh1# 10*256a93a4Safresh1# Format of data lines is tab-separated: pattern, minlen, anchored, floating, 11*256a93a4Safresh1# other-options, comment. 12*256a93a4Safresh1# - pattern will be subject to string eval as "qr{$pattern}". 13*256a93a4Safresh1# - minlen is a non-negative integer. 14*256a93a4Safresh1# - anchored/floating are of the form "u23:45+string". If initial "u" is 15*256a93a4Safresh1# present we expect a utf8 substring, else a byte substring; subsequent 16*256a93a4Safresh1# digits are the min offset; optional /:\d+/ is the max offset (not 17*256a93a4Safresh1# supported for anchored; assumed undef if not present for floating); 18*256a93a4Safresh1# subsequent '-' or '+' indicates if this is the substring being checked; 19*256a93a4Safresh1# "string" is the substring to expect. Use "-" for the whole entry to 20*256a93a4Safresh1# indicate no substring of this type. 21*256a93a4Safresh1# - other-options is a comma-separated list of bare flags or option=value 22*256a93a4Safresh1# strings. Those with an initial "T" mark the corresponding test TODO. 23*256a93a4Safresh1# Booleans (noscan, isall, skip, implicit, anchor SBOL, anchor MBOL, 24*256a93a4Safresh1# anchor GPOS) are expected false if not mentioned, expected true if 25*256a93a4Safresh1# supplied as bare flags. stclass may be supplied as a pattern match 26*256a93a4Safresh1# as eg "stclass=~^ANYOF". 27*256a93a4Safresh1# - as a special-case, minlenret is expected to be the same as minlen 28*256a93a4Safresh1# unless specified in other-options. 29*256a93a4Safresh1# 30*256a93a4Safresh1 31*256a93a4Safresh1use strict; 32*256a93a4Safresh1use warnings; 33*256a93a4Safresh1use 5.010; 34*256a93a4Safresh1 35*256a93a4Safresh1$| = 1; 36*256a93a4Safresh1 37*256a93a4Safresh1BEGIN { 38*256a93a4Safresh1 chdir 't' if -d 't'; 39*256a93a4Safresh1 require './test.pl'; 40*256a93a4Safresh1 set_up_inc('../lib'); 41*256a93a4Safresh1 skip_all_if_miniperl("no dynamic loading on miniperl, no re::optimization"); 42*256a93a4Safresh1} 43*256a93a4Safresh1 44*256a93a4Safresh1no warnings qw{ experimental }; 45*256a93a4Safresh1use feature qw{ refaliasing declared_refs }; 46*256a93a4Safresh1our \$TODO = \$::TODO; 47*256a93a4Safresh1 48*256a93a4Safresh1use re (); 49*256a93a4Safresh1 50*256a93a4Safresh1while (<DATA>) { 51*256a93a4Safresh1 chomp; 52*256a93a4Safresh1 if (m{^\s*(?:#|\z)}) { 53*256a93a4Safresh1 # skip blank/comment lines 54*256a93a4Safresh1 next; 55*256a93a4Safresh1 } 56*256a93a4Safresh1 my($pat, $minlen, $anchored, $floating, $other, $comment) = split /\t/; 57*256a93a4Safresh1 my %todo; 58*256a93a4Safresh1 my %opt = map { 59*256a93a4Safresh1 my($k, $v) = split /=/, $_, 2; 60*256a93a4Safresh1 ($k =~ s/^T//) ? do { $todo{$k} = $v; () } : ($k => $v); 61*256a93a4Safresh1 } split /,/, $other // ''; 62*256a93a4Safresh1 $comment = (defined $comment && length $comment) 63*256a93a4Safresh1 ? "$pat ($comment):" 64*256a93a4Safresh1 : "$pat:"; 65*256a93a4Safresh1 66*256a93a4Safresh1 my $o = re::optimization(eval "qr{$pat}"); 67*256a93a4Safresh1 ok($o, "$comment compiled ok"); 68*256a93a4Safresh1 69*256a93a4Safresh1 my $skip = $o ? undef : "could not get info for qr{$pat}"; 70*256a93a4Safresh1 my $test = 0; 71*256a93a4Safresh1 72*256a93a4Safresh1 my($got, $expect) = ($o->{minlen}, $minlen); 73*256a93a4Safresh1 if (exists $todo{minlen}) { 74*256a93a4Safresh1 ++$test; 75*256a93a4Safresh1 $skip || ok($got >= $expect, "$comment minlen $got >= $expect"); 76*256a93a4Safresh1 my $todo = $todo{minlen}; 77*256a93a4Safresh1 local $TODO = 1; 78*256a93a4Safresh1 $skip || is($got, $todo, "$comment minlen $got = $todo"); 79*256a93a4Safresh1 } else { 80*256a93a4Safresh1 ++$test; 81*256a93a4Safresh1 $skip || is($got, $expect, "$comment minlen $got = $expect"); 82*256a93a4Safresh1 } 83*256a93a4Safresh1 84*256a93a4Safresh1 ($got, $expect) = ($o->{minlenret}, $opt{minlenret} // $minlen); 85*256a93a4Safresh1 if (exists $todo{minlenret}) { 86*256a93a4Safresh1 ++$test; 87*256a93a4Safresh1 $skip || ok($got >= $expect, "$comment minlenret $got >= $expect"); 88*256a93a4Safresh1 my $todo = $todo{minlenret}; 89*256a93a4Safresh1 local $TODO = 1; 90*256a93a4Safresh1 $skip || is($got, $todo, "$comment minlenret $got = $todo"); 91*256a93a4Safresh1 } else { 92*256a93a4Safresh1 ++$test; 93*256a93a4Safresh1 $skip || is($got, $expect, "$comment minlenret $got = $expect"); 94*256a93a4Safresh1 } 95*256a93a4Safresh1 96*256a93a4Safresh1 my($autf, $aoff, $acheck, $astr) = ($anchored =~ m{ 97*256a93a4Safresh1 ^ (u?) (\d*) ([-+]) (.*) \z 98*256a93a4Safresh1 }sx) or die "Can't parse anchored test '$anchored'"; 99*256a93a4Safresh1 if ($autf eq 'u') { 100*256a93a4Safresh1 ++$test; 101*256a93a4Safresh1 $skip || is($o->{anchored}, undef, "$comment no anchored"); 102*256a93a4Safresh1 ++$test; 103*256a93a4Safresh1 local $TODO = 1 if exists $todo{'anchored utf8'}; 104*256a93a4Safresh1 $skip || is($o->{'anchored utf8'}, $astr, "$comment got anchored utf8"); 105*256a93a4Safresh1 } elsif (length $astr) { 106*256a93a4Safresh1 ++$test; 107*256a93a4Safresh1 $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); 108*256a93a4Safresh1 ++$test; 109*256a93a4Safresh1 local $TODO = 1 if exists $todo{anchored}; 110*256a93a4Safresh1 $skip || is($o->{anchored}, $astr, "$comment got anchored"); 111*256a93a4Safresh1 } else { 112*256a93a4Safresh1 ++$test; 113*256a93a4Safresh1 $skip || is($o->{anchored}, undef, "$comment no anchored"); 114*256a93a4Safresh1 ++$test; 115*256a93a4Safresh1 $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); 116*256a93a4Safresh1 } 117*256a93a4Safresh1 # skip offset checks if we failed to find a string 118*256a93a4Safresh1 my $local_skip = ( 119*256a93a4Safresh1 !$skip && !defined($o->{anchored} // $o->{anchored_utf8}) 120*256a93a4Safresh1 ) ? 'no anchored string' : undef; 121*256a93a4Safresh1 if (length $aoff) { 122*256a93a4Safresh1 ++$test; 123*256a93a4Safresh1 SKIP: { 124*256a93a4Safresh1 skip($local_skip) if $local_skip; 125*256a93a4Safresh1 local $TODO = 1 if exists $todo{'anchored min offset'}; 126*256a93a4Safresh1 $skip || is($o->{'anchored min offset'}, $aoff, 127*256a93a4Safresh1 "$comment anchored min offset"); 128*256a93a4Safresh1 } 129*256a93a4Safresh1 # we don't care about anchored max: it may be set same as min or 0 130*256a93a4Safresh1 } 131*256a93a4Safresh1 132*256a93a4Safresh1 my($futf, $fmin, $fmax, $fcheck, $fstr) = ($floating =~ m{ 133*256a93a4Safresh1 ^ (u?) (\d*) (?: : (\d*) )? ([-+]) (.*) \z 134*256a93a4Safresh1 }sx) or die "Can't parse floating test '$floating'"; 135*256a93a4Safresh1 if ($futf eq 'u') { 136*256a93a4Safresh1 ++$test; 137*256a93a4Safresh1 $skip || is($o->{floating}, undef, "$comment no floating"); 138*256a93a4Safresh1 ++$test; 139*256a93a4Safresh1 local $TODO = 1 if exists $todo{'floating utf8'}; 140*256a93a4Safresh1 $skip || is($o->{'floating utf8'}, $fstr, "$comment got floating utf8"); 141*256a93a4Safresh1 } elsif (length $fstr) { 142*256a93a4Safresh1 ++$test; 143*256a93a4Safresh1 $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); 144*256a93a4Safresh1 ++$test; 145*256a93a4Safresh1 local $TODO = 1 if exists $todo{floating}; 146*256a93a4Safresh1 $skip || is($o->{floating}, $fstr, "$comment got floating"); 147*256a93a4Safresh1 } else { 148*256a93a4Safresh1 ++$test; 149*256a93a4Safresh1 $skip || is($o->{floating}, undef, "$comment no floating"); 150*256a93a4Safresh1 ++$test; 151*256a93a4Safresh1 $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); 152*256a93a4Safresh1 } 153*256a93a4Safresh1 # skip offset checks if we failed to find a string 154*256a93a4Safresh1 $local_skip = ( 155*256a93a4Safresh1 !$skip && !defined($o->{floating} // $o->{floating_utf8}) 156*256a93a4Safresh1 ) ? 'no floating string' : undef; 157*256a93a4Safresh1 if (length $fmin) { 158*256a93a4Safresh1 ++$test; 159*256a93a4Safresh1 SKIP: { 160*256a93a4Safresh1 skip($local_skip) if $local_skip; 161*256a93a4Safresh1 local $TODO = 1 if exists $todo{'floating min offset'}; 162*256a93a4Safresh1 $skip || is($o->{'floating min offset'}, $fmin, 163*256a93a4Safresh1 "$comment floating min offset"); 164*256a93a4Safresh1 } 165*256a93a4Safresh1 } 166*256a93a4Safresh1 if (defined $fmax) { 167*256a93a4Safresh1 ++$test; 168*256a93a4Safresh1 SKIP: { 169*256a93a4Safresh1 skip($local_skip) if $local_skip; 170*256a93a4Safresh1 local $TODO = 1 if exists $todo{'floating max offset'}; 171*256a93a4Safresh1 $skip || is($o->{'floating max offset'}, $fmax, 172*256a93a4Safresh1 "$comment floating max offset"); 173*256a93a4Safresh1 } 174*256a93a4Safresh1 } 175*256a93a4Safresh1 176*256a93a4Safresh1 my $check = ($acheck eq '+') ? 'anchored' 177*256a93a4Safresh1 : ($fcheck eq '+') ? 'floating' 178*256a93a4Safresh1 : ($acheck eq '-') ? undef 179*256a93a4Safresh1 : 'none'; 180*256a93a4Safresh1 $local_skip = ( 181*256a93a4Safresh1 !$skip && $check && ( 182*256a93a4Safresh1 ($check eq 'anchored' 183*256a93a4Safresh1 && !defined($o->{anchored} // $o->{anchored_utf8})) 184*256a93a4Safresh1 || ($check eq 'floating' 185*256a93a4Safresh1 && !defined($o->{floating} // $o->{floating_utf8})) 186*256a93a4Safresh1 ) 187*256a93a4Safresh1 ) ? "$check not found" : undef; 188*256a93a4Safresh1 if (defined $check) { 189*256a93a4Safresh1 ++$test; 190*256a93a4Safresh1 SKIP: { 191*256a93a4Safresh1 skip($local_skip) if $local_skip; 192*256a93a4Safresh1 local $TODO = 1 if exists $todo{checking}; 193*256a93a4Safresh1 $skip || is($o->{checking}, $check, "$comment checking $check"); 194*256a93a4Safresh1 } 195*256a93a4Safresh1 } 196*256a93a4Safresh1 197*256a93a4Safresh1 # booleans 198*256a93a4Safresh1 for (qw{ noscan isall skip implicit }, 199*256a93a4Safresh1 'anchor SBOL', 'anchor MBOL', 'anchor GPOS' 200*256a93a4Safresh1 ) { 201*256a93a4Safresh1 my $got = $o->{$_}; 202*256a93a4Safresh1 my $expect = exists($opt{$_}) ? ($opt{$_} // 1) : 0; 203*256a93a4Safresh1 ++$test; 204*256a93a4Safresh1 local $TODO = 1 if exists $todo{"T$_"}; 205*256a93a4Safresh1 $skip || is($got, $expect ? 1 : 0, "$comment $_"); 206*256a93a4Safresh1 } 207*256a93a4Safresh1 208*256a93a4Safresh1 # integer 209*256a93a4Safresh1 for (qw{ gofs }) { 210*256a93a4Safresh1 my $got = $o->{$_}; 211*256a93a4Safresh1 my $expect = $opt{$_} // 0; 212*256a93a4Safresh1 ++$test; 213*256a93a4Safresh1 local $TODO = 1 if exists $todo{"T$_"}; 214*256a93a4Safresh1 $skip || is($got, $expect || 0, "$comment $_"); 215*256a93a4Safresh1 } 216*256a93a4Safresh1 217*256a93a4Safresh1 # string 218*256a93a4Safresh1 for (qw{ stclass }) { 219*256a93a4Safresh1 my $got = $o->{$_}; 220*256a93a4Safresh1 my $expect = $opt{$_}; 221*256a93a4Safresh1 my $qr = (defined($expect) && ($expect =~ s{^~}{})) ? 1 : 0; 222*256a93a4Safresh1 ++$test; 223*256a93a4Safresh1 local $TODO = 1 if exists $todo{"T$_"}; 224*256a93a4Safresh1 $skip || ($qr 225*256a93a4Safresh1 ? like($got, qr{$expect}, "$comment $_") 226*256a93a4Safresh1 : is($got, $expect, "$comment $_") 227*256a93a4Safresh1 ); 228*256a93a4Safresh1 } 229*256a93a4Safresh1 230*256a93a4Safresh1 skip($skip, $test) if $skip; 231*256a93a4Safresh1} 232*256a93a4Safresh1done_testing(); 233*256a93a4Safresh1__END__ 234*256a93a4Safresh1(?:) 0 - - Tisall 235*256a93a4Safresh1 236*256a93a4Safresh1# various forms of anchored substring 237*256a93a4Safresh1abc 3 0+abc - isall 238*256a93a4Safresh1.{10}abc 13 10+abc - - 239*256a93a4Safresh1(?i:)abc 3 0+abc - isall 240*256a93a4Safresh1a(?:)bc 3 0+abc - isall 241*256a93a4Safresh1a()bc 3 0+abc - - 242*256a93a4Safresh1a(?i:)bc 3 0+abc - isall 243*256a93a4Safresh1a(b)c 3 0+abc - - 244*256a93a4Safresh1a((?i:b))c 3 0+abc - Tanchored 245*256a93a4Safresh1a[bB]c 3 0+abc - Tanchored 246*256a93a4Safresh1(?=abc) 0 0+abc - Tanchored,Tminlen=3,minlenret=0 247*256a93a4Safresh1abc|abc 3 0+abc - isall 248*256a93a4Safresh1abcd|abce 4 0+abc - - 249*256a93a4Safresh1acde|bcde 4 1+cde - Tanchored,stclass=~[ab] 250*256a93a4Safresh1acdef|bcdeg 5 1+cde - Tanchored,stclass=~[ab] 251*256a93a4Safresh1 252*256a93a4Safresh1# same as above, floating 253*256a93a4Safresh1.?abc 3 - 0:1+abc - 254*256a93a4Safresh1.?.{10}abc 13 - 10:11+abc - 255*256a93a4Safresh1.?(?i:)abc 3 - 0:1+abc - 256*256a93a4Safresh1.?a(?:)bc 3 - 0:1+abc - 257*256a93a4Safresh1.?a()bc 3 - 0:1+abc - 258*256a93a4Safresh1.?a(?i:)bc 3 - 0:1+abc - 259*256a93a4Safresh1.?a(b)c 3 - 0+abc - 260*256a93a4Safresh1.?a((?i:b))c 3 - 0+abc Tfloating 261*256a93a4Safresh1.?a[bB]c 3 - 0:1+abc Tfloating 262*256a93a4Safresh1.?(?=abc) 0 - 0:1+abc Tfloating,Tminlen=3,minlenret=0 263*256a93a4Safresh1.?(?:abc|abc) 3 - 0:1+abc - 264*256a93a4Safresh1.?(?:abcd|abce) 4 - 0:1+abc - 265*256a93a4Safresh1.?(?:acde|bcde) 4 - 1:2+cde Tfloating 266*256a93a4Safresh1.?(?:acdef|bcdeg) 5 - 1:2+cde Tfloating 267*256a93a4Safresh1 268*256a93a4Safresh1a(b){2,3}c 4 -abb 1+bbc 269*256a93a4Safresh1a(b|bb)c 3 -ab 1-bc Tfloating,Tfloating min offset 270*256a93a4Safresh1a(b|bb){2}c 4 -abb 1-bbc Tanchored,Tfloating,Tfloating min offset 271*256a93a4Safresh1 272*256a93a4Safresh1abc(*COMMIT)xyz 6 0+abc - - 273*256a93a4Safresh1abc(*ACCEPT)xyz 3 0+abc - - 274*256a93a4Safresh1# Must not have stclass=[x] 275*256a93a4Safresh1(*ACCEPT)xyz 0 - - - 276*256a93a4Safresh1(a(*ACCEPT)){2} 1 0+a - - 277