1#!./perl 2# 3# ex: set ts=8 sts=4 sw=4 et: 4# 5# Here we test for optimizations in the regexp engine. 6# We try to distinguish between "nice to have" optimizations and those 7# we consider essential: failure of the latter should be considered bugs, 8# while failure of the former should at worst be TODO. 9# 10# Format of data lines is tab-separated: pattern, minlen, anchored, floating, 11# other-options, comment. 12# - pattern will be subject to string eval as "qr{$pattern}". 13# - minlen is a non-negative integer. 14# - anchored/floating are of the form "u23:45+string". If initial "u" is 15# present we expect a utf8 substring, else a byte substring; subsequent 16# digits are the min offset; optional /:\d+/ is the max offset (not 17# supported for anchored; assumed undef if not present for floating); 18# subsequent '-' or '+' indicates if this is the substring being checked; 19# "string" is the substring to expect. Use "-" for the whole entry to 20# indicate no substring of this type. 21# - other-options is a comma-separated list of bare flags or option=value 22# strings. Those with an initial "T" mark the corresponding test TODO. 23# Booleans (noscan, isall, skip, implicit, anchor SBOL, anchor MBOL, 24# anchor GPOS) are expected false if not mentioned, expected true if 25# supplied as bare flags. stclass may be supplied as a pattern match 26# as eg "stclass=~^ANYOF". 27# - as a special-case, minlenret is expected to be the same as minlen 28# unless specified in other-options. 29# 30 31use strict; 32use warnings; 33use 5.010; 34 35$| = 1; 36 37BEGIN { 38 chdir 't' if -d 't'; 39 require './test.pl'; 40 set_up_inc('../lib'); 41 skip_all_if_miniperl("no dynamic loading on miniperl, no re::optimization"); 42} 43 44no warnings qw{ experimental }; 45use feature qw{ refaliasing declared_refs }; 46our \$TODO = \$::TODO; 47 48use re (); 49 50while (<DATA>) { 51 chomp; 52 if (m{^\s*(?:#|\z)}) { 53 # skip blank/comment lines 54 next; 55 } 56 my($pat, $minlen, $anchored, $floating, $other, $comment) = split /\t/; 57 my %todo; 58 my %opt = map { 59 my($k, $v) = split /=/, $_, 2; 60 ($k =~ s/^T//) ? do { $todo{$k} = $v; () } : ($k => $v); 61 } split /,/, $other // ''; 62 $comment = (defined $comment && length $comment) 63 ? "$pat ($comment):" 64 : "$pat:"; 65 66 my $o = re::optimization(eval "qr{$pat}"); 67 ok($o, "$comment compiled ok"); 68 69 my $skip = $o ? undef : "could not get info for qr{$pat}"; 70 my $test = 0; 71 72 my($got, $expect) = ($o->{minlen}, $minlen); 73 if (exists $todo{minlen}) { 74 ++$test; 75 $skip || ok($got >= $expect, "$comment minlen $got >= $expect"); 76 my $todo = $todo{minlen}; 77 local $TODO = 1; 78 $skip || is($got, $todo, "$comment minlen $got = $todo"); 79 } else { 80 ++$test; 81 $skip || is($got, $expect, "$comment minlen $got = $expect"); 82 } 83 84 ($got, $expect) = ($o->{minlenret}, $opt{minlenret} // $minlen); 85 if (exists $todo{minlenret}) { 86 ++$test; 87 $skip || ok($got >= $expect, "$comment minlenret $got >= $expect"); 88 my $todo = $todo{minlenret}; 89 local $TODO = 1; 90 $skip || is($got, $todo, "$comment minlenret $got = $todo"); 91 } else { 92 ++$test; 93 $skip || is($got, $expect, "$comment minlenret $got = $expect"); 94 } 95 96 my($autf, $aoff, $acheck, $astr) = ($anchored =~ m{ 97 ^ (u?) (\d*) ([-+]) (.*) \z 98 }sx) or die "Can't parse anchored test '$anchored'"; 99 if ($autf eq 'u') { 100 ++$test; 101 $skip || is($o->{anchored}, undef, "$comment no anchored"); 102 ++$test; 103 local $TODO = 1 if exists $todo{'anchored utf8'}; 104 $skip || is($o->{'anchored utf8'}, $astr, "$comment got anchored utf8"); 105 } elsif (length $astr) { 106 ++$test; 107 $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); 108 ++$test; 109 local $TODO = 1 if exists $todo{anchored}; 110 $skip || is($o->{anchored}, $astr, "$comment got anchored"); 111 } else { 112 ++$test; 113 $skip || is($o->{anchored}, undef, "$comment no anchored"); 114 ++$test; 115 $skip || is($o->{anchored_utf8}, undef, "$comment no anchored utf8"); 116 } 117 # skip offset checks if we failed to find a string 118 my $local_skip = ( 119 !$skip && !defined($o->{anchored} // $o->{anchored_utf8}) 120 ) ? 'no anchored string' : undef; 121 if (length $aoff) { 122 ++$test; 123 SKIP: { 124 skip($local_skip) if $local_skip; 125 local $TODO = 1 if exists $todo{'anchored min offset'}; 126 $skip || is($o->{'anchored min offset'}, $aoff, 127 "$comment anchored min offset"); 128 } 129 # we don't care about anchored max: it may be set same as min or 0 130 } 131 132 my($futf, $fmin, $fmax, $fcheck, $fstr) = ($floating =~ m{ 133 ^ (u?) (\d*) (?: : (\d*) )? ([-+]) (.*) \z 134 }sx) or die "Can't parse floating test '$floating'"; 135 if ($futf eq 'u') { 136 ++$test; 137 $skip || is($o->{floating}, undef, "$comment no floating"); 138 ++$test; 139 local $TODO = 1 if exists $todo{'floating utf8'}; 140 $skip || is($o->{'floating utf8'}, $fstr, "$comment got floating utf8"); 141 } elsif (length $fstr) { 142 ++$test; 143 $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); 144 ++$test; 145 local $TODO = 1 if exists $todo{floating}; 146 $skip || is($o->{floating}, $fstr, "$comment got floating"); 147 } else { 148 ++$test; 149 $skip || is($o->{floating}, undef, "$comment no floating"); 150 ++$test; 151 $skip || is($o->{floating_utf8}, undef, "$comment no floating utf8"); 152 } 153 # skip offset checks if we failed to find a string 154 $local_skip = ( 155 !$skip && !defined($o->{floating} // $o->{floating_utf8}) 156 ) ? 'no floating string' : undef; 157 if (length $fmin) { 158 ++$test; 159 SKIP: { 160 skip($local_skip) if $local_skip; 161 local $TODO = 1 if exists $todo{'floating min offset'}; 162 $skip || is($o->{'floating min offset'}, $fmin, 163 "$comment floating min offset"); 164 } 165 } 166 if (defined $fmax) { 167 ++$test; 168 SKIP: { 169 skip($local_skip) if $local_skip; 170 local $TODO = 1 if exists $todo{'floating max offset'}; 171 $skip || is($o->{'floating max offset'}, $fmax, 172 "$comment floating max offset"); 173 } 174 } 175 176 my $check = ($acheck eq '+') ? 'anchored' 177 : ($fcheck eq '+') ? 'floating' 178 : ($acheck eq '-') ? undef 179 : 'none'; 180 $local_skip = ( 181 !$skip && $check && ( 182 ($check eq 'anchored' 183 && !defined($o->{anchored} // $o->{anchored_utf8})) 184 || ($check eq 'floating' 185 && !defined($o->{floating} // $o->{floating_utf8})) 186 ) 187 ) ? "$check not found" : undef; 188 if (defined $check) { 189 ++$test; 190 SKIP: { 191 skip($local_skip) if $local_skip; 192 local $TODO = 1 if exists $todo{checking}; 193 $skip || is($o->{checking}, $check, "$comment checking $check"); 194 } 195 } 196 197 # booleans 198 for (qw{ noscan isall skip implicit }, 199 'anchor SBOL', 'anchor MBOL', 'anchor GPOS' 200 ) { 201 my $got = $o->{$_}; 202 my $expect = exists($opt{$_}) ? ($opt{$_} // 1) : 0; 203 ++$test; 204 local $TODO = 1 if exists $todo{"T$_"}; 205 $skip || is($got, $expect ? 1 : 0, "$comment $_"); 206 } 207 208 # integer 209 for (qw{ gofs }) { 210 my $got = $o->{$_}; 211 my $expect = $opt{$_} // 0; 212 ++$test; 213 local $TODO = 1 if exists $todo{"T$_"}; 214 $skip || is($got, $expect || 0, "$comment $_"); 215 } 216 217 # string 218 for (qw{ stclass }) { 219 my $got = $o->{$_}; 220 my $expect = $opt{$_}; 221 my $qr = (defined($expect) && ($expect =~ s{^~}{})) ? 1 : 0; 222 ++$test; 223 local $TODO = 1 if exists $todo{"T$_"}; 224 $skip || ($qr 225 ? like($got, qr{$expect}, "$comment $_") 226 : is($got, $expect, "$comment $_") 227 ); 228 } 229 230 skip($skip, $test) if $skip; 231} 232done_testing(); 233__END__ 234(?:) 0 - - Tisall 235 236# various forms of anchored substring 237abc 3 0+abc - isall 238.{10}abc 13 10+abc - - 239(?i:)abc 3 0+abc - isall 240a(?:)bc 3 0+abc - isall 241a()bc 3 0+abc - - 242a(?i:)bc 3 0+abc - isall 243a(b)c 3 0+abc - - 244a((?i:b))c 3 0+abc - Tanchored 245a[bB]c 3 0+abc - Tanchored 246(?=abc) 0 0+abc - Tanchored,Tminlen=3,minlenret=0 247abc|abc 3 0+abc - isall 248abcd|abce 4 0+abc - - 249acde|bcde 4 1+cde - Tanchored,stclass=~[ab] 250acdef|bcdeg 5 1+cde - Tanchored,stclass=~[ab] 251 252# same as above, floating 253.?abc 3 - 0:1+abc - 254.?.{10}abc 13 - 10:11+abc - 255.?(?i:)abc 3 - 0:1+abc - 256.?a(?:)bc 3 - 0:1+abc - 257.?a()bc 3 - 0:1+abc - 258.?a(?i:)bc 3 - 0:1+abc - 259.?a(b)c 3 - 0+abc - 260.?a((?i:b))c 3 - 0+abc Tfloating 261.?a[bB]c 3 - 0:1+abc Tfloating 262.?(?=abc) 0 - 0:1+abc Tfloating,Tminlen=3,minlenret=0 263.?(?:abc|abc) 3 - 0:1+abc - 264.?(?:abcd|abce) 4 - 0:1+abc - 265.?(?:acde|bcde) 4 - 1:2+cde Tfloating 266.?(?:acdef|bcdeg) 5 - 1:2+cde Tfloating 267 268a(b){2,3}c 4 -abb 1+bbc 269a(b|bb)c 3 -ab 1-bc Tfloating,Tfloating min offset 270a(b|bb){2}c 4 -abb 1-bbc Tanchored,Tfloating,Tfloating min offset 271 272abc(*COMMIT)xyz 6 0+abc - - 273abc(*ACCEPT)xyz 3 0+abc - - 274# Must not have stclass=[x] 275(*ACCEPT)xyz 0 - - - 276(a(*ACCEPT)){2} 1 0+a - - 277