1b39c5158Smillert#!./perl 2b39c5158Smillert 3b39c5158Smillert# The tests are in a separate file 't/re/re_tests'. 4b39c5158Smillert# Each line in that file is a separate test. 5b39c5158Smillert# There are five columns, separated by tabs. 6b8851fccSafresh1# An optional sixth column is used to give a reason, only when skipping tests 7b39c5158Smillert# 89f11ffb7Safresh1# Column 1 contains the pattern, optionally enclosed in C<''> C<::> or 99f11ffb7Safresh1# C<//>. Modifiers can be put after the closing delimiter. C<''> will 109f11ffb7Safresh1# automatically be added to any other patterns. 11b39c5158Smillert# 12b39c5158Smillert# Column 2 contains the string to be matched. 13b39c5158Smillert# 14b39c5158Smillert# Column 3 contains the expected result: 15b39c5158Smillert# y expect a match 16b39c5158Smillert# n expect no match 17b39c5158Smillert# c expect an error 18b39c5158Smillert# T the test is a TODO (can be combined with y/n/c) 19898184e3Ssthen# M skip test on miniperl (combine with y/n/c/T) 20b39c5158Smillert# B test exposes a known bug in Perl, should be skipped 21b39c5158Smillert# b test exposes a known bug in Perl, should be skipped if noamp 22b39c5158Smillert# t test exposes a bug with threading, TODO if qr_embed_thr 2391f110e0Safresh1# s test should only be run for regex_sets_compat.t 2491f110e0Safresh1# S test should not be run for regex_sets_compat.t 25b8851fccSafresh1# a test should only be run on ASCII platforms 26b8851fccSafresh1# e test should only be run on EBCDIC platforms 27b39c5158Smillert# 28b39c5158Smillert# Columns 4 and 5 are used only if column 3 contains C<y> or C<c>. 29b39c5158Smillert# 30b39c5158Smillert# Column 4 contains a string, usually C<$&>. 31b39c5158Smillert# 32b39c5158Smillert# Column 5 contains the expected result of double-quote 33b39c5158Smillert# interpolating that string after the match, or start of error message. 34b39c5158Smillert# 35b39c5158Smillert# Column 6, if present, contains a reason why the test is skipped. 36b39c5158Smillert# This is printed with "skipped", for harness to pick up. 37b39c5158Smillert# 386fb12b70Safresh1# Column 7 can be used for comments 396fb12b70Safresh1# 40b39c5158Smillert# \n in the tests are interpolated, as are variables of the form ${\w+}. 41b39c5158Smillert# 42b39c5158Smillert# Blanks lines are treated as PASSING tests to keep the line numbers 43b39c5158Smillert# linked to the test number. 44b39c5158Smillert# 45b39c5158Smillert# If you want to add a regular expression test that can't be expressed 46b39c5158Smillert# in this format, don't add it here: put it in re/pat.t instead. 47b39c5158Smillert# 48b39c5158Smillert# Note that the inputs get passed on as "m're'", so the re bypasses the lexer. 49b39c5158Smillert# This means this file cannot be used for testing anything that the lexer 50b39c5158Smillert# handles; in 5.12 this means just \N{NAME} and \N{U+...}. 51b39c5158Smillert# 52b39c5158Smillert# Note that columns 2,3 and 5 are all enclosed in double quotes and then 53b39c5158Smillert# evalled; so something like a\"\x{100}$1 has length 3+length($1). 54b8851fccSafresh1# 55b8851fccSafresh1# \x... and \o{...} constants are automatically converted to the native 56b8851fccSafresh1# character set if necessary. \[0-7] constants aren't 57b39c5158Smillert 58898184e3Ssthenmy ($file, $iters); 59b39c5158SmillertBEGIN { 60b39c5158Smillert $iters = shift || 1; # Poor man performance suite, 10000 is OK. 61b39c5158Smillert 62b39c5158Smillert # Do this open before any chdir 63b39c5158Smillert $file = shift; 64b39c5158Smillert if (defined $file) { 65b39c5158Smillert open TESTS, $file or die "Can't open $file"; 66b39c5158Smillert } 67b39c5158Smillert 68b39c5158Smillert chdir 't' if -d 't'; 69b8851fccSafresh1 @INC = qw '../lib ../ext/re'; 70b8851fccSafresh1 if (!defined &DynaLoader::boot_DynaLoader) { # miniperl 71b8851fccSafresh1 print("1..0 # Skip Unicode tables not built yet\n"), exit 7256d68f1eSafresh1 unless eval 'require "unicore/UCD.pl"'; 73b8851fccSafresh1 } 749f11ffb7Safresh1 759f11ffb7Safresh1 # Some of the tests need a locale; which one doesn't much matter, except 769f11ffb7Safresh1 # that it be valid. Make sure of that 779f11ffb7Safresh1 eval { require POSIX; 789f11ffb7Safresh1 POSIX->import(qw(LC_ALL setlocale)); 799f11ffb7Safresh1 POSIX::setlocale(&LC_ALL, "C"); 809f11ffb7Safresh1 }; 81b39c5158Smillert} 82898184e3Ssthen 83898184e3Ssthensub _comment { 84898184e3Ssthen return map { /^#/ ? "$_\n" : "# $_\n" } 85898184e3Ssthen map { split /\n/ } @_; 86b39c5158Smillert} 87b39c5158Smillert 88b39c5158Smillertuse strict; 89b39c5158Smillertuse warnings FATAL=>"all"; 90b46d8ef2Safresh1no warnings 'experimental::vlb'; 919f11ffb7Safresh1our ($bang, $ffff, $nulnul); # used by the tests 929f11ffb7Safresh1our ($qr, $skip_amp, $qr_embed, $qr_embed_thr, $regex_sets, $alpha_assertions, $no_null); # set by our callers 9391f110e0Safresh1 9456d68f1eSafresh1if ($no_null && ! eval { require XS::APItest }) { 9556d68f1eSafresh1 print("1..0 # Skip XS::APItest not available\n"), exit 9656d68f1eSafresh1} 9756d68f1eSafresh1 989f11ffb7Safresh1my $expanded_text = "expanded name from original test number"; 999f11ffb7Safresh1my $expanded_text_re = qr/$expanded_text/; 100b39c5158Smillert 101b39c5158Smillertif (!defined $file) { 102898184e3Ssthen open TESTS, 're/re_tests' or die "Can't open re/re_tests: $!"; 103b39c5158Smillert} 104b39c5158Smillert 105b39c5158Smillertmy @tests = <TESTS>; 106b39c5158Smillert 107b39c5158Smillertclose TESTS; 108b39c5158Smillert 10956d68f1eSafresh1my $test_num = 0; 11056d68f1eSafresh1 11156d68f1eSafresh1# Some scenarios add extra tests to those just read in. For those where there 11256d68f1eSafresh1# is a character set translation, the added test will already have been 11356d68f1eSafresh1# translated, so any test number beginning with this one shouldn't be 11456d68f1eSafresh1# translated again. 11556d68f1eSafresh1my $first_already_converted_test_num = @tests + 1; 11656d68f1eSafresh1 11756d68f1eSafresh1sub convert_from_ascii_guts { 11856d68f1eSafresh1 my $string_ref = shift; 11956d68f1eSafresh1 12056d68f1eSafresh1 return if $test_num >= $first_already_converted_test_num; 12156d68f1eSafresh1 12256d68f1eSafresh1 #my $save = $string_ref; 12356d68f1eSafresh1 # Convert \x{...}, \o{...} 12456d68f1eSafresh1 $$string_ref =~ s/ (?<! \\ ) \\x\{ ( .*? ) } / "\\x{" . sprintf("%X", utf8::unicode_to_native(hex $1)) . "}" /gex; 12556d68f1eSafresh1 $$string_ref =~ s/ (?<! \\ ) \\o\{ ( .*? ) } / "\\o{" . sprintf("%o", utf8::unicode_to_native(oct $1)) . "}" /gex; 12656d68f1eSafresh1 12756d68f1eSafresh1 # Convert \xAB 12856d68f1eSafresh1 $$string_ref =~ s/ (?<! \\ ) \\x ( [A-Fa-f0-9]{2} ) / "\\x" . sprintf("%02X", utf8::unicode_to_native(hex $1)) /gex; 12956d68f1eSafresh1 13056d68f1eSafresh1 # Convert \xA 13156d68f1eSafresh1 $$string_ref =~ s/ (?<! \\ ) \\x ( [A-Fa-f0-9] ) (?! [A-Fa-f0-9] ) / "\\x" . sprintf("%X", utf8::unicode_to_native(hex $1)) /gex; 13256d68f1eSafresh1 13356d68f1eSafresh1 #print STDERR __LINE__, ": $save\n$string_ref\n" if $save ne $string_ref; 13456d68f1eSafresh1 return; 13556d68f1eSafresh1} 13656d68f1eSafresh1 13756d68f1eSafresh1*convert_from_ascii = (ord("A") == 65) 13856d68f1eSafresh1 ? sub { 1; } 13956d68f1eSafresh1 : \&convert_from_ascii_guts; 14056d68f1eSafresh1 141b39c5158Smillert$bang = sprintf "\\%03o", ord "!"; # \41 would not be portable. 142b39c5158Smillert$ffff = chr(0xff) x 2; 143b39c5158Smillert$nulnul = "\0" x 2; 144898184e3Ssthenmy $OP = $qr ? 'qr' : 'm'; 145b39c5158Smillert 146b39c5158Smillert$| = 1; 147*e0680481Safresh1$::normalize_pat = $::normalize_pat; # silence warning 148b39c5158SmillertTEST: 149b39c5158Smillertforeach (@tests) { 15056d68f1eSafresh1 $test_num++; 151b39c5158Smillert if (!/\S/ || /^\s*#/ || /^__END__$/) { 152b8851fccSafresh1 chomp; 153b8851fccSafresh1 my ($not,$comment)= split /\s*#\s*/, $_, 2; 154b8851fccSafresh1 $comment ||= "(blank line)"; 15556d68f1eSafresh1 print "ok $test_num # $comment\n"; 156b39c5158Smillert next; 157b39c5158Smillert } 158b39c5158Smillert chomp; 15991f110e0Safresh1 s/\\n/\n/g unless $regex_sets; 160b8851fccSafresh1 my ($pat, $subject, $result, $repl, $expect, $reason, $comment) = split(/\t/,$_,7); 1619f11ffb7Safresh1 $comment = "" unless defined $comment; 1626fb12b70Safresh1 if (!defined $subject) { 16356d68f1eSafresh1 die "Bad test definition on line $test_num: $_\n"; 1646fb12b70Safresh1 } 165b39c5158Smillert $reason = '' unless defined $reason; 166b39c5158Smillert my $input = join(':',$pat,$subject,$result,$repl,$expect); 167b8851fccSafresh1 168b39c5158Smillert # the double '' below keeps simple syntax highlighters from going crazy 169b39c5158Smillert $pat = "'$pat'" unless $pat =~ /^[:''\/]/; 170b39c5158Smillert $pat =~ s/(\$\{\w+\})/$1/eeg; 17191f110e0Safresh1 $pat =~ s/\\n/\n/g unless $regex_sets; 17256d68f1eSafresh1 convert_from_ascii(\$pat); 173b8851fccSafresh1 1749f11ffb7Safresh1 my $no_null_pat; 1759f11ffb7Safresh1 if ($no_null && $pat =~ /^'(.*)'\z/) { 1769f11ffb7Safresh1 $no_null_pat = XS::APItest::string_without_null($1); 1779f11ffb7Safresh1 } 1789f11ffb7Safresh1 17956d68f1eSafresh1 convert_from_ascii(\$subject); 180b39c5158Smillert $subject = eval qq("$subject"); die $@ if $@; 181b8851fccSafresh1 18256d68f1eSafresh1 convert_from_ascii(\$expect); 183b39c5158Smillert $expect = eval qq("$expect"); die $@ if $@; 184*e0680481Safresh1 my $has_amp = $input =~ /\$[&\`\']/; 185*e0680481Safresh1 $expect = $repl = '-' if $skip_amp and $has_amp; 186b8851fccSafresh1 187b39c5158Smillert my $todo_qr = $qr_embed_thr && ($result =~ s/t//); 188b39c5158Smillert my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//)); 189898184e3Ssthen ++$skip if $result =~ s/M// && !defined &DynaLoader::boot_DynaLoader; 190*e0680481Safresh1 191*e0680481Safresh1 if ($::normalize_pat) { 192*e0680481Safresh1 my $opat= $pat; 193*e0680481Safresh1 # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))* 194*e0680481Safresh1 $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g; 195*e0680481Safresh1 $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g; 196*e0680481Safresh1 if ($opat eq $pat) { 197*e0680481Safresh1 # we didn't change anything, no point in testing it again. 198*e0680481Safresh1 $skip++; 199*e0680481Safresh1 $reason = "Test not valid for $0"; 200*e0680481Safresh1 } elsif ($comment=~/!\s*normal/) { 201*e0680481Safresh1 $result .= "T"; 202*e0680481Safresh1 $comment = "# Known to be broken under $0"; 203*e0680481Safresh1 } 204*e0680481Safresh1 } 205*e0680481Safresh1 20691f110e0Safresh1 if ($result =~ s/ ( [Ss] ) //x) { 20791f110e0Safresh1 if (($1 eq 'S' && $regex_sets) || ($1 eq 's' && ! $regex_sets)) { 20891f110e0Safresh1 $skip++; 20991f110e0Safresh1 $reason = "Test not valid for $0"; 21091f110e0Safresh1 } 21191f110e0Safresh1 } 212b8851fccSafresh1 if ($result =~ s/a// && ord("A") != 65) { 213b8851fccSafresh1 $skip++; 214b8851fccSafresh1 $reason = "Test is only valid for ASCII platforms. $reason"; 215b8851fccSafresh1 } 216b8851fccSafresh1 if ($result =~ s/e// && ord("A") != 193) { 217b8851fccSafresh1 $skip++; 218b8851fccSafresh1 $reason = "Test is only valid for EBCDIC platforms. $reason"; 219b8851fccSafresh1 } 220b39c5158Smillert $reason = 'skipping $&' if $reason eq '' && $skip_amp; 221b39c5158Smillert $result =~ s/B//i unless $skip; 222*e0680481Safresh1 my $todo= ($result =~ s/T// && (!$skip_amp || !$has_amp)) ? " # TODO" : ""; 22356d68f1eSafresh1 my $testname= $test_num; 224b8851fccSafresh1 if ($comment) { 225b8851fccSafresh1 $comment=~s/^\s*(?:#\s*)?//; 226b8851fccSafresh1 $testname .= " - $comment" if $comment; 227b8851fccSafresh1 } 2289f11ffb7Safresh1 if (! $skip && $alpha_assertions) { 2299f11ffb7Safresh1 my $assertions_re = qr/ (?: \Q(?\E (?: > | <? [=>] ) ) /x; 2309f11ffb7Safresh1 if ($pat !~ $assertions_re && $comment !~ $expanded_text_re) { 2319f11ffb7Safresh1 $skip++; 2329f11ffb7Safresh1 $reason = "Pattern doesn't contain assertions"; 2339f11ffb7Safresh1 } 2349f11ffb7Safresh1 elsif ($comment !~ $expanded_text_re) { 2359f11ffb7Safresh1 my $expanded_pat = $pat; 2369f11ffb7Safresh1 2379f11ffb7Safresh1 $pat =~ s/\( \? > /(*atomic:/xg; 2389f11ffb7Safresh1 2399f11ffb7Safresh1 if ($pat =~ s/\( \? = /(*pla:/xg) { 2409f11ffb7Safresh1 $expanded_pat =~ s//(*positive_lookahead:/g; 2419f11ffb7Safresh1 } 2429f11ffb7Safresh1 if ($pat =~ s/\( \? ! /(*nla:/xg) { 2439f11ffb7Safresh1 $expanded_pat =~ s//(*negative_lookahead:/g; 2449f11ffb7Safresh1 } 2459f11ffb7Safresh1 if ($pat =~ s/\( \? <= /(*plb:/xg) { 2469f11ffb7Safresh1 $expanded_pat =~ s//(*positive_lookbehind:/g; 2479f11ffb7Safresh1 } 2489f11ffb7Safresh1 if ($pat =~ s/\( \? <! /(*nlb:/xg) { 2499f11ffb7Safresh1 $expanded_pat =~ s//(*negative_lookbehind:/g; 2509f11ffb7Safresh1 } 2519f11ffb7Safresh1 if ($expanded_pat ne $pat) { 25256d68f1eSafresh1 $comment .= " $expanded_text $test_num"; 2539f11ffb7Safresh1 push @tests, join "\t", $expanded_pat, 2549f11ffb7Safresh1 $subject // "", 2559f11ffb7Safresh1 $result // "", 2569f11ffb7Safresh1 $repl // "", 2579f11ffb7Safresh1 $expect // "", 2589f11ffb7Safresh1 $reason // "", 2599f11ffb7Safresh1 $comment; 2609f11ffb7Safresh1 } 2619f11ffb7Safresh1 } 2629f11ffb7Safresh1 } 2639f11ffb7Safresh1 elsif (! $skip && $regex_sets) { 264b39c5158Smillert 26591f110e0Safresh1 # If testing regex sets, change the [bracketed] classes into 266b8851fccSafresh1 # (?[bracketed]). But note that '\[' and '\c[' don't introduce such a 267b8851fccSafresh1 # class. (We don't bother looking for an odd number of backslashes, 268b8851fccSafresh1 # as this hasn't been needed so far.) 269b8851fccSafresh1 if ($pat !~ / (?<!\\c) (?<!\\) \[ /x) { 27091f110e0Safresh1 $skip++; 27191f110e0Safresh1 $reason = "Pattern doesn't contain [brackets]"; 27291f110e0Safresh1 } 27391f110e0Safresh1 else { # Use non-regex features of Perl to accomplish this. 27491f110e0Safresh1 my $modified = ""; 27591f110e0Safresh1 my $in_brackets = 0; 27691f110e0Safresh1 27791f110e0Safresh1 # Go through the pattern character-by-character. We also add 27891f110e0Safresh1 # blanks around each token to test the /x parts of (?[ ]) 27991f110e0Safresh1 my $pat_len = length($pat); 28091f110e0Safresh1 CHAR: for (my $i = 0; $i < $pat_len; $i++) { 28191f110e0Safresh1 my $curchar = substr($pat, $i, 1); 28291f110e0Safresh1 if ($curchar eq '\\') { 28391f110e0Safresh1 $modified .= " " if $in_brackets; 28491f110e0Safresh1 $modified .= $curchar; 28591f110e0Safresh1 $i++; 28691f110e0Safresh1 28791f110e0Safresh1 # Get the character the backslash is escaping 28891f110e0Safresh1 $curchar = substr($pat, $i, 1); 28991f110e0Safresh1 $modified .= $curchar; 29091f110e0Safresh1 29191f110e0Safresh1 # If the character following that is a '{}', treat the 29291f110e0Safresh1 # entire amount as a single token 29391f110e0Safresh1 if ($i < $pat_len -1 && substr($pat, $i+1, 1) eq '{') { 29491f110e0Safresh1 my $j = index($pat, '}', $i+2); 29591f110e0Safresh1 if ($j < 0) { 29691f110e0Safresh1 last unless $in_brackets; 29791f110e0Safresh1 if ($result eq 'c') { 29891f110e0Safresh1 $skip++; 29991f110e0Safresh1 $reason = "Can't handle compilation errors with unmatched '{'"; 30091f110e0Safresh1 } 30191f110e0Safresh1 else { 302b8851fccSafresh1 print "not ok $testname # Problem in $0; original = '$pat'; mod = '$modified'\n"; 30391f110e0Safresh1 next TEST; 30491f110e0Safresh1 } 30591f110e0Safresh1 } 30691f110e0Safresh1 $modified .= substr($pat, $i+1, $j - $i); 30791f110e0Safresh1 $i = $j; 30891f110e0Safresh1 } 30991f110e0Safresh1 elsif ($curchar eq 'x') { 31091f110e0Safresh1 31191f110e0Safresh1 # \x without brackets is supposed to be followed by 2 31291f110e0Safresh1 # hex digits. Take up to 2, and then add a blank 31391f110e0Safresh1 # after the last one. This avoids getting errors from 31491f110e0Safresh1 # (?[ ]) for run-ons, like \xabc 31591f110e0Safresh1 my $j = $i + 1; 31691f110e0Safresh1 for (; $j < $i + 3 && $j < $pat_len; $j++) { 31791f110e0Safresh1 my $curord = ord(substr($pat, $j, 1)); 31891f110e0Safresh1 if (!(($curord >= ord("A") && $curord <= ord("F")) 31991f110e0Safresh1 || ($curord >= ord("a") && $curord <= ord("f")) 32091f110e0Safresh1 || ($curord >= ord("0") && $curord <= ord("9")))) 32191f110e0Safresh1 { 32291f110e0Safresh1 $j++; 32391f110e0Safresh1 last; 32491f110e0Safresh1 } 32591f110e0Safresh1 } 32691f110e0Safresh1 $j--; 3279f11ffb7Safresh1 $modified .= substr($pat, $i + 1, $j - $i); 3289f11ffb7Safresh1 $modified .= " " if $in_brackets; 32991f110e0Safresh1 $i = $j; 33091f110e0Safresh1 } 33191f110e0Safresh1 elsif (ord($curchar) >= ord('0') 33291f110e0Safresh1 && (ord($curchar) <= ord('7'))) 33391f110e0Safresh1 { 33491f110e0Safresh1 # Similarly, octal constants have up to 3 digits. 33591f110e0Safresh1 my $j = $i + 1; 33691f110e0Safresh1 for (; $j < $i + 3 && $j < $pat_len; $j++) { 33791f110e0Safresh1 my $curord = ord(substr($pat, $j, 1)); 33891f110e0Safresh1 if (! ($curord >= ord("0") && $curord <= ord("7"))) { 33991f110e0Safresh1 $j++; 34091f110e0Safresh1 last; 34191f110e0Safresh1 } 34291f110e0Safresh1 } 34391f110e0Safresh1 $j--; 34491f110e0Safresh1 $modified .= substr($pat, $i + 1, $j - $i); 34591f110e0Safresh1 $i = $j; 34691f110e0Safresh1 } 34791f110e0Safresh1 34891f110e0Safresh1 next; 34991f110e0Safresh1 } # End of processing a backslash sequence 35091f110e0Safresh1 35191f110e0Safresh1 if (! $in_brackets # Skip (?{ }) 35291f110e0Safresh1 && $curchar eq '(' 35391f110e0Safresh1 && $i < $pat_len - 2 35491f110e0Safresh1 && substr($pat, $i+1, 1) eq '?' 35591f110e0Safresh1 && substr($pat, $i+2, 1) eq '{') 35691f110e0Safresh1 { 35791f110e0Safresh1 $skip++; 35891f110e0Safresh1 $reason = "Pattern contains '(?{'"; 35991f110e0Safresh1 last; 36091f110e0Safresh1 } 36191f110e0Safresh1 36291f110e0Safresh1 # Closing ']' 36391f110e0Safresh1 if ($curchar eq ']' && $in_brackets) { 36491f110e0Safresh1 $modified .= " ] ])"; 36591f110e0Safresh1 $in_brackets = 0; 36691f110e0Safresh1 next; 36791f110e0Safresh1 } 36891f110e0Safresh1 36991f110e0Safresh1 # A regular character. 37091f110e0Safresh1 if ($curchar ne '[') { 3719f11ffb7Safresh1 $modified .= " " if $in_brackets; 37291f110e0Safresh1 $modified .= $curchar; 37391f110e0Safresh1 next; 37491f110e0Safresh1 } 37591f110e0Safresh1 37691f110e0Safresh1 # Here is a '['; If not in a bracketed class, treat as the 37791f110e0Safresh1 # beginning of one. 37891f110e0Safresh1 if (! $in_brackets) { 37991f110e0Safresh1 $in_brackets = 1; 38091f110e0Safresh1 $modified .= "(?[ [ "; 38191f110e0Safresh1 38291f110e0Safresh1 # An immediately following ']' or '^]' is not the ending 38391f110e0Safresh1 # of the class, but is to be treated literally. 38491f110e0Safresh1 if ($i < $pat_len - 1 38591f110e0Safresh1 && substr($pat, $i+1, 1) eq ']') 38691f110e0Safresh1 { 38791f110e0Safresh1 $i ++; 38891f110e0Safresh1 $modified .= " ] "; 38991f110e0Safresh1 } 39091f110e0Safresh1 elsif ($i < $pat_len - 2 39191f110e0Safresh1 && substr($pat, $i+1, 1) eq '^' 39291f110e0Safresh1 && substr($pat, $i+2, 1) eq ']') 39391f110e0Safresh1 { 39491f110e0Safresh1 $i += 2; 39591f110e0Safresh1 $modified .= " ^ ] "; 39691f110e0Safresh1 } 39791f110e0Safresh1 next; 39891f110e0Safresh1 } 39991f110e0Safresh1 40091f110e0Safresh1 # Here is a plain '[' within [ ]. Could mean wants to 40191f110e0Safresh1 # match a '[', or it could be a posix class that has a 40291f110e0Safresh1 # corresponding ']'. Absorb either 40391f110e0Safresh1 40491f110e0Safresh1 $modified .= ' ['; 40591f110e0Safresh1 last if $i >= $pat_len - 1; 40691f110e0Safresh1 40791f110e0Safresh1 $i++; 40891f110e0Safresh1 $curchar = substr($pat, $i, 1); 40991f110e0Safresh1 if ($curchar =~ /[:=.]/) { 41091f110e0Safresh1 for (my $j = $i + 1; $j < $pat_len; $j++) { 41191f110e0Safresh1 next unless substr($pat, $j, 1) eq ']'; 41291f110e0Safresh1 last if $j - $i < 2; 41391f110e0Safresh1 if (substr($pat, $j - 1, 1) eq $curchar) { 41491f110e0Safresh1 # Here, is a posix class 41591f110e0Safresh1 $modified .= substr($pat, $i, $j - $i + 1) . " "; 41691f110e0Safresh1 $i = $j; 41791f110e0Safresh1 next CHAR; 41891f110e0Safresh1 } 41991f110e0Safresh1 } 42091f110e0Safresh1 } 42191f110e0Safresh1 42291f110e0Safresh1 # Here wasn't a posix class, just process normally 42391f110e0Safresh1 $modified .= " $curchar "; 42491f110e0Safresh1 } 42591f110e0Safresh1 42691f110e0Safresh1 if ($in_brackets && ! $skip) { 42791f110e0Safresh1 if ($result eq 'c') { 42891f110e0Safresh1 $skip++; 42991f110e0Safresh1 $reason = "Can't figure out where to put the (?[ and ]) since is a compilation error"; 43091f110e0Safresh1 } 43191f110e0Safresh1 else { 432b8851fccSafresh1 print "not ok $testname # Problem in $0; original = '$pat'; mod = '$modified'\n"; 43391f110e0Safresh1 next TEST; 43491f110e0Safresh1 } 43591f110e0Safresh1 } 43691f110e0Safresh1 43791f110e0Safresh1 # Use our modified pattern instead of the original 43891f110e0Safresh1 $pat = $modified; 43991f110e0Safresh1 } 44091f110e0Safresh1 } 441*e0680481Safresh1 if ($::normalize_pat){ 442*e0680481Safresh1 if (!$skip && ($result eq "y" or $result eq "n")) { 443*e0680481Safresh1 my $opat= $pat; 444*e0680481Safresh1 # Convert (x)? to (?:(x)|) and (x)+ to (?:(x))+ and (x)* to (?:(x))* 445*e0680481Safresh1 $pat =~ s/\(([\w|.]+)\)\?(?![+*?])/(?:($1)|)/g; 446*e0680481Safresh1 $pat =~ s/\(([\w|.]+)\)([+*])(?![+*?])/(?:($1))$2/g; 447*e0680481Safresh1 # inject an EVAL into the front of the pattern. 448*e0680481Safresh1 # this should disable all optimizations. 449*e0680481Safresh1 $pat =~ s/\A(.)/$1(?{ \$the_counter++ })/ 450*e0680481Safresh1 or die $pat; 451*e0680481Safresh1 } elsif (!$skip) { 452*e0680481Safresh1 $skip = $reason = "Test not applicable to $0"; 453*e0680481Safresh1 } 454*e0680481Safresh1 } 455b39c5158Smillert 4569f11ffb7Safresh1 for my $study ('', 'study $subject;', 'utf8::upgrade($subject);', 4579f11ffb7Safresh1 'utf8::upgrade($subject); study $subject;') { 458*e0680481Safresh1 if ( $skip ) { 459*e0680481Safresh1 print "ok $testname # skipped", length($reason) ? ". $reason" : '', "\n"; 460*e0680481Safresh1 next TEST; 461*e0680481Safresh1 } 462*e0680481Safresh1 our $the_counter = 0; # used in normalization tests 463898184e3Ssthen # Need to make a copy, else the utf8::upgrade of an already studied 464b39c5158Smillert # scalar confuses things. 465b39c5158Smillert my $subject = $subject; 4669f11ffb7Safresh1 $subject = XS::APItest::string_without_null($subject) if $no_null; 467b39c5158Smillert my $c = $iters; 468b39c5158Smillert my ($code, $match, $got); 469b39c5158Smillert if ($repl eq 'pos') { 4709f11ffb7Safresh1 my $patcode = defined $no_null_pat ? '/$no_null_pat/g' 4719f11ffb7Safresh1 : "m${pat}g"; 472b39c5158Smillert $code= <<EOFCODE; 4739f11ffb7Safresh1 $study 474b39c5158Smillert pos(\$subject)=0; 4759f11ffb7Safresh1 \$match = ( \$subject =~ $patcode ); 476b39c5158Smillert \$got = pos(\$subject); 477b39c5158SmillertEOFCODE 478b39c5158Smillert } 479b39c5158Smillert elsif ($qr_embed) { 480b39c5158Smillert $code= <<EOFCODE; 481b39c5158Smillert my \$RE = qr$pat; 4829f11ffb7Safresh1 $study 483b39c5158Smillert \$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--; 484b39c5158Smillert \$got = "$repl"; 485b39c5158SmillertEOFCODE 486b39c5158Smillert } 487b39c5158Smillert elsif ($qr_embed_thr) { 488b39c5158Smillert $code= <<EOFCODE; 489b39c5158Smillert # Can't run the match in a subthread, but can do this and 490b39c5158Smillert # clone the pattern the other way. 491b39c5158Smillert my \$RE = threads->new(sub {qr$pat})->join(); 4929f11ffb7Safresh1 $study 493b39c5158Smillert \$match = (\$subject =~ /(?:)\$RE(?:)/) while \$c--; 494b39c5158Smillert \$got = "$repl"; 495b39c5158SmillertEOFCODE 496b39c5158Smillert } 4979f11ffb7Safresh1 elsif ($no_null) { 4989f11ffb7Safresh1 my $patcode = defined $no_null_pat ? '/$no_null_pat/' 4999f11ffb7Safresh1 : $pat; 5009f11ffb7Safresh1 $code= <<EOFCODE; 5019f11ffb7Safresh1 $study 5029f11ffb7Safresh1 \$match = (\$subject =~ $OP$pat) while \$c--; 5039f11ffb7Safresh1 \$got = "$repl"; 5049f11ffb7Safresh1EOFCODE 5059f11ffb7Safresh1 } 506b39c5158Smillert else { 507b39c5158Smillert $code= <<EOFCODE; 5089f11ffb7Safresh1 $study 509b39c5158Smillert \$match = (\$subject =~ $OP$pat) while \$c--; 510b39c5158Smillert \$got = "$repl"; 511b39c5158SmillertEOFCODE 512b39c5158Smillert } 513eac174f2Safresh1 $code = "$code" if $regex_sets; 514b39c5158Smillert #$code.=qq[\n\$expect="$expect";\n]; 515b39c5158Smillert #use Devel::Peek; 516b39c5158Smillert #die Dump($code) if $pat=~/\\h/ and $subject=~/\x{A0}/; 517b39c5158Smillert { 518b39c5158Smillert # Probably we should annotate specific tests with which warnings 519b39c5158Smillert # categories they're known to trigger, and hence should be 520b39c5158Smillert # disabled just for that test 521b8851fccSafresh1 no warnings qw(uninitialized regexp deprecated); 522b39c5158Smillert eval $code; 523b39c5158Smillert } 524b39c5158Smillert chomp( my $err = $@ ); 525*e0680481Safresh1 if ($result eq 'c') { 526b8851fccSafresh1 if ($err !~ m!^\Q$expect!) { print "not ok $testname$todo (compile) $input => '$err'\n"; next TEST } 527b39c5158Smillert last; # no need to study a syntax error 528b39c5158Smillert } 529b39c5158Smillert elsif ( $todo_qr ) { 530b8851fccSafresh1 print "not ok $testname # TODO", length($reason) ? " - $reason" : '', "\n"; 531b39c5158Smillert next TEST; 532b39c5158Smillert } 533b39c5158Smillert elsif ($@) { 534b8851fccSafresh1 print "not ok $testname$todo $input => error '$err'\n", _comment("$code\n$@\n"); next TEST; 535b39c5158Smillert } 536b39c5158Smillert elsif ($result =~ /^n/) { 537b8851fccSafresh1 if ($match) { print "not ok $testname$todo ($study) $input => false positive\n"; next TEST } 538b39c5158Smillert } 539b39c5158Smillert else { 540b39c5158Smillert if (!$match || $got ne $expect) { 541b39c5158Smillert eval { require Data::Dumper }; 54291f110e0Safresh1 no warnings "utf8"; # But handle should be utf8 543898184e3Ssthen if ($@ || !defined &DynaLoader::boot_DynaLoader) { 544898184e3Ssthen # Data::Dumper will load on miniperl, but fail when used in 545898184e3Ssthen # anger as it tries to load B. I'd prefer to keep the 546898184e3Ssthen # regular calls below outside of an eval so that real 547898184e3Ssthen # (unknown) failures get spotted, not ignored. 548b8851fccSafresh1 print "not ok $testname$todo ($study) $input => '$got', match=$match\n", _comment("$code\n"); 549b39c5158Smillert } 550b39c5158Smillert else { # better diagnostics 551b39c5158Smillert my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump; 552b39c5158Smillert my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump; 5539f11ffb7Safresh1 my $e = Data::Dumper->new([$expect],['expected'])->Useqq(1)->Dump; 5549f11ffb7Safresh1 print "not ok $testname$todo ($study) $input => '$got', match=$match\n", _comment("$s\n$code\n$g\n$e\n"); 555b39c5158Smillert } 556b39c5158Smillert next TEST; 557b39c5158Smillert } 558b39c5158Smillert } 559b39c5158Smillert } 560b8851fccSafresh1 print "ok $testname$todo\n"; 561b39c5158Smillert} 562b39c5158Smillert 5639f11ffb7Safresh1printf "1..%d\n# $iters iterations\n", scalar @tests; 5649f11ffb7Safresh1 565b39c5158Smillert1; 566