1#!./perl 2# 3# This is a home for regular expression tests that don't fit into 4# the format supported by re/regexp.t. If you want to add a test 5# that does fit that format, add it to re/re_tests, not here. 6 7use strict; 8use warnings; 9use 5.010; 10 11sub run_tests; 12 13$| = 1; 14 15 16BEGIN { 17 chdir 't' if -d 't'; 18 @INC = ('../lib','.'); 19 require Config; import Config; 20 require './test.pl'; 21} 22 23plan tests => 727; # Update this when adding/deleting tests. 24 25run_tests() unless caller; 26 27# 28# Tests start here. 29# 30sub run_tests { 31 32 { 33 my $x = "abc\ndef\n"; 34 (my $x_pretty = $x) =~ s/\n/\\n/g; 35 36 ok $x =~ /^abc/, qq ["$x_pretty" =~ /^abc/]; 37 ok $x !~ /^def/, qq ["$x_pretty" !~ /^def/]; 38 39 # used to be a test for $* 40 ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m]; 41 42 ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]); 43 ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]); 44 45 ok $x =~ /def/, qq ["$x_pretty" =~ /def/]; 46 ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]); 47 48 ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/]; 49 ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]); 50 51 ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/]; 52 ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]); 53 } 54 55 { 56 $_ = '123'; 57 ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/]; 58 } 59 60 { 61 $_ = 'aaabbbccc'; 62 ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc', 63 qq [\$_ = '$_'; /(a*b*)(c*)/]; 64 ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/]; 65 unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]); 66 67 $_ = 'aaabccc'; 68 ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]; 69 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 70 71 $_ = 'aaaccc'; 72 ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/]; 73 unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]); 74 75 $_ = 'abcdef'; 76 ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/]; 77 ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/]; 78 ok m|bc/*d|, qq [\$_ = '$_'; m|bc/*d|]; 79 ok /^$_$/, qq [\$_ = '$_'; /^\$_\$/]; 80 } 81 82 { 83 # used to be a test for $* 84 ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m]; 85 } 86 87 { 88 our %XXX = map {($_ => $_)} 123, 234, 345; 89 90 our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3'); 91 while ($_ = shift(@XXX)) { 92 my $e = index ($_, 'not') >= 0 ? '' : 1; 93 my $r = m?(.*)?; 94 is($r, $e, "?(.*)?"); 95 /not/ && reset; 96 if (/not ok 2/) { 97 if ($^O eq 'VMS') { 98 $_ = shift(@XXX); 99 } 100 else { 101 reset 'X'; 102 } 103 } 104 } 105 106 SKIP: { 107 if ($^O eq 'VMS') { 108 skip "Reset 'X'", 1; 109 } 110 ok !keys %XXX, "%XXX is empty"; 111 } 112 113 } 114 115 { 116 my $message = "Test empty pattern"; 117 my $xyz = 'xyz'; 118 my $cde = 'cde'; 119 120 $cde =~ /[^ab]*/; 121 $xyz =~ //; 122 is($&, $xyz, $message); 123 124 my $foo = '[^ab]*'; 125 $cde =~ /$foo/; 126 $xyz =~ //; 127 is($&, $xyz, $message); 128 129 $cde =~ /$foo/; 130 my $null; 131 no warnings 'uninitialized'; 132 $xyz =~ /$null/; 133 is($&, $xyz, $message); 134 135 $null = ""; 136 $xyz =~ /$null/; 137 is($&, $xyz, $message); 138 } 139 140 { 141 my $message = q !Check $`, $&, $'!; 142 $_ = 'abcdefghi'; 143 /def/; # optimized up to cmd 144 is("$`:$&:$'", 'abc:def:ghi', $message); 145 146 no warnings 'void'; 147 /cde/ + 0; # optimized only to spat 148 is("$`:$&:$'", 'ab:cde:fghi', $message); 149 150 /[d][e][f]/; # not optimized 151 is("$`:$&:$'", 'abc:def:ghi', $message); 152 } 153 154 { 155 $_ = 'now is the {time for all} good men to come to.'; 156 / \{([^}]*)}/; 157 is($1, 'time for all', "Match braces"); 158 } 159 160 { 161 my $message = "{N,M} quantifier"; 162 $_ = 'xxx {3,4} yyy zzz'; 163 ok(/( {3,4})/, $message); 164 is($1, ' ', $message); 165 unlike($_, qr/( {4,})/, $message); 166 ok(/( {2,3}.)/, $message); 167 is($1, ' y', $message); 168 ok(/(y{2,3}.)/, $message); 169 is($1, 'yyy ', $message); 170 unlike($_, qr/x {3,4}/, $message); 171 unlike($_, qr/^xxx {3,4}/, $message); 172 } 173 174 { 175 my $message = "Test /g"; 176 local $" = ":"; 177 $_ = "now is the time for all good men to come to."; 178 my @words = /(\w+)/g; 179 my $exp = "now:is:the:time:for:all:good:men:to:come:to"; 180 181 is("@words", $exp, $message); 182 183 @words = (); 184 while (/\w+/g) { 185 push (@words, $&); 186 } 187 is("@words", $exp, $message); 188 189 @words = (); 190 pos = 0; 191 while (/to/g) { 192 push(@words, $&); 193 } 194 is("@words", "to:to", $message); 195 196 pos $_ = 0; 197 @words = /to/g; 198 is("@words", "to:to", $message); 199 } 200 201 { 202 $_ = "abcdefghi"; 203 204 my $pat1 = 'def'; 205 my $pat2 = '^def'; 206 my $pat3 = '.def.'; 207 my $pat4 = 'abc'; 208 my $pat5 = '^abc'; 209 my $pat6 = 'abc$'; 210 my $pat7 = 'ghi'; 211 my $pat8 = '\w*ghi'; 212 my $pat9 = 'ghi$'; 213 214 my $t1 = my $t2 = my $t3 = my $t4 = my $t5 = 215 my $t6 = my $t7 = my $t8 = my $t9 = 0; 216 217 for my $iter (1 .. 5) { 218 $t1++ if /$pat1/o; 219 $t2++ if /$pat2/o; 220 $t3++ if /$pat3/o; 221 $t4++ if /$pat4/o; 222 $t5++ if /$pat5/o; 223 $t6++ if /$pat6/o; 224 $t7++ if /$pat7/o; 225 $t8++ if /$pat8/o; 226 $t9++ if /$pat9/o; 227 } 228 my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9"; 229 is($x, '505550555', "Test /o"); 230 } 231 232 { 233 my $xyz = 'xyz'; 234 ok "abc" =~ /^abc$|$xyz/, "| after \$"; 235 236 # perl 4.009 says "unmatched ()" 237 my $message = '$ inside ()'; 238 239 my $result; 240 eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"'; 241 is($@, "", $message); 242 is($result, "abc:bc", $message); 243 } 244 245 { 246 my $message = "Scalar /g"; 247 $_ = "abcfooabcbar"; 248 249 ok( /abc/g && $` eq "", $message); 250 ok( /abc/g && $` eq "abcfoo", $message); 251 ok(!/abc/g, $message); 252 253 $message = "Scalar /gi"; 254 pos = 0; 255 ok( /ABC/gi && $` eq "", $message); 256 ok( /ABC/gi && $` eq "abcfoo", $message); 257 ok(!/ABC/gi, $message); 258 259 $message = "Scalar /g"; 260 pos = 0; 261 ok( /abc/g && $' eq "fooabcbar", $message); 262 ok( /abc/g && $' eq "bar", $message); 263 264 $_ .= ''; 265 my @x = /abc/g; 266 is(@x, 2, "/g reset after assignment"); 267 } 268 269 { 270 my $message = '/g, \G and pos'; 271 $_ = "abdc"; 272 pos $_ = 2; 273 /\Gc/gc; 274 is(pos $_, 2, $message); 275 /\Gc/g; 276 is(pos $_, undef, $message); 277 } 278 279 { 280 my $message = '(?{ })'; 281 our $out = 1; 282 'abc' =~ m'a(?{ $out = 2 })b'; 283 is($out, 2, $message); 284 285 $out = 1; 286 'abc' =~ m'a(?{ $out = 3 })c'; 287 is($out, 1, $message); 288 } 289 290 { 291 $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6'; 292 my @out = /(?<!foo)bar./g; 293 is("@out", 'bar2 barf', "Negative lookbehind"); 294 } 295 296 { 297 my $message = "REG_INFTY tests"; 298 # Tests which depend on REG_INFTY 299 300 # Defaults assumed if this fails 301 eval { require Config; }; 302 $::reg_infty = $Config::Config{reg_infty} // 32767; 303 $::reg_infty_m = $::reg_infty - 1; 304 $::reg_infty_p = $::reg_infty + 1; 305 $::reg_infty_m = $::reg_infty_m; # Suppress warning. 306 307 # As well as failing if the pattern matches do unexpected things, the 308 # next three tests will fail if you should have picked up a lower-than- 309 # default value for $reg_infty from Config.pm, but have not. 310 311 is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message); 312 is($@, '', $message); 313 is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message); 314 is($@, '', $message); 315 isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message); 316 is($@, '', $message); 317 318 eval "'aaa' =~ /a{1,$::reg_infty}/"; 319 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 320 eval "'aaa' =~ /a{1,$::reg_infty_p}/"; 321 like($@, qr/^\QQuantifier in {,} bigger than/, $message); 322 } 323 324 { 325 # Poke a couple more parse failures 326 my $context = 'x' x 256; 327 eval qq("${context}y" =~ /(?<=$context)y/); 328 ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit"; 329 } 330 331 { 332 # Long Monsters 333 for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory 334 my $a = 'a' x $l; 335 my $message = "Long monster, length = $l"; 336 like("ba$a=", qr/a$a=/, $message); 337 unlike("b$a=", qr/a$a=/, $message); 338 like("b$a=", qr/ba+=/, $message); 339 340 like("ba$a=", qr/b(?:a|b)+=/, $message); 341 } 342 } 343 344 { 345 # 20000 nodes, each taking 3 words per string, and 1 per branch 346 my $long_constant_len = join '|', 12120 .. 32645; 347 my $long_var_len = join '|', 8120 .. 28645; 348 my %ans = ( 'ax13876y25677lbc' => 1, 349 'ax13876y25677mcb' => 0, # not b. 350 'ax13876y35677nbc' => 0, # Num too big 351 'ax13876y25677y21378obc' => 1, 352 'ax13876y25677y21378zbc' => 0, # Not followed by [k-o] 353 'ax13876y25677y21378y21378kbc' => 1, 354 'ax13876y25677y21378y21378kcb' => 0, # Not b. 355 'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs 356 ); 357 358 for (keys %ans) { 359 my $message = "20000 nodes, const-len '$_'"; 360 ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message; 361 362 $message = "20000 nodes, var-len '$_'"; 363 ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message; 364 } 365 } 366 367 { 368 my $message = "Complicated backtracking"; 369 $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e"; 370 my $expect = "(bla()) ((l)u((e))) (l(e)e)"; 371 372 use vars '$c'; 373 sub matchit { 374 m/ 375 ( 376 \( 377 (?{ $c = 1 }) # Initialize 378 (?: 379 (?(?{ $c == 0 }) # PREVIOUS iteration was OK, stop the loop 380 (?! 381 ) # Fail: will unwind one iteration back 382 ) 383 (?: 384 [^()]+ # Match a big chunk 385 (?= 386 [()] 387 ) # Do not try to match subchunks 388 | 389 \( 390 (?{ ++$c }) 391 | 392 \) 393 (?{ --$c }) 394 ) 395 )+ # This may not match with different subblocks 396 ) 397 (?(?{ $c != 0 }) 398 (?! 399 ) # Fail 400 ) # Otherwise the chunk 1 may succeed with $c>0 401 /xg; 402 } 403 404 my @ans = (); 405 my $res; 406 push @ans, $res while $res = matchit; 407 is("@ans", "1 1 1", $message); 408 409 @ans = matchit; 410 is("@ans", $expect, $message); 411 412 $message = "Recursion with (??{ })"; 413 our $matched; 414 $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/; 415 416 @ans = my @ans1 = (); 417 push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g; 418 419 is("@ans", "1 1 1", $message); 420 is("@ans1", $expect, $message); 421 422 @ans = m/$matched/g; 423 is("@ans", $expect, $message); 424 425 } 426 427 { 428 ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/'; 429 } 430 431 { 432 my @ans = ('a/b' =~ m%(.*/)?(.*)%); # Stack may be bad 433 is("@ans", 'a/ b', "Stack may be bad"); 434 } 435 436 { 437 my $message = "Eval-group not allowed at runtime"; 438 my $code = '{$blah = 45}'; 439 our $blah = 12; 440 eval { /(?$code)/ }; 441 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 442 443 $blah = 12; 444 my $res = eval { "xx" =~ /(?$code)/o }; 445 { 446 no warnings 'uninitialized'; 447 chomp $@; my $message = "$message '$@', '$res', '$blah'"; 448 ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message); 449 } 450 451 $code = '=xx'; 452 $blah = 12; 453 $res = eval { "xx" =~ /(?$code)/o }; 454 { 455 no warnings 'uninitialized'; 456 my $message = "$message '$@', '$res', '$blah'"; 457 ok(!$@ && $res, $message); 458 } 459 460 $code = '{$blah = 45}'; 461 $blah = 12; 462 eval "/(?$code)/"; 463 is($blah, 45, $message); 464 465 $blah = 12; 466 /(?{$blah = 45})/; 467 is($blah, 45, $message); 468 } 469 470 { 471 my $message = "Pos checks"; 472 my $x = 'banana'; 473 $x =~ /.a/g; 474 is(pos $x, 2, $message); 475 476 $x =~ /.z/gc; 477 is(pos $x, 2, $message); 478 479 sub f { 480 my $p = $_[0]; 481 return $p; 482 } 483 484 $x =~ /.a/g; 485 is(f (pos $x), 4, $message); 486 } 487 488 { 489 my $message = 'Checking $^R'; 490 our $x = $^R = 67; 491 'foot' =~ /foo(?{$x = 12; 75})[t]/; 492 is($^R, 75, $message); 493 494 $x = $^R = 67; 495 'foot' =~ /foo(?{$x = 12; 75})[xy]/; 496 ok($^R eq '67' && $x eq '12', $message); 497 498 $x = $^R = 67; 499 'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/; 500 ok($^R eq '79' && $x eq '12', $message); 501 } 502 503 { 504 is(qr/\b\v$/i, '(?^i:\b\v$)', 'qr/\b\v$/i'); 505 is(qr/\b\v$/s, '(?^s:\b\v$)', 'qr/\b\v$/s'); 506 is(qr/\b\v$/m, '(?^m:\b\v$)', 'qr/\b\v$/m'); 507 is(qr/\b\v$/x, '(?^x:\b\v$)', 'qr/\b\v$/x'); 508 is(qr/\b\v$/xism, '(?^msix:\b\v$)', 'qr/\b\v$/xism'); 509 is(qr/\b\v$/, '(?^:\b\v$)', 'qr/\b\v$/'); 510 } 511 512 SKIP: { # Test that charset modifier work, and are interpolated 513 if ( 514 !$Config::Config{d_setlocale} 515 || $Config::Config{ccflags} =~ /\bD?NO_LOCALE(_|\b)/ 516 ) { 517 skip "no locale support", 13 518 } 519 is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier'); 520 is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles'); 521 is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles'); 522 is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles'); 523 is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles'); 524 525 my $dual = qr/\b\v$/; 526 my $locale; 527 528 SKIP: { 529 skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); 530 531 BEGIN { 532 if($Config{d_setlocale}) { 533 require locale; import locale; 534 } 535 } 536 $locale = qr/\b\v$/; 537 is($locale, '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale'); 538 no locale; 539 } 540 541 use feature 'unicode_strings'; 542 my $unicode = qr/\b\v$/; 543 is($unicode, '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings'); 544 is(qr/abc$dual/, '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 545 546 SKIP: { 547 skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); 548 549 is(qr/abc$locale/, '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings'); 550 } 551 552 no feature 'unicode_strings'; 553 SKIP: { 554 skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale}); 555 556 is(qr/abc$locale/, '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings'); 557 } 558 559 is(qr/def$unicode/, '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings'); 560 561 SKIP: { 562 skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale}); 563 564 BEGIN { 565 if($Config{d_setlocale}) { 566 require locale; import locale; 567 } 568 } 569 is(qr/abc$dual/, '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale'); 570 is(qr/abc$unicode/, '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale'); 571 } 572 } 573 574 { 575 my $message = "Look around"; 576 $_ = 'xabcx'; 577 foreach my $ans ('', 'c') { 578 ok(/(?<=(?=a)..)((?=c)|.)/g, $message); 579 is($1, $ans, $message); 580 } 581 } 582 583 { 584 my $message = "Empty clause"; 585 $_ = 'a'; 586 foreach my $ans ('', 'a', '') { 587 ok(/^|a|$/g, $message); 588 is($&, $ans, $message); 589 } 590 } 591 592 { 593 sub prefixify { 594 my $message = "Prefixify"; 595 { 596 my ($v, $a, $b, $res) = @_; 597 ok($v =~ s/\Q$a\E/$b/, $message); 598 is($v, $res, $message); 599 } 600 } 601 602 prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch'); 603 prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch'); 604 } 605 606 { 607 $_ = 'var="foo"'; 608 /(\")/; 609 ok $1 && /$1/, "Capture a quote"; 610 } 611 612 { 613 no warnings 'closure'; 614 my $message = '(?{ $var } refers to package vars'; 615 package aa; 616 our $c = 2; 617 $::c = 3; 618 '' =~ /(?{ $c = 4 })/; 619 main::is($c, 4, $message); 620 main::is($::c, 3, $message); 621 } 622 623 { 624 is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef); 625 like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/, 626 'POSIX class [: :] must have valid name'); 627 628 for my $d (qw [= .]) { 629 is(eval "/[[${d}foo${d}]]/", undef); 630 like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/, 631 "POSIX syntax [[$d $d]] is an error"); 632 } 633 } 634 635 { 636 # test if failure of patterns returns empty list 637 my $message = "Failed pattern returns empty list"; 638 $_ = 'aaa'; 639 @_ = /bbb/; 640 is("@_", "", $message); 641 642 @_ = /bbb/g; 643 is("@_", "", $message); 644 645 @_ = /(bbb)/; 646 is("@_", "", $message); 647 648 @_ = /(bbb)/g; 649 is("@_", "", $message); 650 } 651 652 { 653 my $message = '@- and @+ tests'; 654 655 /a(?=.$)/; 656 is($#+, 0, $message); 657 is($#-, 0, $message); 658 is($+ [0], 2, $message); 659 is($- [0], 1, $message); 660 ok(!defined $+ [1] && !defined $- [1] && 661 !defined $+ [2] && !defined $- [2], $message); 662 663 /a(a)(a)/; 664 is($#+, 2, $message); 665 is($#-, 2, $message); 666 is($+ [0], 3, $message); 667 is($- [0], 0, $message); 668 is($+ [1], 2, $message); 669 is($- [1], 1, $message); 670 is($+ [2], 3, $message); 671 is($- [2], 2, $message); 672 ok(!defined $+ [3] && !defined $- [3] && 673 !defined $+ [4] && !defined $- [4], $message); 674 675 # Exists has a special check for @-/@+ - bug 45147 676 ok(exists $-[0], $message); 677 ok(exists $+[0], $message); 678 ok(exists $-[2], $message); 679 ok(exists $+[2], $message); 680 ok(!exists $-[3], $message); 681 ok(!exists $+[3], $message); 682 ok(exists $-[-1], $message); 683 ok(exists $+[-1], $message); 684 ok(exists $-[-3], $message); 685 ok(exists $+[-3], $message); 686 ok(!exists $-[-4], $message); 687 ok(!exists $+[-4], $message); 688 689 /.(a)(b)?(a)/; 690 is($#+, 3, $message); 691 is($#-, 3, $message); 692 is($+ [1], 2, $message); 693 is($- [1], 1, $message); 694 is($+ [3], 3, $message); 695 is($- [3], 2, $message); 696 ok(!defined $+ [2] && !defined $- [2] && 697 !defined $+ [4] && !defined $- [4], $message); 698 699 /.(a)/; 700 is($#+, 1, $message); 701 is($#-, 1, $message); 702 is($+ [0], 2, $message); 703 is($- [0], 0, $message); 704 is($+ [1], 2, $message); 705 is($- [1], 1, $message); 706 ok(!defined $+ [2] && !defined $- [2] && 707 !defined $+ [3] && !defined $- [3], $message); 708 709 /.(a)(ba*)?/; 710 is($#+, 2, $message); 711 is($#-, 1, $message); 712 713 # Check that values don’t stick 714 " "=~/()()()(.)(..)/; 715 my($m,$p) = (\$-[5], \$+[5]); 716 () = "$$_" for $m, $p; # FETCH (or eqv.) 717 " " =~ /()/; 718 is $$m, undef, 'values do not stick to @- elements'; 719 is $$p, undef, 'values do not stick to @+ elements'; 720 } 721 722 foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)', 723 '@- = qw (foo bar)', '$^N = 42') { 724 is(eval $_, undef); 725 like($@, qr/^Modification of a read-only value attempted/, 726 '$^N, @- and @+ are read-only'); 727 } 728 729 { 730 my $message = '\G testing'; 731 $_ = 'aaa'; 732 pos = 1; 733 my @a = /\Ga/g; 734 is("@a", "a a", $message); 735 736 my $str = 'abcde'; 737 pos $str = 2; 738 unlike($str, qr/^\G/, $message); 739 unlike($str, qr/^.\G/, $message); 740 like($str, qr/^..\G/, $message); 741 unlike($str, qr/^...\G/, $message); 742 ok($str =~ /\G../ && $& eq 'cd', $message); 743 ok($str =~ /.\G./ && $& eq 'bc', $message); 744 745 } 746 747 { 748 my $message = '\G and intuit and anchoring'; 749 $_ = "abcdef"; 750 pos = 0; 751 ok($_ =~ /\Gabc/, $message); 752 ok($_ =~ /^\Gabc/, $message); 753 754 pos = 3; 755 ok($_ =~ /\Gdef/, $message); 756 pos = 3; 757 ok($_ =~ /\Gdef$/, $message); 758 pos = 3; 759 ok($_ =~ /abc\Gdef$/, $message); 760 pos = 3; 761 ok($_ =~ /^abc\Gdef$/, $message); 762 pos = 3; 763 ok($_ =~ /c\Gd/, $message); 764 pos = 3; 765 ok($_ =~ /..\GX?def/, $message); 766 } 767 768 { 769 my $s = '123'; 770 pos($s) = 1; 771 my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1 772 is("@a", "1", '\G looping'); 773 } 774 775 776 { 777 my $message = 'pos inside (?{ })'; 778 my $str = 'abcde'; 779 our ($foo, $bar); 780 like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message); 781 is($foo, $str, $message); 782 is($bar, 2, $message); 783 is(pos $str, undef, $message); 784 785 undef $foo; 786 undef $bar; 787 pos $str = undef; 788 ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message); 789 is($foo, $str, $message); 790 is($bar, 2, $message); 791 is(pos $str, 3, $message); 792 793 $_ = $str; 794 undef $foo; 795 undef $bar; 796 like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message); 797 is($foo, $str, $message); 798 is($bar, 2, $message); 799 800 undef $foo; 801 undef $bar; 802 ok(/b(?{$foo = $_; $bar = pos})c/g, $message); 803 is($foo, $str, $message); 804 is($bar, 2, $message); 805 is(pos, 3, $message); 806 807 undef $foo; 808 undef $bar; 809 pos = undef; 810 1 while /b(?{$foo = $_; $bar = pos})c/g; 811 is($foo, $str, $message); 812 is($bar, 2, $message); 813 is(pos, undef, $message); 814 815 undef $foo; 816 undef $bar; 817 $_ = 'abcde|abcde'; 818 ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message); 819 is($foo, 'abcde|abcde', $message); 820 is($bar, 8, $message); 821 is($_, 'axde|axde', $message); 822 823 # List context: 824 $_ = 'abcde|abcde'; 825 our @res; 826 () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g; 827 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 828 is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message); 829 830 @res = (); 831 () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g; 832 @res = map {defined $_ ? "'$_'" : 'undef'} @res; 833 is("@res", "'' 'ab' 'cde|abcde' " . 834 "'' 'abc' 'de|abcde' " . 835 "'abcd' 'e|' 'abcde' " . 836 "'abcde|' 'ab' 'cde' " . 837 "'abcde|' 'abc' 'de'", $message); 838 } 839 840 { 841 my $message = '\G anchor checks'; 842 my $foo = 'aabbccddeeffgg'; 843 pos ($foo) = 1; 844 845 ok($foo =~ /.\G(..)/g, $message); 846 is($1, 'ab', $message); 847 848 pos ($foo) += 1; 849 ok($foo =~ /.\G(..)/g, $message); 850 is($1, 'cc', $message); 851 852 pos ($foo) += 1; 853 ok($foo =~ /.\G(..)/g, $message); 854 is($1, 'de', $message); 855 856 ok($foo =~ /\Gef/g, $message); 857 858 undef pos $foo; 859 ok($foo =~ /\G(..)/g, $message); 860 is($1, 'aa', $message); 861 862 ok($foo =~ /\G(..)/g, $message); 863 is($1, 'bb', $message); 864 865 pos ($foo) = 5; 866 ok($foo =~ /\G(..)/g, $message); 867 is($1, 'cd', $message); 868 } 869 870 { 871 my $message = 'basic \G floating checks'; 872 my $foo = 'aabbccddeeffgg'; 873 pos ($foo) = 1; 874 875 ok($foo =~ /a+\G(..)/g, "$message: a+\\G"); 876 is($1, 'ab', "$message: ab"); 877 878 pos ($foo) += 1; 879 ok($foo =~ /b+\G(..)/g, "$message: b+\\G"); 880 is($1, 'cc', "$message: cc"); 881 882 pos ($foo) += 1; 883 ok($foo =~ /d+\G(..)/g, "$message: d+\\G"); 884 is($1, 'de', "$message: de"); 885 886 ok($foo =~ /\Gef/g, "$message: \\Gef"); 887 888 pos ($foo) = 1; 889 890 ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)"); 891 is($1, 'aa', "$message: aa"); 892 893 pos ($foo) = 2; 894 895 ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)"); 896 is($1, 'ab', "$message: ab"); 897 898 } 899 900 { 901 $_ = '123x123'; 902 my @res = /(\d*|x)/g; 903 local $" = '|'; 904 is("@res", "123||x|123|", "0 match in alternation"); 905 } 906 907 { 908 my $message = "Match against temporaries (created via pp_helem())" . 909 " is safe"; 910 ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message); 911 is($1, "bar", $message); 912 } 913 914 { 915 my $message = 'package $i inside (?{ }), ' . 916 'saved substrings and changing $_'; 917 our @a = qw [foo bar]; 918 our @b = (); 919 s/(\w)(?{push @b, $1})/,$1,/g for @a; 920 is("@b", "f o o b a r", $message); 921 is("@a", ",f,,o,,o, ,b,,a,,r,", $message); 922 923 $message = 'lexical $i inside (?{ }), ' . 924 'saved substrings and changing $_'; 925 no warnings 'closure'; 926 my @c = qw [foo bar]; 927 my @d = (); 928 s/(\w)(?{push @d, $1})/,$1,/g for @c; 929 is("@d", "f o o b a r", $message); 930 is("@c", ",f,,o,,o, ,b,,a,,r,", $message); 931 } 932 933 { 934 my $message = 'Brackets'; 935 our $brackets; 936 $brackets = qr { 937 { (?> [^{}]+ | (??{ $brackets }) )* } 938 }x; 939 940 ok("{{}" =~ $brackets, $message); 941 is($&, "{}", $message); 942 ok("something { long { and } hairy" =~ $brackets, $message); 943 is($&, "{ and }", $message); 944 ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message); 945 is($&, "{ and }", $message); 946 } 947 948 { 949 $_ = "a-a\nxbb"; 950 pos = 1; 951 ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg'); 952 } 953 954 { 955 my $message = '\G anchor checks'; 956 my $text = "aaXbXcc"; 957 pos ($text) = 0; 958 ok($text !~ /\GXb*X/g, $message); 959 } 960 961 { 962 $_ = "xA\n" x 500; 963 unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"'); 964 965 my $text = "abc dbf"; 966 my @res = ($text =~ /.*?(b).*?\b/g); 967 is("@res", "b b", '\b is not special'); 968 } 969 970 { 971 my $message = '\S, [\S], \s, [\s]'; 972 my @a = map chr, 0 .. 255; 973 my @b = grep m/\S/, @a; 974 my @c = grep m/[^\s]/, @a; 975 is("@b", "@c", $message); 976 977 @b = grep /\S/, @a; 978 @c = grep /[\S]/, @a; 979 is("@b", "@c", $message); 980 981 @b = grep /\s/, @a; 982 @c = grep /[^\S]/, @a; 983 is("@b", "@c", $message); 984 985 @b = grep /\s/, @a; 986 @c = grep /[\s]/, @a; 987 is("@b", "@c", $message); 988 } 989 { 990 my $message = '\D, [\D], \d, [\d]'; 991 my @a = map chr, 0 .. 255; 992 my @b = grep /\D/, @a; 993 my @c = grep /[^\d]/, @a; 994 is("@b", "@c", $message); 995 996 @b = grep /\D/, @a; 997 @c = grep /[\D]/, @a; 998 is("@b", "@c", $message); 999 1000 @b = grep /\d/, @a; 1001 @c = grep /[^\D]/, @a; 1002 is("@b", "@c", $message); 1003 1004 @b = grep /\d/, @a; 1005 @c = grep /[\d]/, @a; 1006 is("@b", "@c", $message); 1007 } 1008 { 1009 my $message = '\W, [\W], \w, [\w]'; 1010 my @a = map chr, 0 .. 255; 1011 my @b = grep /\W/, @a; 1012 my @c = grep /[^\w]/, @a; 1013 is("@b", "@c", $message); 1014 1015 @b = grep /\W/, @a; 1016 @c = grep /[\W]/, @a; 1017 is("@b", "@c", $message); 1018 1019 @b = grep /\w/, @a; 1020 @c = grep /[^\W]/, @a; 1021 is("@b", "@c", $message); 1022 1023 @b = grep /\w/, @a; 1024 @c = grep /[\w]/, @a; 1025 is("@b", "@c", $message); 1026 } 1027 1028 { 1029 # see if backtracking optimization works correctly 1030 my $message = 'Backtrack optimization'; 1031 like("\n\n", qr/\n $ \n/x, $message); 1032 like("\n\n", qr/\n* $ \n/x, $message); 1033 like("\n\n", qr/\n+ $ \n/x, $message); 1034 like("\n\n", qr/\n? $ \n/x, $message); 1035 like("\n\n", qr/\n*? $ \n/x, $message); 1036 like("\n\n", qr/\n+? $ \n/x, $message); 1037 like("\n\n", qr/\n?? $ \n/x, $message); 1038 unlike("\n\n", qr/\n*+ $ \n/x, $message); 1039 unlike("\n\n", qr/\n++ $ \n/x, $message); 1040 like("\n\n", qr/\n?+ $ \n/x, $message); 1041 } 1042 1043 { 1044 package S; 1045 use overload '""' => sub {'Object S'}; 1046 sub new {bless []} 1047 1048 my $message = "Ref stringification"; 1049 ::ok(do { \my $v} =~ /^SCALAR/, "Scalar ref stringification") or diag($message); 1050 ::ok(do {\\my $v} =~ /^REF/, "Ref ref stringification") or diag($message); 1051 ::ok([] =~ /^ARRAY/, "Array ref stringification") or diag($message); 1052 ::ok({} =~ /^HASH/, "Hash ref stringification") or diag($message); 1053 ::ok('S' -> new =~ /^Object S/, "Object stringification") or diag($message); 1054 } 1055 1056 { 1057 my $message = "Test result of match used as match"; 1058 ok('a1b' =~ ('xyz' =~ /y/), $message); 1059 is($`, 'a', $message); 1060 ok('a1b' =~ ('xyz' =~ /t/), $message); 1061 is($`, 'a', $message); 1062 } 1063 1064 { 1065 my $message = '"1" is not \s'; 1066 warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)}, 1067 undef, "$message (did not warn)"); 1068 } 1069 1070 { 1071 my $message = '\s, [[:space:]] and [[:blank:]]'; 1072 my %space = (spc => " ", 1073 tab => "\t", 1074 cr => "\r", 1075 lf => "\n", 1076 ff => "\f", 1077 # There's no \v but the vertical tabulator seems miraculously 1078 # be 11 both in ASCII and EBCDIC. 1079 vt => chr(11), 1080 false => "space"); 1081 1082 my @space0 = sort grep {$space {$_} =~ /\s/ } keys %space; 1083 my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space; 1084 my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space; 1085 1086 is("@space0", "cr ff lf spc tab vt", $message); 1087 is("@space1", "cr ff lf spc tab vt", $message); 1088 is("@space2", "spc tab", $message); 1089 } 1090 1091 { 1092 my $n= 50; 1093 # this must be a high number and go from 0 to N, as the bug we are looking for doesn't 1094 # seem to be predictable. Slight changes to the test make it fail earlier or later. 1095 foreach my $i (0 .. $n) 1096 { 1097 my $str= "\n" x $i; 1098 ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i"; 1099 } 1100 } 1101 { 1102 # we are actually testing that we dont die when executing these patterns 1103 use utf8; 1104 my $e = "Böck"; 1105 ok(utf8::is_utf8($e),"got a unicode string - rt75680"); 1106 1107 ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680"); 1108 ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680"); 1109 ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680"); 1110 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680"); 1111 } 1112 { 1113 # we are actually testing that we dont die when executing these patterns 1114 my $e = "B\x{f6}ck"; 1115 ok(!utf8::is_utf8($e), "got a latin string - rt75680"); 1116 1117 ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680"); 1118 ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680"); 1119 ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680"); 1120 ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680"); 1121 } 1122 1123 { 1124 # 1125 # Tests for bug 77414. 1126 # 1127 1128 my $message = '\p property after empty * match'; 1129 { 1130 like("1", qr/\s*\pN/, $message); 1131 like("-", qr/\s*\p{Dash}/, $message); 1132 like(" ", qr/\w*\p{Blank}/, $message); 1133 } 1134 1135 like("1", qr/\s*\pN+/, $message); 1136 like("-", qr/\s*\p{Dash}{1}/, $message); 1137 like(" ", qr/\w*\p{Blank}{1,4}/, $message); 1138 1139 } 1140 1141 SKIP: { # Some constructs with Latin1 characters cause a utf8 string not 1142 # to match itself in non-utf8 1143 if ($::IS_EBCDIC) { 1144 skip "Needs to be customized to run on EBCDIC", 6; 1145 } 1146 my $c = "\xc0"; 1147 my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/; 1148 utf8::upgrade($utf8_pattern); 1149 ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8"; 1150 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8"; 1151 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not"; 1152 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not"; 1153 utf8::upgrade($c); 1154 ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not"; 1155 ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not"; 1156 ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8"; 1157 ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8"; 1158 } 1159 1160 SKIP: { # Make sure can override the formatting 1161 if ($::IS_EBCDIC) { 1162 skip "Needs to be customized to run on EBCDIC", 2; 1163 } 1164 use feature 'unicode_strings'; 1165 ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/'; 1166 ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/'; 1167 } 1168 1169 { 1170 my $str= "\x{100}"; 1171 chop $str; 1172 my $qr= qr/$str/; 1173 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212"); 1174 $str= ""; 1175 $qr= qr/$str/; 1176 is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212"); 1177 1178 } 1179 1180 { 1181 local $::TODO = "[perl #38133]"; 1182 1183 "A" =~ /(((?:A))?)+/; 1184 my $first = $2; 1185 1186 "A" =~ /(((A))?)+/; 1187 my $second = $2; 1188 1189 is($first, $second); 1190 } 1191 1192 { 1193 # RT #3516: \G in a m//g expression causes problems 1194 my $count = 0; 1195 while ("abc" =~ m/(\G[ac])?/g) { 1196 last if $count++ > 10; 1197 } 1198 ok($count < 10, 'RT #3516 A'); 1199 1200 $count = 0; 1201 while ("abc" =~ m/(\G|.)[ac]/g) { 1202 last if $count++ > 10; 1203 } 1204 ok($count < 10, 'RT #3516 B'); 1205 1206 $count = 0; 1207 while ("abc" =~ m/(\G?[ac])?/g) { 1208 last if $count++ > 10; 1209 } 1210 ok($count < 10, 'RT #3516 C'); 1211 } 1212 { 1213 # RT #84294: Is this a bug in the simple Perl regex? 1214 # : Nested buffers and (?{...}) dont play nicely on partial matches 1215 our @got= (); 1216 ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match"); 1217 my $want= "'ab', 'a', 'b'"; 1218 my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got); 1219 is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state'); 1220 } 1221 1222 { 1223 # Suppress warnings, as the non-unicode one comes out even if turn off 1224 # warnings here (because the execution is done in another scope). 1225 local $SIG{__WARN__} = sub {}; 1226 my $str = "\x{110000}"; 1227 1228 unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}"); 1229 like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}"); 1230 like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}"); 1231 unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}"); 1232 } 1233 1234 { 1235 # Test that IDstart works, but because the author (khw) knows 1236 # regexes much better than the rest of the core, it is being done here 1237 # in the context of a regex which relies on buffer names beginng with 1238 # IDStarts. 1239 use utf8; 1240 my $str = "abc"; 1241 like($str, qr/(?<a>abc)/, "'a' is legal IDStart"); 1242 like($str, qr/(?<_>abc)/, "'_' is legal IDStart"); 1243 like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart"); 1244 like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart"); 1245 1246 # This test works on Unicode 6.0 in which U+2118 and U+212E are legal 1247 # IDStarts there, but are not Word characters, and therefore Perl 1248 # doesn't allow them to be IDStarts. But there is no guarantee that 1249 # Unicode won't change things around in the future so that at some 1250 # future Unicode revision these tests would need to be revised. 1251 foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) { 1252 my $prog = <<"EOP"; 1253use utf8;; 1254"abc" =~ qr/(?<$char>abc)/; 1255EOP 1256 utf8::encode($prog); 1257 fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {}, 1258 sprintf("'U+%04X not legal IDFirst'", ord($char))); 1259 } 1260 } 1261 1262 { # [perl #101710] 1263 my $pat = "b"; 1264 utf8::upgrade($pat); 1265 like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string"); 1266 } 1267 1268 { # Crash with @a =~ // warning 1269 local $SIG{__WARN__} = sub { 1270 pass 'no crash for @a =~ // warning' 1271 }; 1272 eval ' sub { my @a =~ // } '; 1273 } 1274 1275 { # Concat overloading and qr// thingies 1276 my @refs; 1277 my $qr = qr//; 1278 package Cat { 1279 require overload; 1280 overload->import( 1281 '""' => sub { ${$_[0]} }, 1282 '.' => sub { 1283 push @refs, ref $_[1] if ref $_[1]; 1284 bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]" 1285 } 1286 ); 1287 } 1288 my $s = "foo"; 1289 my $o = bless \$s, Cat::; 1290 /$o$qr/; 1291 is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth'; 1292 } 1293 1294 { 1295 my $count=0; 1296 my $str="\n"; 1297 $count++ while $str=~/.*/g; 1298 is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g'; 1299 my $class_count= 0; 1300 $class_count++ while $str=~/[^\n]*/g; 1301 is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same'; 1302 my $anch_count= 0; 1303 $anch_count++ while $str=~/^.*/mg; 1304 is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once'; 1305 } 1306 1307 { # [perl #111174] 1308 use re '/u'; 1309 like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u"; 1310 use re '/a'; 1311 unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a"; 1312 use re '/aa'; 1313 unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa"; 1314 } 1315 1316 { 1317 # the test for whether the pattern should be re-compiled should 1318 # consider the UTF8ness of the previous and current pattern 1319 # string, as well as the physical bytes of the pattern string 1320 1321 for my $s ("\xc4\x80", "\x{100}") { 1322 ok($s =~ /^$s$/, "re-compile check is UTF8-aware"); 1323 } 1324 } 1325 1326 # #113682 more overloading and qr// 1327 # when doing /foo$overloaded/, if $overloaded returns 1328 # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval' 1329 # shouldn't be required. Via '.', it still is. 1330 { 1331 package Qr0; 1332 use overload 'qr' => sub { qr/(??{50})/ }; 1333 1334 package Qr1; 1335 use overload '""' => sub { qr/(??{51})/ }; 1336 1337 package Qr2; 1338 use overload '.' => sub { $_[1] . qr/(??{52})/ }; 1339 1340 package Qr3; 1341 use overload '""' => sub { qr/(??{7})/ }, 1342 '.' => sub { $_[1] . qr/(??{53})/ }; 1343 1344 package Qr_indirect; 1345 use overload '""' => sub { $_[0][0] }; 1346 1347 package main; 1348 1349 for my $i (0..3) { 1350 my $o = bless [], "Qr$i"; 1351 if ((0,0,1,1)[$i]) { 1352 eval { "A5$i" =~ /^A$o$/ }; 1353 like($@, qr/Eval-group not allowed/, "Qr$i"); 1354 eval { "5$i" =~ /$o/ }; 1355 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1356 "Qr$i bare"); 1357 { 1358 use re 'eval'; 1359 ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval"); 1360 eval { "5$i" =~ /$o/ }; 1361 like($@, ($i == 3 ? qr/^$/ : qr/no method found,/), 1362 "Qr$i bare - with use re eval"); 1363 } 1364 } 1365 else { 1366 ok("A5$i" =~ /^A$o$/, "Qr$i"); 1367 ok("5$i" =~ /$o/, "Qr$i bare"); 1368 } 1369 } 1370 1371 my $o = bless [ bless [], "Qr1" ], 'Qr_indirect'; 1372 ok("A51" =~ /^A$o/, "Qr_indirect"); 1373 ok("51" =~ /$o/, "Qr_indirect bare"); 1374 } 1375 1376 { # Various flags weren't being set when a [] is optimized into an 1377 # EXACTish node 1378 ; 1379 ; 1380 ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization"); 1381 } 1382 1383 { 1384 for my $char (":", "\x{f7}", "\x{2010}") { 1385 my $utf8_char = $char; 1386 utf8::upgrade($utf8_char); 1387 my $display = $char; 1388 $display = display($display); 1389 my $utf8_display = "utf8::upgrade(\"$display\")"; 1390 1391 like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/"); 1392 like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/"); 1393 like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/"); 1394 like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/"); 1395 } 1396 } 1397 1398 { 1399 # #116148: Pattern utf8ness sticks around globally 1400 # the utf8 in the first match was sticking around for the second 1401 # match 1402 1403 use feature 'unicode_strings'; 1404 1405 my $x = "\x{263a}"; 1406 $x =~ /$x/; 1407 1408 my $text = "Perl"; 1409 ok("Perl" =~ /P.*$/i, '#116148'); 1410 } 1411 1412 { # 117327: Sequence (?#...) not recognized in regex 1413 # The space between the '(' and '?' is now deprecated; this test should 1414 # be removed when the deprecation is made fatal. 1415 no warnings; 1416 like("ab", qr/a( ?#foo)b/x); 1417 } 1418 1419 { # 118297: Mixing up- and down-graded strings in regex 1420 utf8::upgrade(my $u = "\x{e5}"); 1421 utf8::downgrade(my $d = "\x{e5}"); 1422 my $warned; 1423 local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ }; 1424 my $re = qr/$u$d/; 1425 ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern"); 1426 my $c = "\x{e5}\x{e5}"; 1427 utf8::downgrade($c); 1428 like($c, $re, "mixed up-/downgraded pattern matches downgraded string"); 1429 utf8::upgrade($c); 1430 like($c, $re, "mixed up-/downgraded pattern matches upgraded string"); 1431 } 1432 1433 { 1434 # if we have 87 capture buffers defined then \87 should refer to the 87th. 1435 # test that this is true for 1..100 1436 # Note that this test causes the engine to recurse at runtime, and 1437 # hence use a lot of C stack. 1438 for my $i (1..100) { 1439 my $capture= "a"; 1440 $capture= "($capture)" for 1 .. $i; 1441 for my $mid ("","b") { 1442 my $str= "a${mid}a"; 1443 my $backref= "\\$i"; 1444 eval { 1445 ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/"); 1446 1; 1447 } or do { 1448 is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/"); 1449 }; 1450 } 1451 } 1452 } 1453 1454 # this mixture of readonly (not COWable) and COWable strings 1455 # messed up the capture buffers under COW. The actual test results 1456 # are incidental; the issue is was an AddressSanitizer failure 1457 { 1458 my $c ='AB'; 1459 my $res = ''; 1460 for ($c, 'C', $c, 'DE') { 1461 ok(/(.)/, "COWable match"); 1462 $res .= $1; 1463 } 1464 is($res, "ACAD"); 1465 } 1466 1467 1468 { 1469 # RT #45667 1470 # /[#$x]/x didn't interpolate the var $x. 1471 my $b = 'cd'; 1472 my $s = 'abcd$%#&'; 1473 $s =~ s/[a#$b%]/X/g; 1474 is ($s, 'XbXX$XX&', 'RT #45667 without /x'); 1475 $s = 'abcd$%#&'; 1476 $s =~ s/[a#$b%]/X/gx; 1477 is ($s, 'XbXX$XX&', 'RT #45667 with /x'); 1478 } 1479 1480 { 1481 no warnings "uninitialized"; 1482 my @a; 1483 $a[1]++; 1484 /@a/; 1485 pass('no crash with /@a/ when array has nonexistent elems'); 1486 } 1487 1488 { 1489 is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'), 1490 "ok\n", 1491 'deleting *^R does not result in crashes'; 1492 no warnings 'once'; 1493 *^R = *caretRglobwithnoscalar; 1494 "" =~ /(?{42})/; 1495 is $^R, 42, 'assigning to *^R does not result in a crash'; 1496 is runperl( 1497 stderr => 1, 1498 prog => 'eval q|' 1499 .' q-..- =~ /(??{undef *^R;q--})(?{42})/; ' 1500 .' print qq-$^R\n-' 1501 .'|' 1502 ), 1503 "42\n", 1504 'undefining *^R within (??{}) does not result in a crash'; 1505 } 1506 1507 { 1508 # [perl #120446] 1509 # this code should be virtually instantaneous. If it takes 10s of 1510 # seconds, there a bug in intuit_start. 1511 # (this test doesn't actually test for slowness - that involves 1512 # too much danger of false positives on loaded machines - but by 1513 # putting it here, hopefully someone might notice if it suddenly 1514 # runs slowly) 1515 my $s = ('a' x 1_000_000) . 'b'; 1516 my $i = 0; 1517 for (1..10_000) { 1518 pos($s) = $_; 1519 $i++ if $s =~/\Gb/g; 1520 } 1521 is($i, 0, "RT 120446: mustn't run slowly"); 1522 } 1523 1524 { 1525 # [perl #120692] 1526 # these tests should be virtually instantaneous. If they take 10s of 1527 # seconds, there's a bug in intuit_start. 1528 1529 my $s = 'ab' x 1_000_000; 1530 utf8::upgrade($s); 1531 1 while $s =~ m/\Ga+ba+b/g; 1532 pass("RT#120692 \\G mustn't run slowly"); 1533 1534 $s=~ /^a{1,2}x/ for 1..10_000; 1535 pass("RT#120692 a{1,2} mustn't run slowly"); 1536 1537 $s=~ /ab.{1,2}x/; 1538 pass("RT#120692 ab.{1,2} mustn't run slowly"); 1539 1540 $s = "-a-bc" x 250_000; 1541 $s .= "1a1bc"; 1542 utf8::upgrade($s); 1543 ok($s =~ /\da\d{0,30000}bc/, "\\d{30000}"); 1544 1545 $s = "-ab\n" x 250_000; 1546 $s .= "abx"; 1547 ok($s =~ /^ab.*x/m, "distant float with /m"); 1548 1549 my $r = qr/^abcd/; 1550 $s = "abcd-xyz\n" x 500_000; 1551 $s =~ /$r\d{1,2}xyz/m for 1..200; 1552 pass("BOL within //m mustn't run slowly"); 1553 1554 $s = "abcdefg" x 1_000_000; 1555 $s =~ /(?-m:^)abcX?fg/m for 1..100; 1556 pass("BOL within //m mustn't skip absolute anchored check"); 1557 1558 $s = "abcdefg" x 1_000_000; 1559 $s =~ /^XX\d{1,10}cde/ for 1..100; 1560 pass("abs anchored float string should fail quickly"); 1561 1562 } 1563 1564 # These are based on looking at the code in regcomp.c 1565 # We don't look for specific code, just the existence of an SSC 1566 foreach my $re (qw( qr/a?c/ 1567 qr/a?c/i 1568 qr/[ab]?c/ 1569 qr/\R?c/ 1570 qr/\d?c/d 1571 qr/\w?c/l 1572 qr/\s?c/a 1573 qr/[[:alpha:]]?c/u 1574 )) { 1575 SKIP: { 1576 skip "no re-debug under miniperl" if is_miniperl; 1577 my $prog = <<"EOP"; 1578use re qw(Debug COMPILE); 1579$re; 1580EOP 1581 fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class"); 1582 } 1583 } 1584 1585 { 1586 like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works"; 1587 } 1588 1589 1590 1591 { # Was getting optimized into EXACT (non-folding node) 1592 my $x = qr/[x]/i; 1593 utf8::upgrade($x); 1594 like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case"); 1595 } 1596 1597 { # [perl #123539] 1598 like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1599 like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, ""); 1600 } 1601 1602 { # [perl #123604] 1603 my($s, $x, @x) = ('abc', 'a', 'd'); 1604 my $long = 'b' x 2000; 1605 my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0}; 1606 $eval =~ s{bbb}{$long}; 1607 my $match = eval $eval; 1608 ok(1, "did not crash"); 1609 ok($match, "[bbb...] resolved as character class, not subscript"); 1610 } 1611 1612 { # Test that we handle some malformed UTF-8 without looping [perl 1613 # #123562] 1614 1615 my $code=' 1616 BEGIN{require q(test.pl);} 1617 use Encode qw(_utf8_on); 1618 my $malformed = "a\x80\n"; 1619 _utf8_on($malformed); 1620 watchdog(3); 1621 $malformed =~ /(\n\r|\r)$/; 1622 print q(No infinite loop here!); 1623 '; 1624 fresh_perl_like($code, qr/Malformed UTF-8 character/, {}, 1625 "test that we handle some UTF-8 malformations without looping" ); 1626 } 1627} # End of sub run_tests 1628 16291; 1630