xref: /openbsd-src/gnu/usr.bin/perl/t/re/pat.t (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1#!./perl
2#
3# This is a home for regular expression tests that don't fit into
4# the format supported by re/regexp.t.  If you want to add a test
5# that does fit that format, add it to re/re_tests, not here.
6
7use strict;
8use warnings;
9use 5.010;
10
11sub run_tests;
12
13$| = 1;
14
15
16BEGIN {
17    chdir 't' if -d 't';
18    @INC = ('../lib','.');
19    require Config; import Config;
20    require './test.pl';
21}
22
23plan tests => 727;  # Update this when adding/deleting tests.
24
25run_tests() unless caller;
26
27#
28# Tests start here.
29#
30sub run_tests {
31
32    {
33        my $x = "abc\ndef\n";
34	(my $x_pretty = $x) =~ s/\n/\\n/g;
35
36        ok $x =~ /^abc/,  qq ["$x_pretty" =~ /^abc/];
37        ok $x !~ /^def/,  qq ["$x_pretty" !~ /^def/];
38
39        # used to be a test for $*
40        ok $x =~ /^def/m, qq ["$x_pretty" =~ /^def/m];
41
42        ok(!($x =~ /^xxx/), qq ["$x_pretty" =~ /^xxx/]);
43        ok(!($x !~ /^abc/), qq ["$x_pretty" !~ /^abc/]);
44
45         ok $x =~ /def/, qq ["$x_pretty" =~ /def/];
46        ok(!($x !~ /def/), qq ["$x_pretty" !~ /def/]);
47
48         ok $x !~ /.def/, qq ["$x_pretty" !~ /.def/];
49        ok(!($x =~ /.def/), qq ["$x_pretty" =~ /.def/]);
50
51         ok $x =~ /\ndef/, qq ["$x_pretty" =~ /\\ndef/];
52        ok(!($x !~ /\ndef/), qq ["$x_pretty" !~ /\\ndef/]);
53    }
54
55    {
56        $_ = '123';
57        ok /^([0-9][0-9]*)/, qq [\$_ = '$_'; /^([0-9][0-9]*)/];
58    }
59
60    {
61        $_ = 'aaabbbccc';
62         ok /(a*b*)(c*)/ && $1 eq 'aaabbb' && $2 eq 'ccc',
63                                             qq [\$_ = '$_'; /(a*b*)(c*)/];
64         ok /(a+b+c+)/ && $1 eq 'aaabbbccc', qq [\$_ = '$_'; /(a+b+c+)/];
65        unlike($_, qr/a+b?c+/, qq [\$_ = '$_'; /a+b?c+/]);
66
67        $_ = 'aaabccc';
68         ok /a+b?c+/, qq [\$_ = '$_'; /a+b?c+/];
69         ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
70
71        $_ = 'aaaccc';
72         ok /a*b?c*/, qq [\$_ = '$_'; /a*b?c*/];
73        unlike($_, qr/a*b+c*/, qq [\$_ = '$_'; /a*b+c*/]);
74
75        $_ = 'abcdef';
76         ok /bcd|xyz/, qq [\$_ = '$_'; /bcd|xyz/];
77         ok /xyz|bcd/, qq [\$_ = '$_'; /xyz|bcd/];
78         ok m|bc/*d|,  qq [\$_ = '$_'; m|bc/*d|];
79         ok /^$_$/,    qq [\$_ = '$_'; /^\$_\$/];
80    }
81
82    {
83        # used to be a test for $*
84        ok "ab\ncd\n" =~ /^cd/m, q ["ab\ncd\n" =~ /^cd/m];
85    }
86
87    {
88        our %XXX = map {($_ => $_)} 123, 234, 345;
89
90        our @XXX = ('ok 1','not ok 1', 'ok 2','not ok 2','not ok 3');
91        while ($_ = shift(@XXX)) {
92            my $e = index ($_, 'not') >= 0 ? '' : 1;
93            my $r = m?(.*)?;
94            is($r, $e, "?(.*)?");
95            /not/ && reset;
96            if (/not ok 2/) {
97                if ($^O eq 'VMS') {
98                    $_ = shift(@XXX);
99                }
100                else {
101                    reset 'X';
102                }
103            }
104        }
105
106        SKIP: {
107            if ($^O eq 'VMS') {
108                skip "Reset 'X'", 1;
109            }
110            ok !keys %XXX, "%XXX is empty";
111        }
112
113    }
114
115    {
116        my $message = "Test empty pattern";
117        my $xyz = 'xyz';
118        my $cde = 'cde';
119
120        $cde =~ /[^ab]*/;
121        $xyz =~ //;
122        is($&, $xyz, $message);
123
124        my $foo = '[^ab]*';
125        $cde =~ /$foo/;
126        $xyz =~ //;
127        is($&, $xyz, $message);
128
129        $cde =~ /$foo/;
130        my $null;
131        no warnings 'uninitialized';
132        $xyz =~ /$null/;
133        is($&, $xyz, $message);
134
135        $null = "";
136        $xyz =~ /$null/;
137        is($&, $xyz, $message);
138    }
139
140    {
141        my $message = q !Check $`, $&, $'!;
142        $_ = 'abcdefghi';
143        /def/;        # optimized up to cmd
144        is("$`:$&:$'", 'abc:def:ghi', $message);
145
146        no warnings 'void';
147        /cde/ + 0;    # optimized only to spat
148        is("$`:$&:$'", 'ab:cde:fghi', $message);
149
150        /[d][e][f]/;    # not optimized
151        is("$`:$&:$'", 'abc:def:ghi', $message);
152    }
153
154    {
155        $_ = 'now is the {time for all} good men to come to.';
156        / \{([^}]*)}/;
157        is($1, 'time for all', "Match braces");
158    }
159
160    {
161        my $message = "{N,M} quantifier";
162        $_ = 'xxx {3,4}  yyy   zzz';
163        ok(/( {3,4})/, $message);
164        is($1, '   ', $message);
165        unlike($_, qr/( {4,})/, $message);
166        ok(/( {2,3}.)/, $message);
167        is($1, '  y', $message);
168        ok(/(y{2,3}.)/, $message);
169        is($1, 'yyy ', $message);
170        unlike($_, qr/x {3,4}/, $message);
171        unlike($_, qr/^xxx {3,4}/, $message);
172    }
173
174    {
175        my $message = "Test /g";
176        local $" = ":";
177        $_ = "now is the time for all good men to come to.";
178        my @words = /(\w+)/g;
179        my $exp   = "now:is:the:time:for:all:good:men:to:come:to";
180
181        is("@words", $exp, $message);
182
183        @words = ();
184        while (/\w+/g) {
185            push (@words, $&);
186        }
187        is("@words", $exp, $message);
188
189        @words = ();
190        pos = 0;
191        while (/to/g) {
192            push(@words, $&);
193        }
194        is("@words", "to:to", $message);
195
196        pos $_ = 0;
197        @words = /to/g;
198        is("@words", "to:to", $message);
199    }
200
201    {
202        $_ = "abcdefghi";
203
204        my $pat1 = 'def';
205        my $pat2 = '^def';
206        my $pat3 = '.def.';
207        my $pat4 = 'abc';
208        my $pat5 = '^abc';
209        my $pat6 = 'abc$';
210        my $pat7 = 'ghi';
211        my $pat8 = '\w*ghi';
212        my $pat9 = 'ghi$';
213
214        my $t1 = my $t2 = my $t3 = my $t4 = my $t5 =
215        my $t6 = my $t7 = my $t8 = my $t9 = 0;
216
217        for my $iter (1 .. 5) {
218            $t1++ if /$pat1/o;
219            $t2++ if /$pat2/o;
220            $t3++ if /$pat3/o;
221            $t4++ if /$pat4/o;
222            $t5++ if /$pat5/o;
223            $t6++ if /$pat6/o;
224            $t7++ if /$pat7/o;
225            $t8++ if /$pat8/o;
226            $t9++ if /$pat9/o;
227        }
228        my $x = "$t1$t2$t3$t4$t5$t6$t7$t8$t9";
229        is($x, '505550555', "Test /o");
230    }
231
232    {
233        my $xyz = 'xyz';
234        ok "abc" =~ /^abc$|$xyz/, "| after \$";
235
236        # perl 4.009 says "unmatched ()"
237        my $message = '$ inside ()';
238
239        my $result;
240        eval '"abc" =~ /a(bc$)|$xyz/; $result = "$&:$1"';
241        is($@, "", $message);
242        is($result, "abc:bc", $message);
243    }
244
245    {
246        my $message = "Scalar /g";
247        $_ = "abcfooabcbar";
248
249        ok( /abc/g && $` eq "", $message);
250        ok( /abc/g && $` eq "abcfoo", $message);
251        ok(!/abc/g, $message);
252
253        $message = "Scalar /gi";
254        pos = 0;
255        ok( /ABC/gi && $` eq "", $message);
256        ok( /ABC/gi && $` eq "abcfoo", $message);
257        ok(!/ABC/gi, $message);
258
259        $message = "Scalar /g";
260        pos = 0;
261        ok( /abc/g && $' eq "fooabcbar", $message);
262        ok( /abc/g && $' eq "bar", $message);
263
264        $_ .= '';
265        my @x = /abc/g;
266        is(@x, 2, "/g reset after assignment");
267    }
268
269    {
270        my $message = '/g, \G and pos';
271        $_ = "abdc";
272        pos $_ = 2;
273        /\Gc/gc;
274        is(pos $_, 2, $message);
275        /\Gc/g;
276        is(pos $_, undef, $message);
277    }
278
279    {
280        my $message = '(?{ })';
281        our $out = 1;
282        'abc' =~ m'a(?{ $out = 2 })b';
283        is($out, 2, $message);
284
285        $out = 1;
286        'abc' =~ m'a(?{ $out = 3 })c';
287        is($out, 1, $message);
288    }
289
290    {
291        $_ = 'foobar1 bar2 foobar3 barfoobar5 foobar6';
292        my @out = /(?<!foo)bar./g;
293        is("@out", 'bar2 barf', "Negative lookbehind");
294    }
295
296    {
297        my $message = "REG_INFTY tests";
298        # Tests which depend on REG_INFTY
299
300	#  Defaults assumed if this fails
301	eval { require Config; };
302        $::reg_infty   = $Config::Config{reg_infty} // 32767;
303        $::reg_infty_m = $::reg_infty - 1;
304        $::reg_infty_p = $::reg_infty + 1;
305        $::reg_infty_m = $::reg_infty_m;   # Suppress warning.
306
307        # As well as failing if the pattern matches do unexpected things, the
308        # next three tests will fail if you should have picked up a lower-than-
309        # default value for $reg_infty from Config.pm, but have not.
310
311        is(eval q{('aaa' =~ /(a{1,$::reg_infty_m})/)[0]}, 'aaa', $message);
312        is($@, '', $message);
313        is(eval q{('a' x $::reg_infty_m) =~ /a{$::reg_infty_m}/}, 1, $message);
314        is($@, '', $message);
315        isnt(q{('a' x ($::reg_infty_m - 1)) !~ /a{$::reg_infty_m}/}, 1, $message);
316        is($@, '', $message);
317
318        eval "'aaa' =~ /a{1,$::reg_infty}/";
319        like($@, qr/^\QQuantifier in {,} bigger than/, $message);
320        eval "'aaa' =~ /a{1,$::reg_infty_p}/";
321        like($@, qr/^\QQuantifier in {,} bigger than/, $message);
322    }
323
324    {
325        # Poke a couple more parse failures
326        my $context = 'x' x 256;
327        eval qq("${context}y" =~ /(?<=$context)y/);
328        ok $@ =~ /^\QLookbehind longer than 255 not/, "Lookbehind limit";
329    }
330
331    {
332        # Long Monsters
333        for my $l (125, 140, 250, 270, 300000, 30) { # Ordered to free memory
334            my $a = 'a' x $l;
335	    my $message = "Long monster, length = $l";
336	    like("ba$a=", qr/a$a=/, $message);
337            unlike("b$a=", qr/a$a=/, $message);
338            like("b$a=", qr/ba+=/, $message);
339
340	    like("ba$a=", qr/b(?:a|b)+=/, $message);
341        }
342    }
343
344    {
345        # 20000 nodes, each taking 3 words per string, and 1 per branch
346        my $long_constant_len = join '|', 12120 .. 32645;
347        my $long_var_len = join '|', 8120 .. 28645;
348        my %ans = ( 'ax13876y25677lbc' => 1,
349                    'ax13876y25677mcb' => 0, # not b.
350                    'ax13876y35677nbc' => 0, # Num too big
351                    'ax13876y25677y21378obc' => 1,
352                    'ax13876y25677y21378zbc' => 0,    # Not followed by [k-o]
353                    'ax13876y25677y21378y21378kbc' => 1,
354                    'ax13876y25677y21378y21378kcb' => 0, # Not b.
355                    'ax13876y25677y21378y21378y21378kbc' => 0, # 5 runs
356                  );
357
358        for (keys %ans) {
359	    my $message = "20000 nodes, const-len '$_'";
360            ok !($ans{$_} xor /a(?=([yx]($long_constant_len)){2,4}[k-o]).*b./o), $message;
361
362	    $message = "20000 nodes, var-len '$_'";
363            ok !($ans{$_} xor /a(?=([yx]($long_var_len)){2,4}[k-o]).*b./o,), $message;
364        }
365    }
366
367    {
368        my $message = "Complicated backtracking";
369        $_ = " a (bla()) and x(y b((l)u((e))) and b(l(e)e)e";
370        my $expect = "(bla()) ((l)u((e))) (l(e)e)";
371
372        use vars '$c';
373        sub matchit {
374          m/
375             (
376               \(
377               (?{ $c = 1 })    # Initialize
378               (?:
379                 (?(?{ $c == 0 })   # PREVIOUS iteration was OK, stop the loop
380                   (?!
381                   )        # Fail: will unwind one iteration back
382                 )
383                 (?:
384                   [^()]+        # Match a big chunk
385                   (?=
386                     [()]
387                   )        # Do not try to match subchunks
388                 |
389                   \(
390                   (?{ ++$c })
391                 |
392                   \)
393                   (?{ --$c })
394                 )
395               )+        # This may not match with different subblocks
396             )
397             (?(?{ $c != 0 })
398               (?!
399               )        # Fail
400             )            # Otherwise the chunk 1 may succeed with $c>0
401           /xg;
402        }
403
404        my @ans = ();
405        my $res;
406        push @ans, $res while $res = matchit;
407        is("@ans", "1 1 1", $message);
408
409        @ans = matchit;
410        is("@ans", $expect, $message);
411
412        $message = "Recursion with (??{ })";
413        our $matched;
414        $matched = qr/\((?:(?>[^()]+)|(??{$matched}))*\)/;
415
416        @ans = my @ans1 = ();
417        push (@ans, $res), push (@ans1, $&) while $res = m/$matched/g;
418
419        is("@ans", "1 1 1", $message);
420        is("@ans1", $expect, $message);
421
422        @ans = m/$matched/g;
423        is("@ans", $expect, $message);
424
425    }
426
427    {
428        ok "abc" =~ /^(??{"a"})b/, '"abc" =~ /^(??{"a"})b/';
429    }
430
431    {
432        my @ans = ('a/b' =~ m%(.*/)?(.*)%);    # Stack may be bad
433        is("@ans", 'a/ b', "Stack may be bad");
434    }
435
436    {
437        my $message = "Eval-group not allowed at runtime";
438        my $code = '{$blah = 45}';
439        our $blah = 12;
440        eval { /(?$code)/ };
441        ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message);
442
443	$blah = 12;
444	my $res = eval { "xx" =~ /(?$code)/o };
445	{
446	    no warnings 'uninitialized';
447	    chomp $@; my $message = "$message '$@', '$res', '$blah'";
448	    ok($@ && $@ =~ /not allowed at runtime/ && $blah == 12, $message);
449	}
450
451        $code = '=xx';
452	$blah = 12;
453	$res = eval { "xx" =~ /(?$code)/o };
454	{
455	    no warnings 'uninitialized';
456	    my $message = "$message '$@', '$res', '$blah'";
457	    ok(!$@ && $res, $message);
458	}
459
460        $code = '{$blah = 45}';
461        $blah = 12;
462        eval "/(?$code)/";
463        is($blah, 45, $message);
464
465        $blah = 12;
466        /(?{$blah = 45})/;
467        is($blah, 45, $message);
468    }
469
470    {
471        my $message = "Pos checks";
472        my $x = 'banana';
473        $x =~ /.a/g;
474        is(pos $x, 2, $message);
475
476        $x =~ /.z/gc;
477        is(pos $x, 2, $message);
478
479        sub f {
480            my $p = $_[0];
481            return $p;
482        }
483
484        $x =~ /.a/g;
485        is(f (pos $x), 4, $message);
486    }
487
488    {
489        my $message = 'Checking $^R';
490        our $x = $^R = 67;
491        'foot' =~ /foo(?{$x = 12; 75})[t]/;
492        is($^R, 75, $message);
493
494        $x = $^R = 67;
495        'foot' =~ /foo(?{$x = 12; 75})[xy]/;
496        ok($^R eq '67' && $x eq '12', $message);
497
498        $x = $^R = 67;
499        'foot' =~ /foo(?{ $^R + 12 })((?{ $x = 12; $^R + 17 })[xy])?/;
500        ok($^R eq '79' && $x eq '12', $message);
501    }
502
503    {
504        is(qr/\b\v$/i,    '(?^i:\b\v$)', 'qr/\b\v$/i');
505        is(qr/\b\v$/s,    '(?^s:\b\v$)', 'qr/\b\v$/s');
506        is(qr/\b\v$/m,    '(?^m:\b\v$)', 'qr/\b\v$/m');
507        is(qr/\b\v$/x,    '(?^x:\b\v$)', 'qr/\b\v$/x');
508        is(qr/\b\v$/xism, '(?^msix:\b\v$)',  'qr/\b\v$/xism');
509        is(qr/\b\v$/,     '(?^:\b\v$)', 'qr/\b\v$/');
510    }
511
512    SKIP: {   # Test that charset modifier work, and are interpolated
513        if (
514            !$Config::Config{d_setlocale}
515        || $Config::Config{ccflags} =~ /\bD?NO_LOCALE(_|\b)/
516        ) {
517            skip "no locale support", 13
518        }
519        is(qr/\b\v$/, '(?^:\b\v$)', 'Verify no locale, no unicode_strings gives default modifier');
520        is(qr/(?l:\b\v$)/, '(?^:(?l:\b\v$))', 'Verify infix l modifier compiles');
521        is(qr/(?u:\b\v$)/, '(?^:(?u:\b\v$))', 'Verify infix u modifier compiles');
522        is(qr/(?l)\b\v$/, '(?^:(?l)\b\v$)', 'Verify (?l) compiles');
523        is(qr/(?u)\b\v$/, '(?^:(?u)\b\v$)', 'Verify (?u) compiles');
524
525        my $dual = qr/\b\v$/;
526        my $locale;
527
528      SKIP: {
529            skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
530
531            BEGIN {
532                if($Config{d_setlocale}) {
533                    require locale; import locale;
534                }
535            }
536            $locale = qr/\b\v$/;
537            is($locale,    '(?^l:\b\v$)', 'Verify has l modifier when compiled under use locale');
538            no locale;
539        }
540
541        use feature 'unicode_strings';
542        my $unicode = qr/\b\v$/;
543        is($unicode,    '(?^u:\b\v$)', 'Verify has u modifier when compiled under unicode_strings');
544        is(qr/abc$dual/,    '(?^u:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
545
546      SKIP: {
547            skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
548
549            is(qr/abc$locale/,    '(?^u:abc(?^l:\b\v$))', 'Verify retains l when interpolated under unicode_strings');
550        }
551
552        no feature 'unicode_strings';
553      SKIP: {
554            skip 'No locale testing without d_setlocale', 1 if(!$Config{d_setlocale});
555
556            is(qr/abc$locale/,    '(?^:abc(?^l:\b\v$))', 'Verify retains l when interpolated outside locale and unicode strings');
557        }
558
559        is(qr/def$unicode/,    '(?^:def(?^u:\b\v$))', 'Verify retains u when interpolated outside locale and unicode strings');
560
561      SKIP: {
562            skip 'No locale testing without d_setlocale', 2 if(!$Config{d_setlocale});
563
564             BEGIN {
565                if($Config{d_setlocale}) {
566                    require locale; import locale;
567                }
568            }
569            is(qr/abc$dual/,    '(?^l:abc(?^:\b\v$))', 'Verify retains d meaning when interpolated under locale');
570            is(qr/abc$unicode/,    '(?^l:abc(?^u:\b\v$))', 'Verify retains u when interpolated under locale');
571        }
572    }
573
574    {
575        my $message = "Look around";
576        $_ = 'xabcx';
577        foreach my $ans ('', 'c') {
578            ok(/(?<=(?=a)..)((?=c)|.)/g, $message);
579            is($1, $ans, $message);
580        }
581    }
582
583    {
584        my $message = "Empty clause";
585        $_ = 'a';
586        foreach my $ans ('', 'a', '') {
587            ok(/^|a|$/g, $message);
588            is($&, $ans, $message);
589        }
590    }
591
592    {
593        sub prefixify {
594        my $message = "Prefixify";
595            {
596                my ($v, $a, $b, $res) = @_;
597                ok($v =~ s/\Q$a\E/$b/, $message);
598                is($v, $res, $message);
599            }
600        }
601
602        prefixify ('/a/b/lib/arch', "/a/b/lib", 'X/lib', 'X/lib/arch');
603        prefixify ('/a/b/man/arch', "/a/b/man", 'X/man', 'X/man/arch');
604    }
605
606    {
607        $_ = 'var="foo"';
608        /(\")/;
609        ok $1 && /$1/, "Capture a quote";
610    }
611
612    {
613        no warnings 'closure';
614        my $message = '(?{ $var } refers to package vars';
615        package aa;
616        our $c = 2;
617        $::c = 3;
618        '' =~ /(?{ $c = 4 })/;
619        main::is($c, 4, $message);
620        main::is($::c, 3, $message);
621    }
622
623    {
624        is(eval 'q(a:[b]:) =~ /[x[:foo:]]/', undef);
625	like ($@, qr/POSIX class \[:[^:]+:\] unknown in regex/,
626	      'POSIX class [: :] must have valid name');
627
628        for my $d (qw [= .]) {
629            is(eval "/[[${d}foo${d}]]/", undef);
630	    like ($@, qr/\QPOSIX syntax [$d $d] is reserved for future extensions/,
631		  "POSIX syntax [[$d $d]] is an error");
632        }
633    }
634
635    {
636        # test if failure of patterns returns empty list
637        my $message = "Failed pattern returns empty list";
638        $_ = 'aaa';
639        @_ = /bbb/;
640        is("@_", "", $message);
641
642        @_ = /bbb/g;
643        is("@_", "", $message);
644
645        @_ = /(bbb)/;
646        is("@_", "", $message);
647
648        @_ = /(bbb)/g;
649        is("@_", "", $message);
650    }
651
652    {
653        my $message = '@- and @+ tests';
654
655        /a(?=.$)/;
656        is($#+, 0, $message);
657        is($#-, 0, $message);
658        is($+ [0], 2, $message);
659        is($- [0], 1, $message);
660        ok(!defined $+ [1] && !defined $- [1] &&
661           !defined $+ [2] && !defined $- [2], $message);
662
663        /a(a)(a)/;
664        is($#+, 2, $message);
665        is($#-, 2, $message);
666        is($+ [0], 3, $message);
667        is($- [0], 0, $message);
668        is($+ [1], 2, $message);
669        is($- [1], 1, $message);
670        is($+ [2], 3, $message);
671        is($- [2], 2, $message);
672        ok(!defined $+ [3] && !defined $- [3] &&
673           !defined $+ [4] && !defined $- [4], $message);
674
675        # Exists has a special check for @-/@+ - bug 45147
676        ok(exists $-[0], $message);
677        ok(exists $+[0], $message);
678        ok(exists $-[2], $message);
679        ok(exists $+[2], $message);
680        ok(!exists $-[3], $message);
681        ok(!exists $+[3], $message);
682        ok(exists $-[-1], $message);
683        ok(exists $+[-1], $message);
684        ok(exists $-[-3], $message);
685        ok(exists $+[-3], $message);
686        ok(!exists $-[-4], $message);
687        ok(!exists $+[-4], $message);
688
689        /.(a)(b)?(a)/;
690        is($#+, 3, $message);
691        is($#-, 3, $message);
692        is($+ [1], 2, $message);
693        is($- [1], 1, $message);
694        is($+ [3], 3, $message);
695        is($- [3], 2, $message);
696        ok(!defined $+ [2] && !defined $- [2] &&
697           !defined $+ [4] && !defined $- [4], $message);
698
699        /.(a)/;
700        is($#+, 1, $message);
701        is($#-, 1, $message);
702        is($+ [0], 2, $message);
703        is($- [0], 0, $message);
704        is($+ [1], 2, $message);
705        is($- [1], 1, $message);
706        ok(!defined $+ [2] && !defined $- [2] &&
707           !defined $+ [3] && !defined $- [3], $message);
708
709        /.(a)(ba*)?/;
710        is($#+, 2, $message);
711        is($#-, 1, $message);
712
713        # Check that values don’t stick
714        "     "=~/()()()(.)(..)/;
715        my($m,$p) = (\$-[5], \$+[5]);
716        () = "$$_" for $m, $p; # FETCH (or eqv.)
717        " " =~ /()/;
718        is $$m, undef, 'values do not stick to @- elements';
719        is $$p, undef, 'values do not stick to @+ elements';
720    }
721
722    foreach ('$+[0] = 13', '$-[0] = 13', '@+ = (7, 6, 5)',
723	     '@- = qw (foo bar)', '$^N = 42') {
724	is(eval $_, undef);
725        like($@, qr/^Modification of a read-only value attempted/,
726	     '$^N, @- and @+ are read-only');
727    }
728
729    {
730        my $message = '\G testing';
731        $_ = 'aaa';
732        pos = 1;
733        my @a = /\Ga/g;
734        is("@a", "a a", $message);
735
736        my $str = 'abcde';
737        pos $str = 2;
738        unlike($str, qr/^\G/, $message);
739        unlike($str, qr/^.\G/, $message);
740        like($str, qr/^..\G/, $message);
741        unlike($str, qr/^...\G/, $message);
742        ok($str =~ /\G../ && $& eq 'cd', $message);
743        ok($str =~ /.\G./ && $& eq 'bc', $message);
744
745    }
746
747    {
748        my $message = '\G and intuit and anchoring';
749	$_ = "abcdef";
750	pos = 0;
751	ok($_ =~ /\Gabc/, $message);
752	ok($_ =~ /^\Gabc/, $message);
753
754	pos = 3;
755	ok($_ =~ /\Gdef/, $message);
756	pos = 3;
757	ok($_ =~ /\Gdef$/, $message);
758	pos = 3;
759	ok($_ =~ /abc\Gdef$/, $message);
760	pos = 3;
761	ok($_ =~ /^abc\Gdef$/, $message);
762	pos = 3;
763	ok($_ =~ /c\Gd/, $message);
764	pos = 3;
765	ok($_ =~ /..\GX?def/, $message);
766    }
767
768    {
769        my $s = '123';
770        pos($s) = 1;
771        my @a = $s =~ /(\d)\G/g; # this infinitely looped up till 5.19.1
772        is("@a", "1", '\G looping');
773    }
774
775
776    {
777        my $message = 'pos inside (?{ })';
778        my $str = 'abcde';
779        our ($foo, $bar);
780        like($str, qr/b(?{$foo = $_; $bar = pos})c/, $message);
781        is($foo, $str, $message);
782        is($bar, 2, $message);
783        is(pos $str, undef, $message);
784
785        undef $foo;
786        undef $bar;
787        pos $str = undef;
788        ok($str =~ /b(?{$foo = $_; $bar = pos})c/g, $message);
789        is($foo, $str, $message);
790        is($bar, 2, $message);
791        is(pos $str, 3, $message);
792
793        $_ = $str;
794        undef $foo;
795        undef $bar;
796        like($_, qr/b(?{$foo = $_; $bar = pos})c/, $message);
797        is($foo, $str, $message);
798        is($bar, 2, $message);
799
800        undef $foo;
801        undef $bar;
802        ok(/b(?{$foo = $_; $bar = pos})c/g, $message);
803        is($foo, $str, $message);
804        is($bar, 2, $message);
805        is(pos, 3, $message);
806
807        undef $foo;
808        undef $bar;
809        pos = undef;
810        1 while /b(?{$foo = $_; $bar = pos})c/g;
811        is($foo, $str, $message);
812        is($bar, 2, $message);
813        is(pos, undef, $message);
814
815        undef $foo;
816        undef $bar;
817        $_ = 'abcde|abcde';
818        ok(s/b(?{$foo = $_; $bar = pos})c/x/g, $message);
819        is($foo, 'abcde|abcde', $message);
820        is($bar, 8, $message);
821        is($_, 'axde|axde', $message);
822
823        # List context:
824        $_ = 'abcde|abcde';
825        our @res;
826        () = /([ace]).(?{push @res, $1,$2})([ce])(?{push @res, $1,$2})/g;
827        @res = map {defined $_ ? "'$_'" : 'undef'} @res;
828        is("@res", "'a' undef 'a' 'c' 'e' undef 'a' undef 'a' 'c'", $message);
829
830        @res = ();
831        () = /([ace]).(?{push @res, $`,$&,$'})([ce])(?{push @res, $`,$&,$'})/g;
832        @res = map {defined $_ ? "'$_'" : 'undef'} @res;
833        is("@res", "'' 'ab' 'cde|abcde' " .
834                     "'' 'abc' 'de|abcde' " .
835                     "'abcd' 'e|' 'abcde' " .
836                     "'abcde|' 'ab' 'cde' " .
837                     "'abcde|' 'abc' 'de'", $message);
838    }
839
840    {
841        my $message = '\G anchor checks';
842        my $foo = 'aabbccddeeffgg';
843        pos ($foo) = 1;
844
845	ok($foo =~ /.\G(..)/g, $message);
846	is($1, 'ab', $message);
847
848	pos ($foo) += 1;
849	ok($foo =~ /.\G(..)/g, $message);
850	is($1, 'cc', $message);
851
852	pos ($foo) += 1;
853	ok($foo =~ /.\G(..)/g, $message);
854	is($1, 'de', $message);
855
856	ok($foo =~ /\Gef/g, $message);
857
858        undef pos $foo;
859        ok($foo =~ /\G(..)/g, $message);
860        is($1, 'aa', $message);
861
862        ok($foo =~ /\G(..)/g, $message);
863        is($1, 'bb', $message);
864
865        pos ($foo) = 5;
866        ok($foo =~ /\G(..)/g, $message);
867        is($1, 'cd', $message);
868    }
869
870    {
871        my $message = 'basic \G floating checks';
872        my $foo = 'aabbccddeeffgg';
873        pos ($foo) = 1;
874
875	ok($foo =~ /a+\G(..)/g, "$message: a+\\G");
876	is($1, 'ab', "$message: ab");
877
878	pos ($foo) += 1;
879	ok($foo =~ /b+\G(..)/g, "$message: b+\\G");
880	is($1, 'cc', "$message: cc");
881
882	pos ($foo) += 1;
883	ok($foo =~ /d+\G(..)/g, "$message: d+\\G");
884	is($1, 'de', "$message: de");
885
886	ok($foo =~ /\Gef/g, "$message: \\Gef");
887
888        pos ($foo) = 1;
889
890	ok($foo =~ /(?=a+\G)(..)/g, "$message: (?a+\\G)");
891	is($1, 'aa', "$message: aa");
892
893        pos ($foo) = 2;
894
895	ok($foo =~ /a(?=a+\G)(..)/g, "$message: a(?=a+\\G)");
896	is($1, 'ab', "$message: ab");
897
898    }
899
900    {
901        $_ = '123x123';
902        my @res = /(\d*|x)/g;
903        local $" = '|';
904        is("@res", "123||x|123|", "0 match in alternation");
905    }
906
907    {
908        my $message = "Match against temporaries (created via pp_helem())" .
909                         " is safe";
910        ok({foo => "bar\n" . $^X} -> {foo} =~ /^(.*)\n/g, $message);
911        is($1, "bar", $message);
912    }
913
914    {
915        my $message = 'package $i inside (?{ }), ' .
916                         'saved substrings and changing $_';
917        our @a = qw [foo bar];
918        our @b = ();
919        s/(\w)(?{push @b, $1})/,$1,/g for @a;
920        is("@b", "f o o b a r", $message);
921        is("@a", ",f,,o,,o, ,b,,a,,r,", $message);
922
923        $message = 'lexical $i inside (?{ }), ' .
924                         'saved substrings and changing $_';
925        no warnings 'closure';
926        my @c = qw [foo bar];
927        my @d = ();
928        s/(\w)(?{push @d, $1})/,$1,/g for @c;
929        is("@d", "f o o b a r", $message);
930        is("@c", ",f,,o,,o, ,b,,a,,r,", $message);
931    }
932
933    {
934        my $message = 'Brackets';
935        our $brackets;
936        $brackets = qr {
937            {  (?> [^{}]+ | (??{ $brackets }) )* }
938        }x;
939
940        ok("{{}" =~ $brackets, $message);
941        is($&, "{}", $message);
942        ok("something { long { and } hairy" =~ $brackets, $message);
943        is($&, "{ and }", $message);
944        ok("something { long { and } hairy" =~ m/((??{ $brackets }))/, $message);
945        is($&, "{ and }", $message);
946    }
947
948    {
949        $_ = "a-a\nxbb";
950        pos = 1;
951        ok(!m/^-.*bb/mg, '$_ = "a-a\nxbb"; m/^-.*bb/mg');
952    }
953
954    {
955        my $message = '\G anchor checks';
956        my $text = "aaXbXcc";
957        pos ($text) = 0;
958        ok($text !~ /\GXb*X/g, $message);
959    }
960
961    {
962        $_ = "xA\n" x 500;
963        unlike($_, qr/^\s*A/m, '$_ = "xA\n" x 500; /^\s*A/m"');
964
965        my $text = "abc dbf";
966        my @res = ($text =~ /.*?(b).*?\b/g);
967        is("@res", "b b", '\b is not special');
968    }
969
970    {
971        my $message = '\S, [\S], \s, [\s]';
972        my @a = map chr, 0 .. 255;
973        my @b = grep m/\S/, @a;
974        my @c = grep m/[^\s]/, @a;
975        is("@b", "@c", $message);
976
977        @b = grep /\S/, @a;
978        @c = grep /[\S]/, @a;
979        is("@b", "@c", $message);
980
981        @b = grep /\s/, @a;
982        @c = grep /[^\S]/, @a;
983        is("@b", "@c", $message);
984
985        @b = grep /\s/, @a;
986        @c = grep /[\s]/, @a;
987        is("@b", "@c", $message);
988    }
989    {
990        my $message = '\D, [\D], \d, [\d]';
991        my @a = map chr, 0 .. 255;
992        my @b = grep /\D/, @a;
993        my @c = grep /[^\d]/, @a;
994        is("@b", "@c", $message);
995
996        @b = grep /\D/, @a;
997        @c = grep /[\D]/, @a;
998        is("@b", "@c", $message);
999
1000        @b = grep /\d/, @a;
1001        @c = grep /[^\D]/, @a;
1002        is("@b", "@c", $message);
1003
1004        @b = grep /\d/, @a;
1005        @c = grep /[\d]/, @a;
1006        is("@b", "@c", $message);
1007    }
1008    {
1009        my $message = '\W, [\W], \w, [\w]';
1010        my @a = map chr, 0 .. 255;
1011        my @b = grep /\W/, @a;
1012        my @c = grep /[^\w]/, @a;
1013        is("@b", "@c", $message);
1014
1015        @b = grep /\W/, @a;
1016        @c = grep /[\W]/, @a;
1017        is("@b", "@c", $message);
1018
1019        @b = grep /\w/, @a;
1020        @c = grep /[^\W]/, @a;
1021        is("@b", "@c", $message);
1022
1023        @b = grep /\w/, @a;
1024        @c = grep /[\w]/, @a;
1025        is("@b", "@c", $message);
1026    }
1027
1028    {
1029        # see if backtracking optimization works correctly
1030        my $message = 'Backtrack optimization';
1031        like("\n\n", qr/\n   $ \n/x, $message);
1032        like("\n\n", qr/\n*  $ \n/x, $message);
1033        like("\n\n", qr/\n+  $ \n/x, $message);
1034        like("\n\n", qr/\n?  $ \n/x, $message);
1035        like("\n\n", qr/\n*? $ \n/x, $message);
1036        like("\n\n", qr/\n+? $ \n/x, $message);
1037        like("\n\n", qr/\n?? $ \n/x, $message);
1038        unlike("\n\n", qr/\n*+ $ \n/x, $message);
1039        unlike("\n\n", qr/\n++ $ \n/x, $message);
1040        like("\n\n", qr/\n?+ $ \n/x, $message);
1041    }
1042
1043    {
1044        package S;
1045        use overload '""' => sub {'Object S'};
1046        sub new {bless []}
1047
1048        my $message  = "Ref stringification";
1049      ::ok(do { \my $v} =~ /^SCALAR/,   "Scalar ref stringification") or diag($message);
1050      ::ok(do {\\my $v} =~ /^REF/,      "Ref ref stringification") or diag($message);
1051      ::ok([]           =~ /^ARRAY/,    "Array ref stringification") or diag($message);
1052      ::ok({}           =~ /^HASH/,     "Hash ref stringification") or diag($message);
1053      ::ok('S' -> new   =~ /^Object S/, "Object stringification") or diag($message);
1054    }
1055
1056    {
1057        my $message = "Test result of match used as match";
1058        ok('a1b' =~ ('xyz' =~ /y/), $message);
1059        is($`, 'a', $message);
1060        ok('a1b' =~ ('xyz' =~ /t/), $message);
1061        is($`, 'a', $message);
1062    }
1063
1064    {
1065        my $message = '"1" is not \s';
1066        warning_is(sub {unlike("1\n" x 102, qr/^\s*\n/m, $message)},
1067		   undef, "$message (did not warn)");
1068    }
1069
1070    {
1071        my $message = '\s, [[:space:]] and [[:blank:]]';
1072        my %space = (spc   => " ",
1073                     tab   => "\t",
1074                     cr    => "\r",
1075                     lf    => "\n",
1076                     ff    => "\f",
1077        # There's no \v but the vertical tabulator seems miraculously
1078        # be 11 both in ASCII and EBCDIC.
1079                     vt    => chr(11),
1080                     false => "space");
1081
1082        my @space0 = sort grep {$space {$_} =~ /\s/         } keys %space;
1083        my @space1 = sort grep {$space {$_} =~ /[[:space:]]/} keys %space;
1084        my @space2 = sort grep {$space {$_} =~ /[[:blank:]]/} keys %space;
1085
1086        is("@space0", "cr ff lf spc tab vt", $message);
1087        is("@space1", "cr ff lf spc tab vt", $message);
1088        is("@space2", "spc tab", $message);
1089    }
1090
1091    {
1092        my $n= 50;
1093        # this must be a high number and go from 0 to N, as the bug we are looking for doesn't
1094        # seem to be predictable. Slight changes to the test make it fail earlier or later.
1095        foreach my $i (0 .. $n)
1096        {
1097            my $str= "\n" x $i;
1098            ok $str=~/.*\z/, "implicit MBOL check string disable does not break things length=$i";
1099        }
1100    }
1101    {
1102        # we are actually testing that we dont die when executing these patterns
1103        use utf8;
1104        my $e = "Böck";
1105        ok(utf8::is_utf8($e),"got a unicode string - rt75680");
1106
1107        ok($e !~ m/.*?[x]$/, "unicode string against /.*?[x]\$/ - rt75680");
1108        ok($e !~ m/.*?\p{Space}$/i, "unicode string against /.*?\\p{space}\$/i - rt75680");
1109        ok($e !~ m/.*?[xyz]$/, "unicode string against /.*?[xyz]\$/ - rt75680");
1110        ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/, "unicode string against big pattern - rt75680");
1111    }
1112    {
1113        # we are actually testing that we dont die when executing these patterns
1114        my $e = "B\x{f6}ck";
1115        ok(!utf8::is_utf8($e), "got a latin string - rt75680");
1116
1117        ok($e !~ m/.*?[x]$/, "latin string against /.*?[x]\$/ - rt75680");
1118        ok($e !~ m/.*?\p{Space}$/i, "latin string against /.*?\\p{space}\$/i - rt75680");
1119        ok($e !~ m/.*?[xyz]$/,"latin string against /.*?[xyz]\$/ - rt75680");
1120        ok($e !~ m/(.*?)[,\p{isSpace}]+((?:\p{isAlpha}[\p{isSpace}\.]{1,2})+)\p{isSpace}*$/,"latin string against big pattern - rt75680");
1121    }
1122
1123    {
1124        #
1125        # Tests for bug 77414.
1126        #
1127
1128        my $message = '\p property after empty * match';
1129        {
1130            like("1", qr/\s*\pN/, $message);
1131            like("-", qr/\s*\p{Dash}/, $message);
1132            like(" ", qr/\w*\p{Blank}/, $message);
1133        }
1134
1135        like("1", qr/\s*\pN+/, $message);
1136        like("-", qr/\s*\p{Dash}{1}/, $message);
1137        like(" ", qr/\w*\p{Blank}{1,4}/, $message);
1138
1139    }
1140
1141    SKIP: {   # Some constructs with Latin1 characters cause a utf8 string not
1142              # to match itself in non-utf8
1143        if ($::IS_EBCDIC) {
1144            skip "Needs to be customized to run on EBCDIC", 6;
1145        }
1146        my $c = "\xc0";
1147        my $pattern = my $utf8_pattern = qr/((\xc0)+,?)/;
1148        utf8::upgrade($utf8_pattern);
1149        ok $c =~ $pattern, "\\xc0 =~ $pattern; Neither pattern nor target utf8";
1150        ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; Neither pattern nor target utf8";
1151        ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; pattern utf8, target not";
1152        ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; pattern utf8, target not";
1153        utf8::upgrade($c);
1154        ok $c =~ $pattern, "\\xc0 =~ $pattern; target utf8, pattern not";
1155        ok $c =~ /$pattern/i, "\\xc0 =~ /$pattern/i; target utf8, pattern not";
1156        ok $c =~ $utf8_pattern, "\\xc0 =~ $pattern; Both target and pattern utf8";
1157        ok $c =~ /$utf8_pattern/i, "\\xc0 =~ /$pattern/i; Both target and pattern utf8";
1158    }
1159
1160    SKIP: {   # Make sure can override the formatting
1161        if ($::IS_EBCDIC) {
1162            skip "Needs to be customized to run on EBCDIC", 2;
1163        }
1164        use feature 'unicode_strings';
1165        ok "\xc0" =~ /\w/, 'Under unicode_strings: "\xc0" =~ /\w/';
1166        ok "\xc0" !~ /(?d:\w)/, 'Under unicode_strings: "\xc0" !~ /(?d:\w)/';
1167    }
1168
1169    {
1170        my $str= "\x{100}";
1171        chop $str;
1172        my $qr= qr/$str/;
1173        is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag enabled - Bug #80212");
1174        $str= "";
1175        $qr= qr/$str/;
1176        is("$qr", "(?^:)", "Empty pattern qr// stringifies to (?^:) with unicode flag disabled - Bug #80212");
1177
1178    }
1179
1180    {
1181        local $::TODO = "[perl #38133]";
1182
1183        "A" =~ /(((?:A))?)+/;
1184        my $first = $2;
1185
1186        "A" =~ /(((A))?)+/;
1187        my $second = $2;
1188
1189        is($first, $second);
1190    }
1191
1192    {
1193	# RT #3516: \G in a m//g expression causes problems
1194	my $count = 0;
1195	while ("abc" =~ m/(\G[ac])?/g) {
1196	    last if $count++ > 10;
1197	}
1198	ok($count < 10, 'RT #3516 A');
1199
1200	$count = 0;
1201	while ("abc" =~ m/(\G|.)[ac]/g) {
1202	    last if $count++ > 10;
1203	}
1204	ok($count < 10, 'RT #3516 B');
1205
1206	$count = 0;
1207	while ("abc" =~ m/(\G?[ac])?/g) {
1208	    last if $count++ > 10;
1209	}
1210	ok($count < 10, 'RT #3516 C');
1211    }
1212    {
1213        # RT #84294: Is this a bug in the simple Perl regex?
1214        #          : Nested buffers and (?{...}) dont play nicely on partial matches
1215        our @got= ();
1216        ok("ab" =~ /((\w+)(?{ push @got, $2 })){2}/,"RT #84294: Pattern should match");
1217        my $want= "'ab', 'a', 'b'";
1218        my $got= join(", ", map { defined($_) ? "'$_'" : "undef" } @got);
1219        is($got,$want,'RT #84294: check that "ab" =~ /((\w+)(?{ push @got, $2 })){2}/ leaves @got in the correct state');
1220    }
1221
1222    {
1223        # Suppress warnings, as the non-unicode one comes out even if turn off
1224        # warnings here (because the execution is done in another scope).
1225        local $SIG{__WARN__} = sub {};
1226        my $str = "\x{110000}";
1227
1228        unlike($str, qr/\p{ASCII_Hex_Digit=True}/, "Non-Unicode doesn't match \\p{AHEX=True}");
1229        like($str, qr/\p{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\p{AHEX=False}");
1230        like($str, qr/\P{ASCII_Hex_Digit=True}/, "Non-Unicode matches \\P{AHEX=True}");
1231        unlike($str, qr/\P{ASCII_Hex_Digit=False}/, "Non-Unicode matches \\P{AHEX=FALSE}");
1232    }
1233
1234    {
1235        # Test that IDstart works, but because the author (khw) knows
1236        # regexes much better than the rest of the core, it is being done here
1237        # in the context of a regex which relies on buffer names beginng with
1238        # IDStarts.
1239        use utf8;
1240        my $str = "abc";
1241        like($str, qr/(?<a>abc)/, "'a' is legal IDStart");
1242        like($str, qr/(?<_>abc)/, "'_' is legal IDStart");
1243        like($str, qr/(?<ß>abc)/, "U+00DF is legal IDStart");
1244        like($str, qr/(?<ℕ>abc)/, "U+2115' is legal IDStart");
1245
1246        # This test works on Unicode 6.0 in which U+2118 and U+212E are legal
1247        # IDStarts there, but are not Word characters, and therefore Perl
1248        # doesn't allow them to be IDStarts.  But there is no guarantee that
1249        # Unicode won't change things around in the future so that at some
1250        # future Unicode revision these tests would need to be revised.
1251        foreach my $char ("%", "×", chr(0x2118), chr(0x212E)) {
1252            my $prog = <<"EOP";
1253use utf8;;
1254"abc" =~ qr/(?<$char>abc)/;
1255EOP
1256            utf8::encode($prog);
1257            fresh_perl_like($prog, qr!Group name must start with a non-digit word character!, {},
1258                        sprintf("'U+%04X not legal IDFirst'", ord($char)));
1259        }
1260    }
1261
1262    { # [perl #101710]
1263        my $pat = "b";
1264        utf8::upgrade($pat);
1265        like("\xffb", qr/$pat/i, "/i: utf8 pattern, non-utf8 string, latin1-char preceding matching char in string");
1266    }
1267
1268    { # Crash with @a =~ // warning
1269	local $SIG{__WARN__} = sub {
1270             pass 'no crash for @a =~ // warning'
1271        };
1272	eval ' sub { my @a =~ // } ';
1273    }
1274
1275    { # Concat overloading and qr// thingies
1276	my @refs;
1277	my $qr = qr//;
1278        package Cat {
1279            require overload;
1280            overload->import(
1281		'""' => sub { ${$_[0]} },
1282		'.' => sub {
1283		    push @refs, ref $_[1] if ref $_[1];
1284		    bless $_[2] ? \"$_[1]${$_[0]}" : \"${$_[0]}$_[1]"
1285		}
1286            );
1287	}
1288	my $s = "foo";
1289	my $o = bless \$s, Cat::;
1290	/$o$qr/;
1291	is "@refs", "Regexp", '/$o$qr/ passes qr ref to cat overload meth';
1292    }
1293
1294    {
1295        my $count=0;
1296        my $str="\n";
1297        $count++ while $str=~/.*/g;
1298        is $count, 2, 'test that ANCH_MBOL works properly. We should get 2 from $count++ while "\n"=~/.*/g';
1299        my $class_count= 0;
1300        $class_count++ while $str=~/[^\n]*/g;
1301        is $class_count, $count, 'while "\n"=~/.*/g and while "\n"=~/[^\n]*/g should behave the same';
1302        my $anch_count= 0;
1303        $anch_count++ while $str=~/^.*/mg;
1304        is $anch_count, 1, 'while "\n"=~/^.*/mg should match only once';
1305    }
1306
1307    { # [perl #111174]
1308        use re '/u';
1309        like "\xe0", qr/(?i:\xc0)/, "(?i: shouldn't lose the passed in /u";
1310        use re '/a';
1311        unlike "\x{100}", qr/(?i:\w)/, "(?i: shouldn't lose the passed in /a";
1312        use re '/aa';
1313        unlike 'k', qr/(?i:\N{KELVIN SIGN})/, "(?i: shouldn't lose the passed in /aa";
1314    }
1315
1316    {
1317	# the test for whether the pattern should be re-compiled should
1318	# consider the UTF8ness of the previous and current pattern
1319	# string, as well as the physical bytes of the pattern string
1320
1321	for my $s ("\xc4\x80", "\x{100}") {
1322	    ok($s =~ /^$s$/, "re-compile check is UTF8-aware");
1323	}
1324    }
1325
1326    #  #113682 more overloading and qr//
1327    # when doing /foo$overloaded/, if $overloaded returns
1328    # a qr/(?{})/ via qr or "" overloading, then 'use re 'eval'
1329    # shouldn't be required. Via '.', it still is.
1330    {
1331        package Qr0;
1332	use overload 'qr' => sub { qr/(??{50})/ };
1333
1334        package Qr1;
1335	use overload '""' => sub { qr/(??{51})/ };
1336
1337        package Qr2;
1338	use overload '.'  => sub { $_[1] . qr/(??{52})/ };
1339
1340        package Qr3;
1341	use overload '""' => sub { qr/(??{7})/ },
1342		     '.'  => sub { $_[1] . qr/(??{53})/ };
1343
1344        package Qr_indirect;
1345	use overload '""'  => sub { $_[0][0] };
1346
1347	package main;
1348
1349	for my $i (0..3) {
1350	    my $o = bless [], "Qr$i";
1351	    if ((0,0,1,1)[$i]) {
1352		eval { "A5$i" =~ /^A$o$/ };
1353		like($@, qr/Eval-group not allowed/, "Qr$i");
1354		eval { "5$i" =~ /$o/ };
1355		like($@, ($i == 3 ? qr/^$/ : qr/no method found,/),
1356			"Qr$i bare");
1357		{
1358		    use re 'eval';
1359		    ok("A5$i" =~ /^A$o$/, "Qr$i - with use re eval");
1360		    eval { "5$i" =~ /$o/ };
1361		    like($@, ($i == 3 ? qr/^$/ : qr/no method found,/),
1362			    "Qr$i bare - with use re eval");
1363		}
1364	    }
1365	    else {
1366		ok("A5$i" =~ /^A$o$/, "Qr$i");
1367		ok("5$i" =~ /$o/, "Qr$i bare");
1368	    }
1369	}
1370
1371	my $o = bless [ bless [], "Qr1" ], 'Qr_indirect';
1372	ok("A51" =~ /^A$o/, "Qr_indirect");
1373	ok("51" =~ /$o/, "Qr_indirect bare");
1374    }
1375
1376    {   # Various flags weren't being set when a [] is optimized into an
1377        # EXACTish node
1378        ;
1379        ;
1380        ok("\x{017F}\x{017F}" =~ qr/^[\x{00DF}]?$/i, "[] to EXACTish optimization");
1381    }
1382
1383    {
1384        for my $char (":", "\x{f7}", "\x{2010}") {
1385            my $utf8_char = $char;
1386            utf8::upgrade($utf8_char);
1387            my $display = $char;
1388            $display = display($display);
1389            my $utf8_display = "utf8::upgrade(\"$display\")";
1390
1391            like($char, qr/^$char?$/, "\"$display\" =~ /^$display?\$/");
1392            like($char, qr/^$utf8_char?$/, "my \$p = \"$display\"; utf8::upgrade(\$p); \"$display\" =~ /^\$p?\$/");
1393            like($utf8_char, qr/^$char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); \"\$c\" =~ /^$display?\$/");
1394            like($utf8_char, qr/^$utf8_char?$/, "my \$c = \"$display\"; utf8::upgrade(\$c); my \$p = \"$display\"; utf8::upgrade(\$p); \"\$c\" =~ /^\$p?\$/");
1395        }
1396    }
1397
1398    {
1399	# #116148: Pattern utf8ness sticks around globally
1400	# the utf8 in the first match was sticking around for the second
1401	# match
1402
1403	use feature 'unicode_strings';
1404
1405	my $x = "\x{263a}";
1406	$x =~ /$x/;
1407
1408	my $text = "Perl";
1409	ok("Perl" =~ /P.*$/i, '#116148');
1410    }
1411
1412    { # 117327: Sequence (?#...) not recognized in regex
1413      # The space between the '(' and '?' is now deprecated; this test should
1414      # be removed when the deprecation is made fatal.
1415        no warnings;
1416        like("ab", qr/a( ?#foo)b/x);
1417    }
1418
1419    { # 118297: Mixing up- and down-graded strings in regex
1420        utf8::upgrade(my $u = "\x{e5}");
1421        utf8::downgrade(my $d = "\x{e5}");
1422        my $warned;
1423        local $SIG{__WARN__} = sub { $warned++ if $_[0] =~ /\AMalformed UTF-8/ };
1424        my $re = qr/$u$d/;
1425        ok(!$warned, "no warnings when interpolating mixed up-/downgraded strings in pattern");
1426        my $c = "\x{e5}\x{e5}";
1427        utf8::downgrade($c);
1428        like($c, $re, "mixed up-/downgraded pattern matches downgraded string");
1429        utf8::upgrade($c);
1430        like($c, $re, "mixed up-/downgraded pattern matches upgraded string");
1431    }
1432
1433    {
1434        # if we have 87 capture buffers defined then \87 should refer to the 87th.
1435        # test that this is true for 1..100
1436        # Note that this test causes the engine to recurse at runtime, and
1437        # hence use a lot of C stack.
1438        for my $i (1..100) {
1439            my $capture= "a";
1440            $capture= "($capture)" for 1 .. $i;
1441            for my $mid ("","b") {
1442                my $str= "a${mid}a";
1443                my $backref= "\\$i";
1444                eval {
1445                    ok($str=~/$capture$mid$backref/,"\\$i works with $i buffers '$str'=~/...$mid$backref/");
1446                    1;
1447                } or do {
1448                    is("$@","","\\$i works with $i buffers works with $i buffers '$str'=~/...$mid$backref/");
1449                };
1450            }
1451        }
1452    }
1453
1454    # this mixture of readonly (not COWable) and COWable strings
1455    # messed up the capture buffers under COW. The actual test results
1456    # are incidental; the issue is was an AddressSanitizer failure
1457    {
1458	my $c ='AB';
1459	my $res = '';
1460	for ($c, 'C', $c, 'DE') {
1461	    ok(/(.)/, "COWable match");
1462	    $res .= $1;
1463	}
1464	is($res, "ACAD");
1465    }
1466
1467
1468    {
1469	# RT #45667
1470	# /[#$x]/x didn't interpolate the var $x.
1471	my $b = 'cd';
1472	my $s = 'abcd$%#&';
1473	$s =~ s/[a#$b%]/X/g;
1474	is ($s, 'XbXX$XX&', 'RT #45667 without /x');
1475	$s = 'abcd$%#&';
1476	$s =~ s/[a#$b%]/X/gx;
1477	is ($s, 'XbXX$XX&', 'RT #45667 with /x');
1478    }
1479
1480    {
1481	no warnings "uninitialized";
1482	my @a;
1483	$a[1]++;
1484	/@a/;
1485	pass('no crash with /@a/ when array has nonexistent elems');
1486    }
1487
1488    {
1489	is runperl(prog => 'delete $::{qq-\cR-}; //; print qq-ok\n-'),
1490	   "ok\n",
1491	   'deleting *^R does not result in crashes';
1492	no warnings 'once';
1493	*^R = *caretRglobwithnoscalar;
1494	"" =~ /(?{42})/;
1495	is $^R, 42, 'assigning to *^R does not result in a crash';
1496	is runperl(
1497	     stderr => 1,
1498	     prog => 'eval q|'
1499	            .' q-..- =~ /(??{undef *^R;q--})(?{42})/; '
1500                    .' print qq-$^R\n-'
1501	            .'|'
1502	   ),
1503	   "42\n",
1504	   'undefining *^R within (??{}) does not result in a crash';
1505    }
1506
1507    {
1508        # [perl #120446]
1509        # this code should be virtually instantaneous. If it takes 10s of
1510        # seconds, there a bug in intuit_start.
1511        # (this test doesn't actually test for slowness - that involves
1512        # too much danger of false positives on loaded machines - but by
1513        # putting it here, hopefully someone might notice if it suddenly
1514        # runs slowly)
1515        my $s = ('a' x 1_000_000) . 'b';
1516        my $i = 0;
1517        for (1..10_000) {
1518            pos($s) = $_;
1519            $i++ if $s =~/\Gb/g;
1520        }
1521        is($i, 0, "RT 120446: mustn't run slowly");
1522    }
1523
1524    {
1525        # [perl #120692]
1526        # these tests should be virtually instantaneous. If they take 10s of
1527        # seconds, there's a bug in intuit_start.
1528
1529        my $s = 'ab' x 1_000_000;
1530        utf8::upgrade($s);
1531        1 while $s =~ m/\Ga+ba+b/g;
1532        pass("RT#120692 \\G mustn't run slowly");
1533
1534        $s=~ /^a{1,2}x/ for  1..10_000;
1535        pass("RT#120692 a{1,2} mustn't run slowly");
1536
1537        $s=~ /ab.{1,2}x/;
1538        pass("RT#120692 ab.{1,2} mustn't run slowly");
1539
1540        $s = "-a-bc" x 250_000;
1541        $s .= "1a1bc";
1542        utf8::upgrade($s);
1543        ok($s =~ /\da\d{0,30000}bc/, "\\d{30000}");
1544
1545        $s = "-ab\n" x 250_000;
1546        $s .= "abx";
1547        ok($s =~ /^ab.*x/m, "distant float with /m");
1548
1549        my $r = qr/^abcd/;
1550        $s = "abcd-xyz\n" x 500_000;
1551        $s =~ /$r\d{1,2}xyz/m for 1..200;
1552        pass("BOL within //m  mustn't run slowly");
1553
1554        $s = "abcdefg" x 1_000_000;
1555        $s =~ /(?-m:^)abcX?fg/m for 1..100;
1556        pass("BOL within //m  mustn't skip absolute anchored check");
1557
1558        $s = "abcdefg" x 1_000_000;
1559        $s =~ /^XX\d{1,10}cde/ for 1..100;
1560        pass("abs anchored float string should fail quickly");
1561
1562    }
1563
1564    # These are based on looking at the code in regcomp.c
1565    # We don't look for specific code, just the existence of an SSC
1566    foreach my $re (qw(     qr/a?c/
1567                            qr/a?c/i
1568                            qr/[ab]?c/
1569                            qr/\R?c/
1570                            qr/\d?c/d
1571                            qr/\w?c/l
1572                            qr/\s?c/a
1573                            qr/[[:alpha:]]?c/u
1574    )) {
1575      SKIP: {
1576        skip "no re-debug under miniperl" if is_miniperl;
1577        my $prog = <<"EOP";
1578use re qw(Debug COMPILE);
1579$re;
1580EOP
1581        fresh_perl_like($prog, qr/synthetic stclass/, { stderr=>1 }, "$re generates a synthetic start class");
1582      }
1583    }
1584
1585    {
1586        like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
1587    }
1588
1589
1590
1591    {   # Was getting optimized into EXACT (non-folding node)
1592        my $x = qr/[x]/i;
1593        utf8::upgrade($x);
1594        like("X", qr/$x/, "UTF-8 of /[x]/i matches upper case");
1595    }
1596
1597    {	# [perl #123539]
1598        like("TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, "");
1599        like("TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff", qr/TffffffffffffT\x{100}TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT5TTTTTTTTTTTTTTTTTTTTTTTTT3TTgTTTTTTTTTTTTTTTTTTTTT2TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHiHHHHHHHfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff&ffff/il, "");
1600    }
1601
1602	{	# [perl #123604]
1603		my($s, $x, @x) = ('abc', 'a', 'd');
1604		my $long = 'b' x 2000;
1605		my $eval = q{$s =~ m{$x[bbb]c} ? 1 : 0};
1606		$eval =~ s{bbb}{$long};
1607		my $match = eval $eval;
1608		ok(1, "did not crash");
1609		ok($match, "[bbb...] resolved as character class, not subscript");
1610	}
1611
1612        {   # Test that we handle some malformed UTF-8 without looping [perl
1613            # #123562]
1614
1615            my $code='
1616                BEGIN{require q(test.pl);}
1617                use Encode qw(_utf8_on);
1618                my $malformed = "a\x80\n";
1619                _utf8_on($malformed);
1620                watchdog(3);
1621                $malformed =~ /(\n\r|\r)$/;
1622                print q(No infinite loop here!);
1623            ';
1624            fresh_perl_like($code, qr/Malformed UTF-8 character/, {},
1625                "test that we handle some UTF-8 malformations without looping" );
1626        }
1627} # End of sub run_tests
1628
16291;
1630