xref: /openbsd-src/gnu/usr.bin/perl/t/re/subst.t (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1#!./perl -w
2
3BEGIN {
4    chdir 't' if -d 't';
5    @INC = '../lib';
6    require Config; import Config;
7    require './test.pl';
8}
9
10plan( tests => 236 );
11
12$_ = 'david';
13$a = s/david/rules/r;
14ok( $_ eq 'david' && $a eq 'rules', 'non-destructive substitute' );
15
16$a = "david" =~ s/david/rules/r;
17ok( $a eq 'rules', 's///r with constant' );
18
19$a = "david" =~ s/david/"is"."great"/er;
20ok( $a eq 'isgreat', 's///er' );
21
22$a = "daviddavid" =~ s/david/cool/gr;
23ok( $a eq 'coolcool', 's///gr' );
24
25$a = 'david';
26$b = $a =~ s/david/sucks/r =~ s/sucks/rules/r;
27ok( $a eq 'david' && $b eq 'rules', 'chained s///r' );
28
29$a = 'david';
30$b = $a =~ s/xxx/sucks/r;
31ok( $a eq 'david' && $b eq 'david', 'non matching s///r' );
32
33$a = 'david';
34for (0..2) {
35    ok( 'david' =~ s/$a/rules/ro eq 'rules', 's///ro '.$_ );
36}
37
38$a = 'david';
39eval '$b = $a !~ s/david/is great/r';
40like( $@, qr{Using !~ with s///r doesn't make sense}, 's///r !~ operator gives error' );
41
42{
43        no warnings 'uninitialized';
44        $a = undef;
45        $b = $a =~ s/left/right/r;
46        ok ( !defined $a && !defined $b, 's///r with undef input' );
47
48        use warnings;
49        warning_like(sub { $b = $a =~ s/left/right/r },
50		     qr/^Use of uninitialized value/,
51		     's///r Uninitialized warning');
52
53        $a = 'david';
54        warning_like(sub {eval 's/david/sucks/r; 1'},
55		     qr/^Useless use of non-destructive substitution/,
56		     's///r void context warning');
57}
58
59$a = '';
60$b = $a =~ s/david/rules/r;
61ok( $a eq '' && $b eq '', 's///r on empty string' );
62
63$_ = 'david';
64@b = s/david/rules/r;
65ok( $_ eq 'david' && $b[0] eq 'rules', 's///r in list context' );
66
67# Magic value and s///r
68require Tie::Scalar;
69tie $m, 'Tie::StdScalar';  # makes $a magical
70$m = "david";
71$b = $m =~ s/david/rules/r;
72ok( $m eq 'david' && $b eq 'rules', 's///r with magic input' );
73
74$m = $b =~ s/rules/david/r;
75ok( defined tied($m), 's///r magic isn\'t lost' );
76
77$b = $m =~ s/xxx/yyy/r;
78ok( ! defined tied($b), 's///r magic isn\'t contagious' );
79
80my $ref = \("aaa" =~ s/aaa/bbb/r);
81is (Internals::SvREFCNT($$ref), 1, 's///r does not leak');
82$ref = \("aaa" =~ s/aaa/bbb/rg);
83is (Internals::SvREFCNT($$ref), 1, 's///rg does not leak');
84
85$x = 'foo';
86$_ = "x";
87s/x/\$x/;
88ok( $_ eq '$x', ":$_: eq :\$x:" );
89
90$_ = "x";
91s/x/$x/;
92ok( $_ eq 'foo', ":$_: eq :foo:" );
93
94$_ = "x";
95s/x/\$x $x/;
96ok( $_ eq '$x foo', ":$_: eq :\$x foo:" );
97
98$b = 'cd';
99($a = 'abcdef') =~ s<(b${b}e)>'\n$1';
100ok( $1 eq 'bcde' && $a eq 'a\n$1f', ":$1: eq :bcde: ; :$a: eq :a\\n\$1f:" );
101
102$a = 'abacada';
103ok( ($a =~ s/a/x/g) == 4 && $a eq 'xbxcxdx' );
104
105ok( ($a =~ s/a/y/g) == 0 && $a eq 'xbxcxdx' );
106
107ok( ($a =~ s/b/y/g) == 1 && $a eq 'xyxcxdx' );
108
109$_ = 'ABACADA';
110ok( /a/i && s///gi && $_ eq 'BCD' );
111
112$_ = '\\' x 4;
113ok( length($_) == 4 );
114$snum = s/\\/\\\\/g;
115ok( $_ eq '\\' x 8 && $snum == 4 );
116
117$_ = '\/' x 4;
118ok( length($_) == 8 );
119$snum = s/\//\/\//g;
120ok( $_ eq '\\//' x 4 && $snum == 4 );
121ok( length($_) == 12 );
122
123$_ = 'aaaXXXXbbb';
124s/^a//;
125ok( $_ eq 'aaXXXXbbb' );
126
127$_ = 'aaaXXXXbbb';
128s/a//;
129ok( $_ eq 'aaXXXXbbb' );
130
131$_ = 'aaaXXXXbbb';
132s/^a/b/;
133ok( $_ eq 'baaXXXXbbb' );
134
135$_ = 'aaaXXXXbbb';
136s/a/b/;
137ok( $_ eq 'baaXXXXbbb' );
138
139$_ = 'aaaXXXXbbb';
140s/aa//;
141ok( $_ eq 'aXXXXbbb' );
142
143$_ = 'aaaXXXXbbb';
144s/aa/b/;
145ok( $_ eq 'baXXXXbbb' );
146
147$_ = 'aaaXXXXbbb';
148s/b$//;
149ok( $_ eq 'aaaXXXXbb' );
150
151$_ = 'aaaXXXXbbb';
152s/b//;
153ok( $_ eq 'aaaXXXXbb' );
154
155$_ = 'aaaXXXXbbb';
156s/bb//;
157ok( $_ eq 'aaaXXXXb' );
158
159$_ = 'aaaXXXXbbb';
160s/aX/y/;
161ok( $_ eq 'aayXXXbbb' );
162
163$_ = 'aaaXXXXbbb';
164s/Xb/z/;
165ok( $_ eq 'aaaXXXzbb' );
166
167$_ = 'aaaXXXXbbb';
168s/aaX.*Xbb//;
169ok( $_ eq 'ab' );
170
171$_ = 'aaaXXXXbbb';
172s/bb/x/;
173ok( $_ eq 'aaaXXXXxb' );
174
175# now for some unoptimized versions of the same.
176
177$_ = 'aaaXXXXbbb';
178$x ne $x || s/^a//;
179ok( $_ eq 'aaXXXXbbb' );
180
181$_ = 'aaaXXXXbbb';
182$x ne $x || s/a//;
183ok( $_ eq 'aaXXXXbbb' );
184
185$_ = 'aaaXXXXbbb';
186$x ne $x || s/^a/b/;
187ok( $_ eq 'baaXXXXbbb' );
188
189$_ = 'aaaXXXXbbb';
190$x ne $x || s/a/b/;
191ok( $_ eq 'baaXXXXbbb' );
192
193$_ = 'aaaXXXXbbb';
194$x ne $x || s/aa//;
195ok( $_ eq 'aXXXXbbb' );
196
197$_ = 'aaaXXXXbbb';
198$x ne $x || s/aa/b/;
199ok( $_ eq 'baXXXXbbb' );
200
201$_ = 'aaaXXXXbbb';
202$x ne $x || s/b$//;
203ok( $_ eq 'aaaXXXXbb' );
204
205$_ = 'aaaXXXXbbb';
206$x ne $x || s/b//;
207ok( $_ eq 'aaaXXXXbb' );
208
209$_ = 'aaaXXXXbbb';
210$x ne $x || s/bb//;
211ok( $_ eq 'aaaXXXXb' );
212
213$_ = 'aaaXXXXbbb';
214$x ne $x || s/aX/y/;
215ok( $_ eq 'aayXXXbbb' );
216
217$_ = 'aaaXXXXbbb';
218$x ne $x || s/Xb/z/;
219ok( $_ eq 'aaaXXXzbb' );
220
221$_ = 'aaaXXXXbbb';
222$x ne $x || s/aaX.*Xbb//;
223ok( $_ eq 'ab' );
224
225$_ = 'aaaXXXXbbb';
226$x ne $x || s/bb/x/;
227ok( $_ eq 'aaaXXXXxb' );
228
229$_ = 'abc123xyz';
230s/(\d+)/$1*2/e;              # yields 'abc246xyz'
231ok( $_ eq 'abc246xyz' );
232s/(\d+)/sprintf("%5d",$1)/e; # yields 'abc  246xyz'
233ok( $_ eq 'abc  246xyz' );
234s/(\w)/$1 x 2/eg;            # yields 'aabbcc  224466xxyyzz'
235ok( $_ eq 'aabbcc  224466xxyyzz' );
236
237$_ = "aaaaa";
238ok( y/a/b/ == 5 );
239ok( y/a/b/ == 0 );
240ok( y/b// == 5 );
241ok( y/b/c/s == 5 );
242ok( y/c// == 1 );
243ok( y/c//d == 1 );
244ok( $_ eq "" );
245
246$_ = "Now is the %#*! time for all good men...";
247ok( ($x=(y/a-zA-Z //cd)) == 7 );
248ok( y/ / /s == 8 );
249
250$_ = 'abcdefghijklmnopqrstuvwxyz0123456789';
251tr/a-z/A-Z/;
252
253ok( $_ eq 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' );
254
255# same as tr/A-Z/a-z/;
256if (defined $Config{ebcdic} && $Config{ebcdic} eq 'define') {	# EBCDIC.
257    no utf8;
258    y[\301-\351][\201-\251];
259} else {		# Ye Olde ASCII.  Or something like it.
260    y[\101-\132][\141-\172];
261}
262
263ok( $_ eq 'abcdefghijklmnopqrstuvwxyz0123456789' );
264
265SKIP: {
266    skip("not ASCII",1) unless (ord("+") == ord(",") - 1
267			     && ord(",") == ord("-") - 1
268			     && ord("a") == ord("b") - 1
269			     && ord("b") == ord("c") - 1);
270    $_ = '+,-';
271    tr/+--/a-c/;
272    ok( $_ eq 'abc' );
273}
274
275$_ = '+,-';
276tr/+\--/a\/c/;
277ok( $_ eq 'a,/' );
278
279$_ = '+,-';
280tr/-+,/ab\-/;
281ok( $_ eq 'b-a' );
282
283
284# test recursive substitutions
285# code based on the recursive expansion of makefile variables
286
287my %MK = (
288    AAAAA => '$(B)', B=>'$(C)', C => 'D',			# long->short
289    E     => '$(F)', F=>'p $(G) q', G => 'HHHHH',	# short->long
290    DIR => '$(UNDEFINEDNAME)/xxx',
291);
292sub var {
293    my($var,$level) = @_;
294    return "\$($var)" unless exists $MK{$var};
295    return exp_vars($MK{$var}, $level+1); # can recurse
296}
297sub exp_vars {
298    my($str,$level) = @_;
299    $str =~ s/\$\((\w+)\)/var($1, $level+1)/ge; # can recurse
300    #warn "exp_vars $level = '$str'\n";
301    $str;
302}
303
304ok( exp_vars('$(AAAAA)',0)           eq 'D' );
305ok( exp_vars('$(E)',0)               eq 'p HHHHH q' );
306ok( exp_vars('$(DIR)',0)             eq '$(UNDEFINEDNAME)/xxx' );
307ok( exp_vars('foo $(DIR)/yyy bar',0) eq 'foo $(UNDEFINEDNAME)/xxx/yyy bar' );
308
309$_ = "abcd";
310s/(..)/$x = $1, m#.#/eg;
311ok( $x eq "cd", 'a match nested in the RHS of a substitution' );
312
313# Subst and lookbehind
314
315$_="ccccc";
316$snum = s/(?<!x)c/x/g;
317ok( $_ eq "xxxxx" && $snum == 5 );
318
319$_="ccccc";
320$snum = s/(?<!x)(c)/x/g;
321ok( $_ eq "xxxxx" && $snum == 5 );
322
323$_="foobbarfoobbar";
324$snum = s/(?<!r)foobbar/foobar/g;
325ok( $_ eq "foobarfoobbar" && $snum == 1 );
326
327$_="foobbarfoobbar";
328$snum = s/(?<!ar)(foobbar)/foobar/g;
329ok( $_ eq "foobarfoobbar" && $snum == 1 );
330
331$_="foobbarfoobbar";
332$snum = s/(?<!ar)foobbar/foobar/g;
333ok( $_ eq "foobarfoobbar" && $snum == 1 );
334
335eval 's{foo} # this is a comment, not a delimiter
336       {bar};';
337ok( ! @?, 'parsing of split subst with comment' );
338
339$snum = eval '$_="exactly"; s sxsys;m 3(yactl)3;$1';
340is( $snum, 'yactl', 'alpha delimiters are allowed' );
341
342$_="baacbaa";
343$snum = tr/a/b/s;
344ok( $_ eq "bbcbb" && $snum == 4,
345    'check if squashing works at the end of string' );
346
347$_ = "ab";
348ok( s/a/b/ == 1 );
349
350$_ = <<'EOL';
351     $url = new URI::URL "http://www/";   die if $url eq "xXx";
352EOL
353$^R = 'junk';
354
355$foo = ' $@%#lowercase $@%# lowercase UPPERCASE$@%#UPPERCASE' .
356  ' $@%#lowercase$@%#lowercase$@%# lowercase lowercase $@%#lowercase' .
357  ' lowercase $@%#MiXeD$@%# ';
358
359$snum =
360s{  \d+          \b [,.;]? (?{ 'digits' })
361   |
362    [a-z]+       \b [,.;]? (?{ 'lowercase' })
363   |
364    [A-Z]+       \b [,.;]? (?{ 'UPPERCASE' })
365   |
366    [A-Z] [a-z]+ \b [,.;]? (?{ 'Capitalized' })
367   |
368    [A-Za-z]+    \b [,.;]? (?{ 'MiXeD' })
369   |
370    [A-Za-z0-9]+ \b [,.;]? (?{ 'alphanumeric' })
371   |
372    \s+                    (?{ ' ' })
373   |
374    [^A-Za-z0-9\s]+          (?{ '$@%#' })
375}{$^R}xg;
376ok( $_ eq $foo );
377ok( $snum == 31 );
378
379$_ = 'a' x 6;
380$snum = s/a(?{})//g;
381ok( $_ eq '' && $snum == 6 );
382
383$_ = 'x' x 20;
384$snum = s/(\d*|x)/<$1>/g;
385$foo = '<>' . ('<x><>' x 20) ;
386ok( $_ eq $foo && $snum == 41 );
387
388$t = 'aaaaaaaaa';
389
390$_ = $t;
391pos = 6;
392$snum = s/\Ga/xx/g;
393ok( $_ eq 'aaaaaaxxxxxx' && $snum == 3 );
394
395$_ = $t;
396pos = 6;
397$snum = s/\Ga/x/g;
398ok( $_ eq 'aaaaaaxxx' && $snum == 3 );
399
400$_ = $t;
401pos = 6;
402s/\Ga/xx/;
403ok( $_ eq 'aaaaaaxxaa' );
404
405$_ = $t;
406pos = 6;
407s/\Ga/x/;
408ok( $_ eq 'aaaaaaxaa' );
409
410$_ = $t;
411$snum = s/\Ga/xx/g;
412ok( $_ eq 'xxxxxxxxxxxxxxxxxx' && $snum == 9 );
413
414$_ = $t;
415$snum = s/\Ga/x/g;
416ok( $_ eq 'xxxxxxxxx' && $snum == 9 );
417
418$_ = $t;
419s/\Ga/xx/;
420ok( $_ eq 'xxaaaaaaaa' );
421
422$_ = $t;
423s/\Ga/x/;
424ok( $_ eq 'xaaaaaaaa' );
425
426$_ = 'aaaa';
427$snum = s/\ba/./g;
428ok( $_ eq '.aaa' && $snum == 1 );
429
430eval q% s/a/"b"}/e %;
431ok( $@ =~ /Bad evalled substitution/ );
432eval q% ($_ = "x") =~ s/(.)/"$1 "/e %;
433ok( $_ eq "x " and !length $@ );
434$x = $x = 'interp';
435eval q% ($_ = "x") =~ s/x(($x)*)/"$1"/e %;
436ok( $_ eq '' and !length $@ );
437
438$_ = "C:/";
439ok( !s/^([a-z]:)/\u$1/ );
440
441$_ = "Charles Bronson";
442$snum = s/\B\w//g;
443ok( $_ eq "C B" && $snum == 12 );
444
445{
446    use utf8;
447    my $s = "H\303\266he";
448    my $l = my $r = $s;
449    $l =~ s/[^\w]//g;
450    $r =~ s/[^\w\.]//g;
451    is($l, $r, "use utf8 \\w");
452}
453
454my $pv1 = my $pv2  = "Andreas J. K\303\266nig";
455$pv1 =~ s/A/\x{100}/;
456substr($pv2,0,1) = "\x{100}";
457is($pv1, $pv2);
458
459SKIP: {
460    skip("EBCDIC", 3) if ord("A") == 193;
461
462    {
463	# Gregor Chrupala <gregor.chrupala@star-group.net>
464	use utf8;
465	$a = 'Espa&ntilde;a';
466	$a =~ s/&ntilde;/ñ/;
467	like($a, qr/ñ/, "use utf8 RHS");
468    }
469
470    {
471	use utf8;
472	$a = 'España España';
473	$a =~ s/ñ/&ntilde;/;
474	like($a, qr/ñ/, "use utf8 LHS");
475    }
476
477    {
478	use utf8;
479	$a = 'España';
480	$a =~ s/ñ/ñ/;
481	like($a, qr/ñ/, "use utf8 LHS and RHS");
482    }
483}
484
485{
486    # SADAHIRO Tomoyuki <bqw10602@nifty.com>
487
488    $a = "\x{100}\x{101}";
489    $a =~ s/\x{101}/\xFF/;
490    like($a, qr/\xFF/);
491    is(length($a), 2, "SADAHIRO utf8 s///");
492
493    $a = "\x{100}\x{101}";
494    $a =~ s/\x{101}/"\xFF"/e;
495    like($a, qr/\xFF/);
496    is(length($a), 2);
497
498    $a = "\x{100}\x{101}";
499    $a =~ s/\x{101}/\xFF\xFF\xFF/;
500    like($a, qr/\xFF\xFF\xFF/);
501    is(length($a), 4);
502
503    $a = "\x{100}\x{101}";
504    $a =~ s/\x{101}/"\xFF\xFF\xFF"/e;
505    like($a, qr/\xFF\xFF\xFF/);
506    is(length($a), 4);
507
508    $a = "\xFF\x{101}";
509    $a =~ s/\xFF/\x{100}/;
510    like($a, qr/\x{100}/);
511    is(length($a), 2);
512
513    $a = "\xFF\x{101}";
514    $a =~ s/\xFF/"\x{100}"/e;
515    like($a, qr/\x{100}/);
516    is(length($a), 2);
517
518    $a = "\xFF";
519    $a =~ s/\xFF/\x{100}/;
520    like($a, qr/\x{100}/);
521    is(length($a), 1);
522
523    $a = "\xFF";
524    $a =~ s/\xFF/"\x{100}"/e;
525    like($a, qr/\x{100}/);
526    is(length($a), 1);
527}
528
529{
530    # subst with mixed utf8/non-utf8 type
531    my($ua, $ub, $uc, $ud) = ("\x{101}", "\x{102}", "\x{103}", "\x{104}");
532    my($na, $nb) = ("\x{ff}", "\x{fe}");
533    my $a = "$ua--$ub";
534    my $b;
535    ($b = $a) =~ s/--/$na/;
536    is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8");
537    ($b = $a) =~ s/--/--$na--/;
538    is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8");
539    ($b = $a) =~ s/--/$uc/;
540    is($b, "$ua$uc$ub", "s///: replace utf8 into utf8");
541    ($b = $a) =~ s/--/--$uc--/;
542    is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8");
543    $a = "$na--$nb";
544    ($b = $a) =~ s/--/$ua/;
545    is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8");
546    ($b = $a) =~ s/--/--$ua--/;
547    is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8");
548
549    # now with utf8 pattern
550    $a = "$ua--$ub";
551    ($b = $a) =~ s/-($ud)?-/$na/;
552    is($b, "$ua$na$ub", "s///: replace non-utf8 into utf8 (utf8 pattern)");
553    ($b = $a) =~ s/-($ud)?-/--$na--/;
554    is($b, "$ua--$na--$ub", "s///: replace long non-utf8 into utf8 (utf8 pattern)");
555    ($b = $a) =~ s/-($ud)?-/$uc/;
556    is($b, "$ua$uc$ub", "s///: replace utf8 into utf8 (utf8 pattern)");
557    ($b = $a) =~ s/-($ud)?-/--$uc--/;
558    is($b, "$ua--$uc--$ub", "s///: replace long utf8 into utf8 (utf8 pattern)");
559    $a = "$na--$nb";
560    ($b = $a) =~ s/-($ud)?-/$ua/;
561    is($b, "$na$ua$nb", "s///: replace utf8 into non-utf8 (utf8 pattern)");
562    ($b = $a) =~ s/-($ud)?-/--$ua--/;
563    is($b, "$na--$ua--$nb", "s///: replace long utf8 into non-utf8 (utf8 pattern)");
564    ($b = $a) =~ s/-($ud)?-/$na/;
565    is($b, "$na$na$nb", "s///: replace non-utf8 into non-utf8 (utf8 pattern)");
566    ($b = $a) =~ s/-($ud)?-/--$na--/;
567    is($b, "$na--$na--$nb", "s///: replace long non-utf8 into non-utf8 (utf8 pattern)");
568}
569
570$_ = 'aaaa';
571$r = 'x';
572$s = s/a(?{})/$r/g;
573is("<$_> <$s>", "<xxxx> <4>", "[perl #7806]");
574
575$_ = 'aaaa';
576$s = s/a(?{})//g;
577is("<$_> <$s>", "<> <4>", "[perl #7806]");
578
579# [perl #19048] Coredump in silly replacement
580{
581    local $^W = 0;
582    $_="abcdef\n";
583    s!.!!eg;
584    is($_, "\n", "[perl #19048]");
585}
586
587# [perl #17757] interaction between saw_ampersand and study
588{
589    my $f = eval q{ $& };
590    $f = "xx";
591    study $f;
592    $f =~ s/x/y/g;
593    is($f, "yy", "[perl #17757]");
594}
595
596# [perl #20684] returned a zero count
597$_ = "1111";
598is(s/(??{1})/2/eg, 4, '#20684 s/// with (??{..}) inside');
599
600# [perl #20682] @- not visible in replacement
601$_ = "123";
602/(2)/;	# seed @- with something else
603s/(1)(2)(3)/$#- (@-)/;
604is($_, "3 (0 0 1 2)", '#20682 @- not visible in replacement');
605
606# [perl #20682] $^N not visible in replacement
607$_ = "abc";
608/(a)/; s/(b)|(c)/-$^N/g;
609is($_,'a-b-c','#20682 $^N not visible in replacement');
610
611# [perl #22351] perl bug with 'e' substitution modifier
612my $name = "chris";
613{
614    no warnings 'uninitialized';
615    $name =~ s/hr//e;
616}
617is($name, "cis", q[#22351 bug with 'e' substitution modifier]);
618
619
620# [perl #34171] $1 didn't honour 'use bytes' in s//e
621{
622    my $s="\x{100}";
623    my $x;
624    {
625	use bytes;
626	$s=~ s/(..)/$x=$1/e
627    }
628    is(length($x), 2, '[perl #34171]');
629}
630
631
632{ # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not
633    my $c;
634
635    ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g;
636    is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g");
637
638    ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g;
639    is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g");
640}
641{
642    $_ = "xy";
643    no warnings 'uninitialized';
644    /(((((((((x)))))))))(z)/;	# clear $10
645    s/(((((((((x)))))))))(y)/${10}/;
646    is($_,"y","RT#6006: \$_ eq '$_'");
647    $_ = "xr";
648    s/(((((((((x)))))))))(r)/fooba${10}/;
649    is($_,"foobar","RT#6006: \$_ eq '$_'");
650}
651{
652    my $want=("\n" x 11).("B\n" x 11)."B";
653    $_="B";
654    our $i;
655    for $i(1..11){
656	s/^.*$/$&/gm;
657	$_="\n$_\n$&";
658    }
659    is($want,$_,"RT#17542");
660}
661
662{
663    my @tests = ('ABC', "\xA3\xA4\xA5", "\x{410}\x{411}\x{412}");
664    foreach (@tests) {
665	my $id = ord $_;
666	s/./pos/ge;
667	is($_, "012", "RT#52104: $id");
668    }
669}
670
671fresh_perl_is( '$_=q(foo);s/(.)\G//g;print' => 'foo', {},
672                '[perl #69056] positive GPOS regex segfault' );
673fresh_perl_is( '$_="abcdef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'aXXdef', {},
674                'positive GPOS regex substitution failure (#69056, #114884)' );
675fresh_perl_is( '$_="abcdefg123456"; s/(?<=...\G)?(\d)/($1)/; print' => 'abcdefg(1)23456', {},
676                'positive GPOS lookbehind regex substitution failure #114884' );
677
678# s/..\G//g should stop after the first iteration, rather than working its
679# way backwards, or looping infinitely, or SEGVing (for example)
680{
681    my ($s, $count);
682
683    # use a function to disable constant folding
684    my $f = sub { substr("789", 0, $_[0]) };
685
686    $s = '123456';
687    pos($s) = 4;
688    $count = $s =~ s/\d\d\G/7/g;
689    is($count, 1, "..\\G count (short)");
690    is($s, "12756", "..\\G s (short)");
691
692    $s = '123456';
693    pos($s) = 4;
694    $count = $s =~ s/\d\d\G/78/g;
695    is($count, 1, "..\\G count (equal)");
696    is($s, "127856", "..\\G s (equal)");
697
698    $s = '123456';
699    pos($s) = 4;
700    $count = $s =~ s/\d\d\G/789/g;
701    is($count, 1, "..\\G count (long)");
702    is($s, "1278956", "..\\G s (long)");
703
704
705    $s = '123456';
706    pos($s) = 4;
707    $count = $s =~ s/\d\d\G/$f->(1)/eg;
708    is($count, 1, "..\\G count (short code)");
709    is($s, "12756", "..\\G s (short code)");
710
711    $s = '123456';
712    pos($s) = 4;
713    $count = $s =~ s/\d\d\G/$f->(2)/eg;
714    is($count, 1, "..\\G count (equal code)");
715    is($s, "127856", "..\\G s (equal code)");
716
717    $s = '123456';
718    pos($s) = 4;
719    $count = $s =~ s/\d\d\G/$f->(3)/eg;
720    is($count, 1, "..\\G count (long code)");
721    is($s, "1278956", "..\\G s (long code)");
722
723    $s = '123456';
724    pos($s) = 4;
725    $count = $s =~ s/\d\d(?=\d\G)/7/g;
726    is($count, 1, "..\\G count (lookahead short)");
727    is($s, "17456", "..\\G s (lookahead short)");
728
729    $s = '123456';
730    pos($s) = 4;
731    $count = $s =~ s/\d\d(?=\d\G)/78/g;
732    is($count, 1, "..\\G count (lookahead equal)");
733    is($s, "178456", "..\\G s (lookahead equal)");
734
735    $s = '123456';
736    pos($s) = 4;
737    $count = $s =~ s/\d\d(?=\d\G)/789/g;
738    is($count, 1, "..\\G count (lookahead long)");
739    is($s, "1789456", "..\\G s (lookahead long)");
740
741
742    $s = '123456';
743    pos($s) = 4;
744    $count = $s =~ s/\d\d(?=\d\G)/$f->(1)/eg;
745    is($count, 1, "..\\G count (lookahead short code)");
746    is($s, "17456", "..\\G s (lookahead short code)");
747
748    $s = '123456';
749    pos($s) = 4;
750    $count = $s =~ s/\d\d(?=\d\G)/$f->(2)/eg;
751    is($count, 1, "..\\G count (lookahead equal code)");
752    is($s, "178456", "..\\G s (lookahead equal code)");
753
754    $s = '123456';
755    pos($s) = 4;
756    $count = $s =~ s/\d\d(?=\d\G)/$f->(3)/eg;
757    is($count, 1, "..\\G count (lookahead long code)");
758    is($s, "1789456", "..\\G s (lookahead long code)");
759}
760
761
762# [perl #71470] $var =~ s/$qr//e calling get-magic on $_ as well as $var
763{
764 local *_;
765 my $scratch;
766 sub qrBug::TIESCALAR { bless[pop], 'qrBug' }
767 sub qrBug::FETCH { $scratch .= "[fetching $_[0][0]]"; 'prew' }
768 sub qrBug::STORE{}
769 tie my $kror, qrBug => '$kror';
770 tie $_, qrBug => '$_';
771 my $qr = qr/(?:)/;
772 $kror =~ s/$qr/""/e;
773 is(
774   $scratch, '[fetching $kror]',
775  'bug: $var =~ s/$qr//e calling get-magic on $_ as well as $var',
776 );
777}
778
779{ # Bug #41530; replacing non-utf8 with a utf8 causes problems
780    my $string = "a\x{a0}a";
781    my $sub_string = $string;
782    ok(! utf8::is_utf8($sub_string), "Verify that string isn't initially utf8");
783    $sub_string =~ s/a/\x{100}/g;
784    ok(utf8::is_utf8($sub_string),
785                        'Verify replace of non-utf8 with utf8 upgrades to utf8');
786    is($sub_string, "\x{100}\x{A0}\x{100}",
787                            'Verify #41530 fixed: replace of non-utf8 with utf8');
788
789    my $non_sub_string = $string;
790    ok(! utf8::is_utf8($non_sub_string),
791                                    "Verify that string isn't initially utf8");
792    $non_sub_string =~ s/b/\x{100}/g;
793    ok(! utf8::is_utf8($non_sub_string),
794            "Verify that failed substitute doesn't change string's utf8ness");
795    is($non_sub_string, $string,
796                        "Verify that failed substitute doesn't change string");
797}
798
799{ # Verify largish octal in replacement pattern
800
801    my $string = "a";
802    $string =~ s/a/\400/;
803    is($string, chr 0x100, "Verify that handles s/foo/\\400/");
804    $string =~ s/./\600/;
805    is($string, chr 0x180, "Verify that handles s/foo/\\600/");
806    $string =~ s/./\777/;
807    is($string, chr 0x1FF, "Verify that handles s/foo/\\777/");
808}
809
810# Scoping of s//the RHS/ when there is no /e
811# Tests based on [perl #19078]
812{
813 local *_;
814 my $output = ''; my %a;
815 no warnings 'uninitialized';
816
817 $_="CCCGGG";
818 s!.!<@a{$output .= ("$&"),/[$&]/g}>!g;
819 $output .= $_;
820 is(
821   $output, "CCCGGG<   ><  >< ><   ><  >< >",
822  's/// sets PL_curpm for each iteration even when the RHS has set it'
823 );
824
825 s/C/$a{m\G\}/;
826 is(
827  "$&", G =>
828  'Match vars reflect the last match after s/pat/$a{m|pat|}/ without /e'
829 );
830}
831
832{
833    # a tied scalar that returned a plain string, got messed up
834    # when substituted with a UTF8 replacement string, due to
835    # magic getting called multiple times, and pointers now pointing
836    # to stale/freed strings
837    # The original fix for this caused infinite loops for non- or cow-
838    # strings, so we test those, too.
839    package FOO;
840    my $fc;
841    sub TIESCALAR { bless [ "abcdefgh" ] }
842    sub FETCH { $fc++; $_[0][0] }
843    sub STORE { $_[0][0] = $_[1] }
844
845    my $s;
846    tie $s, 'FOO';
847    $s =~ s/..../\x{101}/;
848    ::is($fc, 1, "tied UTF8 stuff FETCH count");
849    ::is("$s", "\x{101}efgh", "tied UTF8 stuff");
850
851    ::watchdog(300);
852    $fc = 0;
853    $s = *foo;
854    $s =~ s/..../\x{101}/;
855    ::is($fc, 1, '$tied_glob =~ s/non-utf8/utf8/ fetch count');
856    ::is("$s", "\x{101}::foo", '$tied_glob =~ s/non-utf8/utf8/ result');
857    $fc = 0;
858    $s = *foo;
859    $s =~ s/(....)/\x{101}/g;
860    ::is($fc, 1, '$tied_glob =~ s/(non-utf8)/utf8/g fetch count');
861    ::is("$s", "\x{101}\x{101}o",
862         '$tied_glob =~ s/(non-utf8)/utf8/g result');
863    $fc = 0;
864    $s = "\xff\xff\xff\xff\xff";
865    $s =~ s/..../\x{101}/;
866    ::is($fc, 1, '$tied_latin1 =~ s/non-utf8/utf8/ fetch count');
867    ::is("$s", "\x{101}\xff", '$tied_latin1 =~ s/non-utf8/utf8/ result');
868    $fc = 0;
869    { package package_name; tied($s)->[0] = __PACKAGE__ };
870    $s =~ s/..../\x{101}/;
871    ::is($fc, 1, '$tied_cow =~ s/non-utf8/utf8/ fetch count');
872    ::is("$s", "\x{101}age_name", '$tied_cow =~ s/non-utf8/utf8/ result');
873    $fc = 0;
874    $s = \1;
875    $s =~ s/..../\x{101}/;
876    ::is($fc, 1, '$tied_ref =~ s/non-utf8/utf8/ fetch count');
877    ::like("$s", qr/^\x{101}AR\(0x.*\)\z/,
878           '$tied_ref =~ s/non-utf8/utf8/ result');
879}
880
881# RT #97954
882{
883    my $count;
884
885    sub bam::DESTROY {
886	--$count;
887    }
888
889    my $z_zapp = bless [], 'bam';
890    ++$count;
891
892    is($count, 1, '1 object');
893    is($z_zapp =~ s/.*/R/r, 'R', 'substitution happens');
894    is(ref $z_zapp, 'bam', 'still 1 object');
895    is($count, 1, 'still 1 object');
896    undef $z_zapp;
897    is($count, 0, 'now 0 objects');
898
899    $z_zapp = bless [], 'bam';
900    ++$count;
901
902    is($count, 1, '1 object');
903    like($z_zapp =~ s/./R/rg, qr/\AR{8,}\z/, 'substitution happens');
904    is(ref $z_zapp, 'bam', 'still 1 object');
905    is($count, 1, 'still 1 object');
906    undef $z_zapp;
907    is($count, 0, 'now 0 objects');
908}
909
910is(*bam =~ s/\*//r, 'main::bam', 'Can s///r a tyepglob');
911is(*bam =~ s/\*//rg, 'main::bam', 'Can s///rg a tyepglob');
912
913{
914 sub cowBug::TIESCALAR { bless[], 'cowBug' }
915 sub cowBug::FETCH { __PACKAGE__ }
916 sub cowBug::STORE{}
917 tie my $kror, cowBug =>;
918 $kror =~ s/(?:)/""/e;
919}
920pass("s/// on tied var returning a cow");
921
922# a test for 6502e08109cd003b2cdf39bc94ef35e52203240b
923# previously this would segfault
924
925{
926    my $s = "abc";
927    eval { $s =~ s/(.)/die/e; };
928    like($@, qr/Died at/, "s//die/e");
929}
930
931
932# Test problems with constant replacement optimisation
933# [perl #26986] logop in repl resulting in incorrect optimisation
934"g" =~ /(.)/;
935@l{'a'..'z'} = 'A'..':';
936$_ = "hello";
937{ s/(.)/$l{my $a||$1}/g }
938is $_, "HELLO",
939  'logop in s/// repl does not result in "constant" repl optimisation';
940# Aliases to match vars
941"g" =~ /(.)/;
942$_ = "hello";
943{
944    local *a = *1;
945    s/(.)\1/$a/g;
946}
947is $_, 'helo', 's/pat/$alias_to_match_var/';
948"g" =~ /(.)/;
949$_ = "hello";
950{
951    local *a = *1;
952    s/e(.)\1/a$a/g;
953}
954is $_, 'halo', 's/pat/foo$alias_to_match_var/';
955# Last-used pattern containing re-evals that modify "constant" rhs
956{
957    local *a;
958    $x = "hello";
959    $x =~ /(?{*a = \"a"})./;
960    undef *a;
961    $x =~ s//$a/g;
962    is $x, 'aaaaa',
963	'last-used pattern disables constant repl optimisation';
964}
965
966
967$_ = "\xc4\x80";
968$a = "";
969utf8::upgrade $a;
970$_ =~ s/$/$a/;
971is $_, "\xc4\x80", "empty utf8 repl does not result in mangled utf8";
972
973$@ = "\x{30cb}eval 18";
974$@ =~ s/eval \d+/eval 11/;
975is $@, "\x{30cb}eval 11",
976  'loading utf8 tables does not interfere with matches against $@';
977
978$reftobe = 3;
979$reftobe =~ s/3/$reftobe=\ 3;4/e;
980is $reftobe, '4', 'clobbering target with ref in s//.../e';
981$locker{key} = 3;
982SKIP:{
983    skip "no Hash::Util under miniperl", 2 if is_miniperl;
984    require Hash::Util;
985    eval {
986	$locker{key} =~ s/3/
987	    $locker{key} = 3;
988	    &Hash::Util::lock_hash(\%locker);4
989	/e;
990    };
991    is $locker{key}, '3', 'locking target in $hash{key} =~ s//.../e';
992    like $@, qr/^Modification of a read-only value/, 'err msg';
993}
994delete $::{does_not_exist}; # just in case
995eval { no warnings; $::{does_not_exist}=~s/(?:)/*{"does_not_exist"}; 4/e };
996like $@, qr/^Modification of a read-only value/,
997    'vivifying stash elem in $that::{elem} =~ s//.../e';
998
999# COWs should not be exempt from read-only checks.  s/// croaks on read-
1000# only values even when the pattern does not match, but it was not doing so
1001# for COWs.
1002eval { for (__PACKAGE__) { s/b/c/; } };
1003like $@, qr/^Modification of a read-only value/,
1004    'read-only COW =~ s/does not match// should croak';
1005