191f110e0Safresh1#!./perl 291f110e0Safresh1 391f110e0Safresh1# Checks if the parser behaves correctly in edge case 491f110e0Safresh1# (including weird syntax errors) 591f110e0Safresh1 691f110e0Safresh1BEGIN { 7b8851fccSafresh1 chdir 't' if -d 't'; 891f110e0Safresh1 require './test.pl'; 9b8851fccSafresh1 skip_all_without_unicode_tables(); 1091f110e0Safresh1} 1191f110e0Safresh1 1291f110e0Safresh1use 5.016; 1391f110e0Safresh1use utf8; 1491f110e0Safresh1use open qw( :utf8 :std ); 1591f110e0Safresh1no warnings qw(misc reserved); 1691f110e0Safresh1 17b46d8ef2Safresh1plan (tests => 66880); 1891f110e0Safresh1 19b8851fccSafresh1# ${single:colon} should not be treated as a simple variable, but as a 20b8851fccSafresh1# block with a label inside. 2191f110e0Safresh1{ 2291f110e0Safresh1 no strict; 2391f110e0Safresh1 2491f110e0Safresh1 local $@; 25b8851fccSafresh1 eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'"; 26b8851fccSafresh1 is ${"\x{30cd}colon"}, 'label, not var', 27b8851fccSafresh1 '${\x{30cd}single:\x{30cd}colon} should be block-label'; 2891f110e0Safresh1 2991f110e0Safresh1 local $@; 3091f110e0Safresh1 no utf8; 31b8851fccSafresh1 evalbytes '${single:colon} = "block/label, not var"'; 32b8851fccSafresh1 is($::colon, 33b8851fccSafresh1 'block/label, not var', 3491f110e0Safresh1 '...same with ${single:colon}' 3591f110e0Safresh1 ); 3691f110e0Safresh1} 3791f110e0Safresh1 3891f110e0Safresh1# ${yadda'etc} and ${yadda::etc} should both work under strict 3991f110e0Safresh1{ 4091f110e0Safresh1 local $@; 4191f110e0Safresh1 eval q<use strict; ${flark::fleem}>; 4291f110e0Safresh1 is($@, '', q<${package::var} works>); 4391f110e0Safresh1 44*e0680481Safresh1 no warnings qw(syntax deprecated); 4591f110e0Safresh1 local $@; 4691f110e0Safresh1 eval q<use strict; ${fleem'flark}>; 4791f110e0Safresh1 is($@, '', q<...as does ${package'var}>); 4891f110e0Safresh1} 4991f110e0Safresh1 5091f110e0Safresh1# The first character in ${...} should respect the rules 5191f110e0Safresh1{ 5291f110e0Safresh1 local $@; 5391f110e0Safresh1 use utf8; 5491f110e0Safresh1 eval '${☭asd} = 1'; 5591f110e0Safresh1 like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special)) 5691f110e0Safresh1} 5791f110e0Safresh1 5891f110e0Safresh1# Checking that at least some of the special variables work 59b46d8ef2Safresh1for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) { 606fb12b70Safresh1 SKIP: { 6191f110e0Safresh1 local $@; 6291f110e0Safresh1 evalbytes "\$$v;"; 6391f110e0Safresh1 is $@, '', "No syntax error for \$$v"; 6491f110e0Safresh1 6591f110e0Safresh1 local $@; 6691f110e0Safresh1 eval "use utf8; \$$v;"; 67b8851fccSafresh1 is $@, '', "No syntax error for \$$v under 'use utf8'"; 6891f110e0Safresh1 } 696fb12b70Safresh1} 7091f110e0Safresh1 7191f110e0Safresh1# Checking if the Latin-1 range behaves as expected, and that the behavior is the 7291f110e0Safresh1# same whenever under strict or not. 73b8851fccSafresh1for ( 0x0 .. 0xff ) { 74b8851fccSafresh1 my @warnings; 75b8851fccSafresh1 local $SIG {__WARN__} = sub {push @warnings, @_ }; 76b8851fccSafresh1 my $ord = utf8::unicode_to_native($_); 77b8851fccSafresh1 my $chr = chr $ord; 78b8851fccSafresh1 my $syntax_error = 0; # Do we expect this code point to generate a 79b8851fccSafresh1 # syntax error? Assume not, for now 80b8851fccSafresh1 my $deprecated = 0; 81b8851fccSafresh1 my $name; 82b8851fccSafresh1 83b8851fccSafresh1 # A different number of tests are run depending on the branches in this 84b8851fccSafresh1 # loop iteration. This allows us to add skips to make the reported total 85b8851fccSafresh1 # the same for each iteration. 86b8851fccSafresh1 my $tests = 0; 87b8851fccSafresh1 my $max_tests = 6; 88b8851fccSafresh1 89b8851fccSafresh1 if ($chr =~ /[[:graph:]]/a) { 90b8851fccSafresh1 $name = "'$chr'"; 91b8851fccSafresh1 $syntax_error = 1 if $chr eq '{'; 92b8851fccSafresh1 } 93b8851fccSafresh1 elsif ($chr =~ /[[:space:]]/a) { 94b8851fccSafresh1 $name = sprintf "\\x%02x, an ASCII space character", $ord; 95b8851fccSafresh1 $syntax_error = 1; 96b8851fccSafresh1 } 97b8851fccSafresh1 elsif ($chr =~ /[[:cntrl:]]/a) { 98b8851fccSafresh1 $name = sprintf "\\x%02x, an ASCII control", $ord; 99b8851fccSafresh1 $syntax_error = 1; 100b8851fccSafresh1 } 101b8851fccSafresh1 elsif ($chr =~ /\pC/) { 102b8851fccSafresh1 if ($chr eq "\N{SHY}") { 103b8851fccSafresh1 $name = sprintf "\\x%02x, SHY", $ord; 104b8851fccSafresh1 } 105b8851fccSafresh1 else { 106b8851fccSafresh1 $name = sprintf "\\x%02x, a C1 control", $ord; 107b8851fccSafresh1 } 1089f11ffb7Safresh1 $syntax_error = 1; 109b8851fccSafresh1 $deprecated = ! $syntax_error; 110b8851fccSafresh1 } 111b8851fccSafresh1 elsif ($chr =~ /\p{XIDStart}/) { 112b8851fccSafresh1 $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord; 113b8851fccSafresh1 } 114b8851fccSafresh1 elsif ($chr =~ /\p{XPosixSpace}/) { 115b8851fccSafresh1 $name = sprintf "\\x%02x, a non-ASCII space character", $ord; 1169f11ffb7Safresh1 $syntax_error = 1; 117b8851fccSafresh1 $deprecated = ! $syntax_error; 118b8851fccSafresh1 } 119b8851fccSafresh1 else { 120b8851fccSafresh1 $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord; 121b8851fccSafresh1 } 12291f110e0Safresh1 no warnings 'closure'; 123b8851fccSafresh1 my $esc = sprintf("%X", $ord); 12491f110e0Safresh1 utf8::downgrade($chr); 12591f110e0Safresh1 if ($chr !~ /\p{XIDS}/u) { 126b8851fccSafresh1 if ($syntax_error) { 127b8851fccSafresh1 evalbytes "\$$chr"; 128b8851fccSafresh1 like($@, qr/ syntax\ error | Unrecognized\ character /x, 129b8851fccSafresh1 "$name as a length-1 variable generates a syntax error"); 130b8851fccSafresh1 $tests++; 131b8851fccSafresh1 utf8::upgrade($chr); 1329f11ffb7Safresh1 eval "no strict; \$$chr = 4;", 133b8851fccSafresh1 like($@, qr/ syntax\ error | Unrecognized\ character /x, 134b8851fccSafresh1 " ... and the same under 'use utf8'"); 135b8851fccSafresh1 $tests++; 136b8851fccSafresh1 } 137b8851fccSafresh1 elsif ($chr =~ /[[:punct:][:digit:]]/a) { 138b46d8ef2Safresh1 next if ($chr eq '#' or $chr eq '*'); # RT 133583 139b8851fccSafresh1 140b8851fccSafresh1 # Unlike other variables, we dare not try setting the length-1 141b8851fccSafresh1 # variables that are ASCII punctuation and digits. This is 142b8851fccSafresh1 # because many of these variables have meaning to the system, and 143b8851fccSafresh1 # setting them could have side effects or not work as expected 144b8851fccSafresh1 # (And using fresh_perl() doesn't always help.) For all these we 145b8851fccSafresh1 # just verify that they don't generate a syntax error. 146b8851fccSafresh1 local $@; 147b8851fccSafresh1 evalbytes "\$$chr;"; 148b8851fccSafresh1 is $@, '', "$name as a length-1 variable doesn't generate a syntax error"; 149b8851fccSafresh1 $tests++; 150b8851fccSafresh1 utf8::upgrade($chr); 151b8851fccSafresh1 evalbytes "no strict; use utf8; \$$chr;", 152b8851fccSafresh1 is $@, '', " ... and the same under 'use utf8'"; 153b8851fccSafresh1 $tests++; 154b8851fccSafresh1 } 155b8851fccSafresh1 else { 15691f110e0Safresh1 is evalbytes "no strict; \$$chr = 10", 15791f110e0Safresh1 10, 158b8851fccSafresh1 "$name is legal as a length-1 variable"; 159b8851fccSafresh1 $tests++; 160b8851fccSafresh1 if ($chr =~ /[[:ascii:]]/) { 161b8851fccSafresh1 utf8::upgrade($chr); 162b8851fccSafresh1 is evalbytes "no strict; use utf8; \$$chr = 1", 163b8851fccSafresh1 1, 164b8851fccSafresh1 " ... and is legal under 'use utf8'"; 165b8851fccSafresh1 $tests++; 166b8851fccSafresh1 } 167b8851fccSafresh1 else { 16891f110e0Safresh1 utf8::upgrade($chr); 16991f110e0Safresh1 local $@; 17091f110e0Safresh1 eval "no strict; use utf8; \$$chr = 1"; 17191f110e0Safresh1 like $@, 17291f110e0Safresh1 qr/\QUnrecognized character \x{\E\L$esc/, 173b8851fccSafresh1 " ... but is illegal as a length-1 variable under 'use utf8'"; 174b8851fccSafresh1 $tests++; 175b8851fccSafresh1 } 176b8851fccSafresh1 } 17791f110e0Safresh1 } 17891f110e0Safresh1 else { 17991f110e0Safresh1 { 18091f110e0Safresh1 no utf8; 18191f110e0Safresh1 local $@; 18291f110e0Safresh1 evalbytes "no strict; \$$chr = 1"; 183b8851fccSafresh1 is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable"); 184b8851fccSafresh1 $tests++; 18591f110e0Safresh1 186b8851fccSafresh1 if ($chr !~ /[[:ascii:]]/) { 18791f110e0Safresh1 local $@; 18891f110e0Safresh1 evalbytes "use strict; \$$chr = 1"; 18991f110e0Safresh1 is($@, 19091f110e0Safresh1 '', 191b8851fccSafresh1 " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS" 19291f110e0Safresh1 ); 193b8851fccSafresh1 $tests++; 19491f110e0Safresh1 19591f110e0Safresh1 local $@; 19691f110e0Safresh1 evalbytes "\$a$chr = 1"; 19791f110e0Safresh1 like($@, 19891f110e0Safresh1 qr/Unrecognized character /, 199b8851fccSafresh1 " ... but under 'no utf8', it's not allowed in length-2+ variables" 20091f110e0Safresh1 ); 201b8851fccSafresh1 $tests++; 202b8851fccSafresh1 } 20391f110e0Safresh1 } 20491f110e0Safresh1 { 20591f110e0Safresh1 use utf8; 206b8851fccSafresh1 my $utf8 = $chr; 207b8851fccSafresh1 utf8::upgrade($utf8); 20891f110e0Safresh1 local $@; 209b8851fccSafresh1 eval "no strict; \$$utf8 = 1"; 210b8851fccSafresh1 is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable"); 211b8851fccSafresh1 $tests++; 21291f110e0Safresh1 21391f110e0Safresh1 local $@; 214b8851fccSafresh1 eval "use strict; \$$utf8 = 1"; 215b8851fccSafresh1 if ($chr =~ /[ab]/) { # These are special, for sort() 216b8851fccSafresh1 is($@, '', " ... and under 'use utf8', 'use strict'," 217b8851fccSafresh1 . " is a valid length-1 variable (\$a and \$b are special)"); 218b8851fccSafresh1 $tests++; 21991f110e0Safresh1 } 220b8851fccSafresh1 else { 221b8851fccSafresh1 like($@, 222b8851fccSafresh1 qr/Global symbol "\$$utf8" requires explicit package name/, 223b8851fccSafresh1 " ... and under utf8 has to be required under strict" 224b8851fccSafresh1 ); 225b8851fccSafresh1 $tests++; 226b8851fccSafresh1 } 227b8851fccSafresh1 } 228b8851fccSafresh1 } 229b8851fccSafresh1 230b8851fccSafresh1 if (! $deprecated) { 231b8851fccSafresh1 if ($chr =~ /[#*]/) { 232b8851fccSafresh1 233b8851fccSafresh1 # Length-1 variables with these two characters used to be used by 234b8851fccSafresh1 # Perl, but now a warning is generated that they're gone. 235b8851fccSafresh1 # Ignore such warnings. 236b8851fccSafresh1 for (my $i = @warnings - 1; $i >= 0; $i--) { 237b8851fccSafresh1 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/; 238b8851fccSafresh1 } 239b8851fccSafresh1 } 240b8851fccSafresh1 my $message = " ... and doesn't generate any warnings"; 241b8851fccSafresh1 $message = " TODO $message" if $ord == 0 242b8851fccSafresh1 || $chr =~ /\s/a; 243b8851fccSafresh1 244b8851fccSafresh1 if (! ok(@warnings == 0, $message)) { 245b8851fccSafresh1 note join "\n", @warnings; 246b8851fccSafresh1 } 247b8851fccSafresh1 $tests++; 248b8851fccSafresh1 } 249b8851fccSafresh1 elsif (! @warnings) { 250b8851fccSafresh1 fail(" ... and generates deprecation warnings (since is deprecated)"); 251b8851fccSafresh1 $tests++; 252b8851fccSafresh1 } 253b8851fccSafresh1 else { 254b8851fccSafresh1 ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings), 255b8851fccSafresh1 " ... and generates deprecation warnings (only)"); 256b8851fccSafresh1 $tests++; 257b8851fccSafresh1 } 258b8851fccSafresh1 259b8851fccSafresh1 SKIP: { 260b8851fccSafresh1 die "Wrong max count for tests" if $tests > $max_tests; 261b8851fccSafresh1 skip("untaken tests", $max_tests - $tests) if $max_tests > $tests; 26291f110e0Safresh1 } 26391f110e0Safresh1} 26491f110e0Safresh1 26591f110e0Safresh1{ 26691f110e0Safresh1 use utf8; 26791f110e0Safresh1 my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla 26891f110e0Safresh1 is($@, '', "ASCII character + combining character works as a variable name"); 26991f110e0Safresh1 is($ret, 100, " ... and returns the correct value"); 27091f110e0Safresh1} 27191f110e0Safresh1 27291f110e0Safresh1# From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail 27391f110e0Safresh1for my $chr ( 27491f110e0Safresh1 "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}", 27591f110e0Safresh1 "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}", 27691f110e0Safresh1 "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}", 27791f110e0Safresh1 ) 27891f110e0Safresh1{ 27991f110e0Safresh1 no warnings 'non_unicode'; 28091f110e0Safresh1 my $esc = sprintf("%x", ord $chr); 28191f110e0Safresh1 local $@; 28291f110e0Safresh1 eval "\$$chr = 1; \$$chr"; 28391f110e0Safresh1 like($@, 28491f110e0Safresh1 qr/\QUnrecognized character \x{$esc};/, 28591f110e0Safresh1 "\\x{$esc} is illegal for a length-one identifier" 28691f110e0Safresh1 ); 28791f110e0Safresh1} 28891f110e0Safresh1 28991f110e0Safresh1for my $i (0x100..0xffff) { 29091f110e0Safresh1 my $chr = chr($i); 29191f110e0Safresh1 my $esc = sprintf("%x", $i); 29291f110e0Safresh1 local $@; 29391f110e0Safresh1 eval "my \$$chr = q<test>; \$$chr;"; 29491f110e0Safresh1 if ( $chr =~ /^\p{_Perl_IDStart}$/ ) { 29591f110e0Safresh1 is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i)); 29691f110e0Safresh1 } 29791f110e0Safresh1 else { 29891f110e0Safresh1 like($@, 29991f110e0Safresh1 qr/\QUnrecognized character \x{$esc};/, 30091f110e0Safresh1 "\\x{$esc} isn't XIDS, illegal as a length-1 variable", 30191f110e0Safresh1 ) 30291f110e0Safresh1 } 30391f110e0Safresh1} 30491f110e0Safresh1 30591f110e0Safresh1{ 30691f110e0Safresh1 # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz 30756d68f1eSafresh1 # https://github.com/Perl/perl5/issues/12841 30891f110e0Safresh1 no strict; 30991f110e0Safresh1 31091f110e0Safresh1 local $@; 31191f110e0Safresh1 eval <<'EOP'; 31291f110e0Safresh1 q{$} =~ /(.)/; 31391f110e0Safresh1 is($$1, $$, q{$$1 parses as ${$1}}); 31491f110e0Safresh1 31591f110e0Safresh1 $doof = "test"; 31691f110e0Safresh1 $test = "Got here"; 31791f110e0Safresh1 $::{+$$} = *doof; 31891f110e0Safresh1 31991f110e0Safresh1 is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} ); 32091f110e0Safresh1EOP 32191f110e0Safresh1 is($@, '', q{$$1 parses correctly}); 32291f110e0Safresh1 32391f110e0Safresh1 for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) { 32491f110e0Safresh1 my $esc = sprintf("\\x{%x}", ord $chr); 32591f110e0Safresh1 local $@; 32691f110e0Safresh1 eval <<" EOP"; 32791f110e0Safresh1 \$$chr = q{\$}; 32891f110e0Safresh1 \$\$$chr; 32991f110e0Safresh1 EOP 33091f110e0Safresh1 33191f110e0Safresh1 like($@, 33291f110e0Safresh1 qr/syntax error|Unrecognized character/, 33391f110e0Safresh1 qq{\$\$$esc is a syntax error} 33491f110e0Safresh1 ); 33591f110e0Safresh1 } 33691f110e0Safresh1} 33791f110e0Safresh1 33891f110e0Safresh1{ 33991f110e0Safresh1 # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz 34056d68f1eSafresh1 # https://github.com/Perl/perl5/issues/12849 34191f110e0Safresh1 local $@; 34291f110e0Safresh1 my $var = 10; 34391f110e0Safresh1 eval ' ${ var }'; 34491f110e0Safresh1 34591f110e0Safresh1 is( 34691f110e0Safresh1 $@, 34791f110e0Safresh1 '', 34891f110e0Safresh1 '${ var } works under strict' 34991f110e0Safresh1 ); 35091f110e0Safresh1 35191f110e0Safresh1 { 35291f110e0Safresh1 no strict; 3536fb12b70Safresh1 354b8851fccSafresh1 for my $var ( '$', "^GLOBAL_PHASE", "^V" ) { 35591f110e0Safresh1 eval "\${ $var}"; 35691f110e0Safresh1 is($@, '', "\${ $var} works" ); 35791f110e0Safresh1 eval "\${$var }"; 35891f110e0Safresh1 is($@, '', "\${$var } works" ); 35991f110e0Safresh1 eval "\${ $var }"; 36091f110e0Safresh1 is($@, '', "\${ $var } works" ); 36191f110e0Safresh1 } 362b8851fccSafresh1 my $var = "\7LOBAL_PHASE"; 363b8851fccSafresh1 eval "\${ $var}"; 364b8851fccSafresh1 like($@, qr/Unrecognized character \\x07/, 365b8851fccSafresh1 "\${ $var} generates 'Unrecognized character' error" ); 366b8851fccSafresh1 eval "\${$var }"; 367b8851fccSafresh1 like($@, qr/Unrecognized character \\x07/, 368b8851fccSafresh1 "\${$var } generates 'Unrecognized character' error" ); 369b8851fccSafresh1 eval "\${ $var }"; 370b8851fccSafresh1 like($@, qr/Unrecognized character \\x07/, 371b8851fccSafresh1 "\${ $var } generates 'Unrecognized character' error" ); 37291f110e0Safresh1 } 37391f110e0Safresh1} 3746fb12b70Safresh1 3756fb12b70Safresh1{ 3766fb12b70Safresh1 is( 3776fb12b70Safresh1 "".eval "*{\nOIN}", 3786fb12b70Safresh1 "*main::OIN", 3796fb12b70Safresh1 "Newlines at the start of an identifier should be skipped over" 3806fb12b70Safresh1 ); 3816fb12b70Safresh1 3826fb12b70Safresh1 383b8851fccSafresh1 SKIP: { 384b8851fccSafresh1 skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1) 385b8851fccSafresh1 if $::IS_EBCDIC; 3866fb12b70Safresh1 is( 3876fb12b70Safresh1 "".eval "*{^JOIN}", 3886fb12b70Safresh1 "*main::\nOIN", 3896fb12b70Safresh1 " ... but \$^J is still legal" 3906fb12b70Safresh1 ); 3916fb12b70Safresh1 } 3926fb12b70Safresh1 393b8851fccSafresh1 my $ret = eval "\${\cT\n}"; 394b8851fccSafresh1 like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message'); 3956fb12b70Safresh1} 3966fb12b70Safresh1 3976fb12b70Safresh1{ 3986fb12b70Safresh1 # Prior to 5.19.4, the following changed behavior depending 3996fb12b70Safresh1 # on the presence of the newline after '@{'. 4006fb12b70Safresh1 sub foo (&) { [1] } 4016fb12b70Safresh1 my %foo = (a=>2); 4026fb12b70Safresh1 my $ret = @{ foo { "a" } }; 4036fb12b70Safresh1 is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}'); 4046fb12b70Safresh1 4056fb12b70Safresh1 $ret = @{ 4066fb12b70Safresh1 foo { "a" } 4076fb12b70Safresh1 }; 4086fb12b70Safresh1 is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}'); 4096fb12b70Safresh1 4106fb12b70Safresh1} 411