1898184e3Ssthen#!./perl 2898184e3Ssthen 3898184e3Ssthen# Checks if the parser behaves correctly in edge cases 4898184e3Ssthen# (including weird syntax errors) 5898184e3Ssthen 6898184e3SsthenBEGIN { 7b8851fccSafresh1 chdir 't' if -d 't'; 8898184e3Ssthen require './test.pl'; 99f11ffb7Safresh1 require './charset_tools.pl'; 10b8851fccSafresh1 skip_all_without_unicode_tables(); 11898184e3Ssthen} 12898184e3Ssthen 13b46d8ef2Safresh1plan (tests => 58); 14898184e3Ssthen 15898184e3Ssthenuse utf8; 16898184e3Ssthenuse open qw( :utf8 :std ); 17898184e3Ssthen 18b8851fccSafresh1is *tèst, "*main::tèst", "sanity check."; 19898184e3Ssthenok $::{"tèst"}, "gets the right glob in the stash."; 20898184e3Ssthen 21898184e3Ssthenmy $glob_by_sub = sub { *main::method }->(); 22898184e3Ssthen 23898184e3Ssthenis *main::method, "*main::method", "glob stringy works"; 24898184e3Ssthenis "" . *main::method, "*main::method", "glob stringify-through-concat works"; 25898184e3Ssthenis $glob_by_sub, "*main::method", "glob stringy works"; 26898184e3Ssthenis "" . $glob_by_sub, "*main::method", ""; 27898184e3Ssthen 28898184e3Ssthensub gimme_glob { 29898184e3Ssthen no strict 'refs'; 30898184e3Ssthen is *{$_[0]}, "*main::$_[0]"; 31898184e3Ssthen *{$_[0]}; 32898184e3Ssthen} 33898184e3Ssthen 34898184e3Ssthenis "" . gimme_glob("下郎"), "*main::下郎"; 35898184e3Ssthen$a = *下郎; 36898184e3Ssthenis "" . $a, "*main::下郎"; 37898184e3Ssthen 38898184e3Ssthen*{gimme_glob("下郎")} = sub {}; 39898184e3Ssthen 40898184e3Ssthen{ 41898184e3Ssthen ok defined *{"下郎"}{CODE}; 42898184e3Ssthen ok !defined *{"\344\270\213\351\203\216"}{CODE}; 43898184e3Ssthen} 44898184e3Ssthen 45898184e3Ssthen$Lèon = 1; 46898184e3Ssthenis ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,"; 47898184e3Ssthenok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one."; 48898184e3Ssthen 49898184e3Ssthenmy $a = "foo" . chr(190); 50898184e3Ssthenmy $b = $a . chr(256); 51898184e3Ssthenchop $b; # $b is $a with utf8 on 52898184e3Ssthen 53898184e3Ssthenis $a, $b, '$a equals $b'; 54898184e3Ssthen 55898184e3Ssthen*$b = sub { 5 }; 56898184e3Ssthen 57898184e3Ssthenis eval { main->$a }, 5, q!$a can call $b's sub!; 58898184e3Ssthenok !$@, "..and there's no error."; 59898184e3Ssthen 60898184e3Ssthenmy $c = $b; 61898184e3Ssthenutf8::encode($c); 62898184e3Ssthenok $b ne $c, '$b unequal $c'; 63898184e3Sstheneval { main->$c }; 64898184e3Ssthenok $@, q!$c can't call $b's sub.!; 65898184e3Ssthen 66898184e3Ssthen# Now define another sub under the downgraded name: 67898184e3Ssthen*$a = sub { 6 }; 68898184e3Ssthen# Call it: 69898184e3Ssthenis eval { main->$a }, 6, "Adding a new sub to *a and calling it works,"; 70898184e3Ssthenok !$@, "..without errors."; 71898184e3Sstheneval { main->$c }; 72898184e3Ssthenok $@, "but it's still unreachable through *c"; 73898184e3Ssthen 74898184e3Ssthen*$b = \10; 75898184e3Ssthenis ${*$a{SCALAR}}, 10; 76898184e3Ssthenis ${*$b{SCALAR}}, 10; 77898184e3Ssthenis ${*$c{SCALAR}}, undef; 78898184e3Ssthen 79898184e3Ssthenopendir FÒÒ, "."; 80898184e3Ssthenclosedir FÒÒ; 81898184e3Ssthen::ok($::{"FÒÒ"}, "Bareword generates the right glob."); 82898184e3Ssthen::ok(!$::{"F\303\222\303\222"}); 83898184e3Ssthen 84898184e3Ssthensub участники { 1 } 85898184e3Ssthen 86898184e3Ssthenok $::{"участники"}, "non-const sub declarations generate the right glob"; 87b8851fccSafresh1is $::{"участники"}->(), 1; 88898184e3Ssthen 89898184e3Ssthensub 原 () { 1 } 90898184e3Ssthen 91898184e3Ssthenis grep({ $_ eq "\x{539f}" } keys %::), 1, "Constant subs generate the right glob."; 92898184e3Ssthenis grep({ $_ eq "\345\216\237" } keys %::), 0; 93898184e3Ssthen 94898184e3Ssthen#These should probably go elsewhere. 95898184e3Sstheneval q{ sub wròng1 (_$); wròng1(1,2) }; 96898184e3Ssthenlike( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' ); 97898184e3Ssthen 98898184e3Sstheneval q{ sub ча::ики ($__); ча::ики(1,2) }; 99898184e3Ssthenlike( $@, qr/Malformed prototype for ча::ики/ ); 100898184e3Ssthen 101898184e3Ssthenour $問 = 10; 102898184e3Ssthenis $問, 10, "our works"; 103898184e3Ssthenis $main::問, 10, "...as does getting the same variable through the fully qualified name"; 104898184e3Ssthenis ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't"; 105898184e3Ssthen 106898184e3Ssthen{ 107898184e3Ssthen use charnames qw( :full ); 108898184e3Ssthen 109898184e3Ssthen eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !; 110898184e3Ssthen $@ =~ s/eval \d+/eval 11/; 111898184e3Ssthen is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after my $ニ <-- HERE near column 8 at (eval 11) line 1. 112898184e3Ssthen', "'Unrecognized character' croak is UTF-8 clean"; 1136fb12b70Safresh1 1146fb12b70Safresh1 eval "q\0foobar\0 \x{FFFF}+1"; 1156fb12b70Safresh1 $@ =~ s/eval \d+/eval 11/; 1166fb12b70Safresh1 is( 1176fb12b70Safresh1 $@, 1186fb12b70Safresh1 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n", 1196fb12b70Safresh1 "...and nul-clean" 1206fb12b70Safresh1 ); 1216fb12b70Safresh1 1226fb12b70Safresh1 { 1236fb12b70Safresh1 use re 'eval'; 1246fb12b70Safresh1 my $f = qq{(?{\$ネ+ 1; \x{1F42A} })}; 1256fb12b70Safresh1 eval { "a" =~ /^a$f/ }; 1266fb12b70Safresh1 my $e = $@; 1276fb12b70Safresh1 $e =~ s/eval \d+/eval 11/; 1286fb12b70Safresh1 is( 1296fb12b70Safresh1 $e, 1306fb12b70Safresh1 "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n", 1316fb12b70Safresh1 "Messages from a re-eval are UTF-8 clean" 1326fb12b70Safresh1 ); 1336fb12b70Safresh1 1346fb12b70Safresh1 $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })}; 1356fb12b70Safresh1 eval { "a" =~ /^a$f/ }; 1366fb12b70Safresh1 my $e = $@; 1376fb12b70Safresh1 $e =~ s/eval \d+/eval 11/; 1386fb12b70Safresh1 is( 1396fb12b70Safresh1 $e, 1406fb12b70Safresh1 "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n", 1416fb12b70Safresh1 "...and nul-clean" 1426fb12b70Safresh1 ); 1436fb12b70Safresh1 } 1446fb12b70Safresh1 1456fb12b70Safresh1 { 1466fb12b70Safresh1 eval qq{\$ネ+ 1; \x{1F42A}}; 1476fb12b70Safresh1 $@ =~ s/eval \d+/eval 11/; 1486fb12b70Safresh1 is( 1496fb12b70Safresh1 $@, 1506fb12b70Safresh1 "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n", 1516fb12b70Safresh1 "Unrecognized character error doesn't cut off in the middle of characters" 1526fb12b70Safresh1 ) 1536fb12b70Safresh1 } 1546fb12b70Safresh1 155898184e3Ssthen} 156898184e3Ssthen 157898184e3Ssthen{ 158898184e3Ssthen use feature 'state'; 159898184e3Ssthen for ( qw( my state our ) ) { 160898184e3Ssthen local $@; 161898184e3Ssthen eval "$_ Foo $x = 1;"; 162898184e3Ssthen like $@, qr/No such class Foo/u, "'No such class' warning for $_ is UTF-8 clean"; 163898184e3Ssthen } 164898184e3Ssthen} 165898184e3Ssthen 166898184e3Ssthen{ 167898184e3Ssthen local $@; 168898184e3Ssthen eval "our \$main::\x{30cb};"; 169898184e3Ssthen like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean"; 170898184e3Ssthen} 171898184e3Ssthen 172898184e3Ssthen{ 173898184e3Ssthen use feature 'state'; 174898184e3Ssthen local $@; 175898184e3Ssthen for ( qw( my state ) ) { 176898184e3Ssthen eval "$_ \$::\x{30cb};"; 177898184e3Ssthen like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!; 178898184e3Ssthen } 179898184e3Ssthen} 180898184e3Ssthen 181898184e3Ssthen{ 182898184e3Ssthen local $@; 183898184e3Ssthen eval qq!print \x{30cb}, "comma""!; 184898184e3Ssthen like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles."; 185898184e3Ssthen} 186898184e3Ssthen 187898184e3Ssthen# tests for "Bad name" 188898184e3Sstheneval q{ Foo::$bar }; 189898184e3Ssthenlike( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' ); 190898184e3Sstheneval q{ Foo''bar }; 191898184e3Ssthenlike( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' ); 19291f110e0Safresh1 19391f110e0Safresh1{ 19491f110e0Safresh1 no warnings 'utf8'; 1959f11ffb7Safresh1 local $SIG{__WARN__} = sub { }; # The eval will also output a warning, 1969f11ffb7Safresh1 # which we ignore 197b8851fccSafresh1 my $malformed_to_be = ($::IS_EBCDIC) # Overlong sequence 198b8851fccSafresh1 ? "\x{74}\x{41}" 199b8851fccSafresh1 : "\x{c0}\x{a0}"; 20091f110e0Safresh1 CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\""; 2019f11ffb7Safresh1 like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}'); 20291f110e0Safresh1} 203b8851fccSafresh1 204b8851fccSafresh1# RT# 124216: Perl_sv_clear: Assertion 205b8851fccSafresh1# If a parsing error occurred during a forced token within an interpolated 206b8851fccSafresh1# context, the stack unwinding failed to restore PL_lex_defer and so after 207b8851fccSafresh1# error recovery the state restored after the forced token was processed 208b8851fccSafresh1# was the wrong one, resulting in the lexer thinking we're still inside a 209b8851fccSafresh1# quoted string and things getting freed multiple times. 210b8851fccSafresh1# 211b8851fccSafresh1# The \x{3030} char isn't a legal var name, and this triggers the error. 212b8851fccSafresh1# 213b8851fccSafresh1# NB: this only failed if the closing quote of the interpolated string is 214b8851fccSafresh1# the last char of the file (i.e. no trailing \n). 215b8851fccSafresh1 216b8851fccSafresh1{ 217b8851fccSafresh1 my $bad = "\x{3030}"; 218b8851fccSafresh1 # Write out the individual utf8 bytes making up \x{3030}. This 219b8851fccSafresh1 # avoids 'Wide char in print' warnings from test.pl. (We may still 220b8851fccSafresh1 # get that warning when compiling the prog itself, since the 221b8851fccSafresh1 # error it prints to stderr contains a wide char.) 222b8851fccSafresh1 utf8::encode($bad); 223b8851fccSafresh1 224b8851fccSafresh1 fresh_perl_like(qq{use utf8; "\$$bad"}, 225b8851fccSafresh1 qr/ 226b8851fccSafresh1 \A 227b8851fccSafresh1 ( \QWide character in print at - line 1.\E\n )? 228b8851fccSafresh1 \Qsyntax error at - line 1, near \E"\$.*"\n 229b8851fccSafresh1 \QExecution of - aborted due to compilation errors.\E\z 230b8851fccSafresh1 /xm, 231b8851fccSafresh1 232b8851fccSafresh1 {stderr => 1}, "RT# 124216"); 233b8851fccSafresh1} 2349f11ffb7Safresh1 2359f11ffb7Safresh1SKIP: { 2369f11ffb7Safresh1 2379f11ffb7Safresh1 use Config; 2389f11ffb7Safresh1 if ($Config{uvsize} < 8) { 2399f11ffb7Safresh1 skip("test is only valid on 64-bit ints", 4); 2409f11ffb7Safresh1 } 2419f11ffb7Safresh1 else { 2429f11ffb7Safresh1 my $a; 2439f11ffb7Safresh1 my $b; 2449f11ffb7Safresh1 2459f11ffb7Safresh1 # This caused a memory fault [perl #128738] 2469f11ffb7Safresh1 $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000 2479f11ffb7Safresh1 eval "\$a = q ${b}abc${b}"; 2489f11ffb7Safresh1 is $@, "", 2499f11ffb7Safresh1 "No errors in eval'ing a string with large code point delimiter"; 2509f11ffb7Safresh1 is $a, 'abc', 2519f11ffb7Safresh1 "Got expected result in eval'ing a string with a large code point" 2529f11ffb7Safresh1 . " delimiter"; 2539f11ffb7Safresh1 2549f11ffb7Safresh1 $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF 2559f11ffb7Safresh1 eval "\$a = q ${b}Hello, \\\\whirled!${b}"; 2569f11ffb7Safresh1 is $@, "", 2579f11ffb7Safresh1 "No errors in eval'ing a string with large code point delimiter"; 2589f11ffb7Safresh1 is $a, 'Hello, \whirled!', 2599f11ffb7Safresh1 "Got expected result in eval'ing a string with a large code point" 2609f11ffb7Safresh1 . " delimiter"; 2619f11ffb7Safresh1 } 2629f11ffb7Safresh1} 2639f11ffb7Safresh1 264b46d8ef2Safresh1fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)'); 265b46d8ef2Safresh1use utf8; 266b46d8ef2Safresh1eval "sort \x{100}%"; 267b46d8ef2Safresh1die $@; 268b46d8ef2Safresh1EOS 269b46d8ef2Safresh1syntax error at (eval 1) line 1, at EOF 270*e0680481Safresh1Execution of (eval 1) aborted due to compilation errors. 271b46d8ef2Safresh1EXPECT 2729f11ffb7Safresh1 2739f11ffb7Safresh1# New tests go here ^^^^^ 2749f11ffb7Safresh1 2759f11ffb7Safresh1# Keep this test last, as it will mess up line number reporting for any 2769f11ffb7Safresh1# subsequent tests. 2779f11ffb7Safresh1 2789f11ffb7Safresh1<<END; 2799f11ffb7Safresh1${ 2809f11ffb7Safresh1#line 57 2819f11ffb7Safresh1qq ϟϟ } 2829f11ffb7Safresh1END 2839f11ffb7Safresh1is __LINE__, 59, '#line directive and qq with uni delims inside heredoc'; 2849f11ffb7Safresh1 2859f11ffb7Safresh1# Put new tests above the line number tests. 286