xref: /openbsd-src/gnu/usr.bin/perl/t/uni/parser.t (revision e068048151d29f2562a32185e21a8ba885482260)
1898184e3Ssthen#!./perl
2898184e3Ssthen
3898184e3Ssthen# Checks if the parser behaves correctly in edge cases
4898184e3Ssthen# (including weird syntax errors)
5898184e3Ssthen
6898184e3SsthenBEGIN {
7b8851fccSafresh1    chdir 't' if -d 't';
8898184e3Ssthen    require './test.pl';
99f11ffb7Safresh1    require './charset_tools.pl';
10b8851fccSafresh1    skip_all_without_unicode_tables();
11898184e3Ssthen}
12898184e3Ssthen
13b46d8ef2Safresh1plan (tests => 58);
14898184e3Ssthen
15898184e3Ssthenuse utf8;
16898184e3Ssthenuse open qw( :utf8 :std );
17898184e3Ssthen
18b8851fccSafresh1is *tèst, "*main::tèst", "sanity check.";
19898184e3Ssthenok $::{"tèst"}, "gets the right glob in the stash.";
20898184e3Ssthen
21898184e3Ssthenmy $glob_by_sub = sub { *main::method }->();
22898184e3Ssthen
23898184e3Ssthenis *main::method, "*main::method", "glob stringy works";
24898184e3Ssthenis "" . *main::method, "*main::method", "glob stringify-through-concat works";
25898184e3Ssthenis $glob_by_sub, "*main::method", "glob stringy works";
26898184e3Ssthenis "" . $glob_by_sub, "*main::method", "";
27898184e3Ssthen
28898184e3Ssthensub gimme_glob {
29898184e3Ssthen    no strict 'refs';
30898184e3Ssthen    is *{$_[0]}, "*main::$_[0]";
31898184e3Ssthen    *{$_[0]};
32898184e3Ssthen}
33898184e3Ssthen
34898184e3Ssthenis "" . gimme_glob("下郎"), "*main::下郎";
35898184e3Ssthen$a = *下郎;
36898184e3Ssthenis "" . $a, "*main::下郎";
37898184e3Ssthen
38898184e3Ssthen*{gimme_glob("下郎")} = sub {};
39898184e3Ssthen
40898184e3Ssthen{
41898184e3Ssthen    ok defined *{"下郎"}{CODE};
42898184e3Ssthen    ok !defined *{"\344\270\213\351\203\216"}{CODE};
43898184e3Ssthen}
44898184e3Ssthen
45898184e3Ssthen$Lèon = 1;
46898184e3Ssthenis ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,";
47898184e3Ssthenok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";
48898184e3Ssthen
49898184e3Ssthenmy $a = "foo" . chr(190);
50898184e3Ssthenmy $b = $a    . chr(256);
51898184e3Ssthenchop $b; # $b is $a with utf8 on
52898184e3Ssthen
53898184e3Ssthenis $a, $b, '$a equals $b';
54898184e3Ssthen
55898184e3Ssthen*$b = sub { 5 };
56898184e3Ssthen
57898184e3Ssthenis eval { main->$a }, 5, q!$a can call $b's sub!;
58898184e3Ssthenok !$@, "..and there's no error.";
59898184e3Ssthen
60898184e3Ssthenmy $c = $b;
61898184e3Ssthenutf8::encode($c);
62898184e3Ssthenok $b ne $c, '$b unequal $c';
63898184e3Sstheneval { main->$c };
64898184e3Ssthenok $@, q!$c can't call $b's sub.!;
65898184e3Ssthen
66898184e3Ssthen# Now define another sub under the downgraded name:
67898184e3Ssthen*$a = sub { 6 };
68898184e3Ssthen# Call it:
69898184e3Ssthenis eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
70898184e3Ssthenok !$@, "..without errors.";
71898184e3Sstheneval { main->$c };
72898184e3Ssthenok $@, "but it's still unreachable through *c";
73898184e3Ssthen
74898184e3Ssthen*$b = \10;
75898184e3Ssthenis ${*$a{SCALAR}}, 10;
76898184e3Ssthenis ${*$b{SCALAR}}, 10;
77898184e3Ssthenis ${*$c{SCALAR}}, undef;
78898184e3Ssthen
79898184e3Ssthenopendir FÒÒ, ".";
80898184e3Ssthenclosedir FÒÒ;
81898184e3Ssthen::ok($::{"FÒÒ"}, "Bareword generates the right glob.");
82898184e3Ssthen::ok(!$::{"F\303\222\303\222"});
83898184e3Ssthen
84898184e3Ssthensub участники { 1 }
85898184e3Ssthen
86898184e3Ssthenok $::{"участники"}, "non-const sub declarations generate the right glob";
87b8851fccSafresh1is $::{"участники"}->(), 1;
88898184e3Ssthen
89898184e3Ssthensub 原 () { 1 }
90898184e3Ssthen
91898184e3Ssthenis grep({ $_ eq "\x{539f}"     } keys %::), 1, "Constant subs generate the right glob.";
92898184e3Ssthenis grep({ $_ eq "\345\216\237" } keys %::), 0;
93898184e3Ssthen
94898184e3Ssthen#These should probably go elsewhere.
95898184e3Sstheneval q{ sub wròng1 (_$); wròng1(1,2) };
96898184e3Ssthenlike( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' );
97898184e3Ssthen
98898184e3Sstheneval q{ sub ча::ики ($__); ча::ики(1,2) };
99898184e3Ssthenlike( $@, qr/Malformed prototype for ча::ики/ );
100898184e3Ssthen
101898184e3Ssthenour $問 = 10;
102898184e3Ssthenis $問, 10, "our works";
103898184e3Ssthenis $main::問, 10, "...as does getting the same variable through the fully qualified name";
104898184e3Ssthenis ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";
105898184e3Ssthen
106898184e3Ssthen{
107898184e3Ssthen    use charnames qw( :full );
108898184e3Ssthen
109898184e3Ssthen    eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
110898184e3Ssthen    $@ =~ s/eval \d+/eval 11/;
111898184e3Ssthen    is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after  my $ニ <-- HERE near column 8 at (eval 11) line 1.
112898184e3Ssthen', "'Unrecognized character' croak is UTF-8 clean";
1136fb12b70Safresh1
1146fb12b70Safresh1    eval "q\0foobar\0 \x{FFFF}+1";
1156fb12b70Safresh1    $@ =~ s/eval \d+/eval 11/;
1166fb12b70Safresh1    is(
1176fb12b70Safresh1        $@,
1186fb12b70Safresh1       "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
1196fb12b70Safresh1       "...and nul-clean"
1206fb12b70Safresh1    );
1216fb12b70Safresh1
1226fb12b70Safresh1    {
1236fb12b70Safresh1        use re 'eval';
1246fb12b70Safresh1        my $f = qq{(?{\$ネ+ 1; \x{1F42A} })};
1256fb12b70Safresh1        eval { "a" =~ /^a$f/ };
1266fb12b70Safresh1        my $e = $@;
1276fb12b70Safresh1        $e =~ s/eval \d+/eval 11/;
1286fb12b70Safresh1        is(
1296fb12b70Safresh1            $e,
1306fb12b70Safresh1            "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
1316fb12b70Safresh1            "Messages from a re-eval are UTF-8 clean"
1326fb12b70Safresh1        );
1336fb12b70Safresh1
1346fb12b70Safresh1        $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
1356fb12b70Safresh1        eval { "a" =~ /^a$f/ };
1366fb12b70Safresh1        my $e = $@;
1376fb12b70Safresh1        $e =~ s/eval \d+/eval 11/;
1386fb12b70Safresh1        is(
1396fb12b70Safresh1            $e,
1406fb12b70Safresh1            "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
1416fb12b70Safresh1           "...and nul-clean"
1426fb12b70Safresh1        );
1436fb12b70Safresh1    }
1446fb12b70Safresh1
1456fb12b70Safresh1    {
1466fb12b70Safresh1        eval qq{\$ネ+ 1; \x{1F42A}};
1476fb12b70Safresh1        $@ =~ s/eval \d+/eval 11/;
1486fb12b70Safresh1        is(
1496fb12b70Safresh1            $@,
1506fb12b70Safresh1            "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
1516fb12b70Safresh1            "Unrecognized character error doesn't cut off in the middle of characters"
1526fb12b70Safresh1        )
1536fb12b70Safresh1    }
1546fb12b70Safresh1
155898184e3Ssthen}
156898184e3Ssthen
157898184e3Ssthen{
158898184e3Ssthen    use feature 'state';
159898184e3Ssthen    for ( qw( my state our ) ) {
160898184e3Ssthen        local $@;
161898184e3Ssthen        eval "$_ Foo $x = 1;";
162898184e3Ssthen        like $@, qr/No such class Foo/u, "'No such class' warning for $_ is UTF-8 clean";
163898184e3Ssthen    }
164898184e3Ssthen}
165898184e3Ssthen
166898184e3Ssthen{
167898184e3Ssthen    local $@;
168898184e3Ssthen    eval "our \$main::\x{30cb};";
169898184e3Ssthen    like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
170898184e3Ssthen}
171898184e3Ssthen
172898184e3Ssthen{
173898184e3Ssthen    use feature 'state';
174898184e3Ssthen    local $@;
175898184e3Ssthen    for ( qw( my state ) ) {
176898184e3Ssthen        eval "$_ \$::\x{30cb};";
177898184e3Ssthen        like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
178898184e3Ssthen    }
179898184e3Ssthen}
180898184e3Ssthen
181898184e3Ssthen{
182898184e3Ssthen    local $@;
183898184e3Ssthen    eval qq!print \x{30cb}, "comma""!;
184898184e3Ssthen    like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
185898184e3Ssthen}
186898184e3Ssthen
187898184e3Ssthen# tests for "Bad name"
188898184e3Sstheneval q{ Foo::$bar };
189898184e3Ssthenlike( $@, qr/Bad name after Foo::/, 'Bad name after Foo::' );
190898184e3Sstheneval q{ Foo''bar };
191898184e3Ssthenlike( $@, qr/Bad name after Foo'/, 'Bad name after Foo\'' );
19291f110e0Safresh1
19391f110e0Safresh1{
19491f110e0Safresh1    no warnings 'utf8';
1959f11ffb7Safresh1    local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
1969f11ffb7Safresh1                                    # which we ignore
197b8851fccSafresh1    my $malformed_to_be = ($::IS_EBCDIC)   # Overlong sequence
198b8851fccSafresh1                           ? "\x{74}\x{41}"
199b8851fccSafresh1                           : "\x{c0}\x{a0}";
20091f110e0Safresh1    CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
2019f11ffb7Safresh1    like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}');
20291f110e0Safresh1}
203b8851fccSafresh1
204b8851fccSafresh1# RT# 124216: Perl_sv_clear: Assertion
205b8851fccSafresh1# If a parsing error occurred during a forced token within an interpolated
206b8851fccSafresh1# context, the stack unwinding failed to restore PL_lex_defer and so after
207b8851fccSafresh1# error recovery the state restored after the forced token was processed
208b8851fccSafresh1# was the wrong one, resulting in the lexer thinking we're still inside a
209b8851fccSafresh1# quoted string and things getting freed multiple times.
210b8851fccSafresh1#
211b8851fccSafresh1# The \x{3030} char isn't a legal var name, and this triggers the error.
212b8851fccSafresh1#
213b8851fccSafresh1# NB: this only failed if the closing quote of the interpolated string is
214b8851fccSafresh1# the last char of the file (i.e. no trailing \n).
215b8851fccSafresh1
216b8851fccSafresh1{
217b8851fccSafresh1    my $bad = "\x{3030}";
218b8851fccSafresh1    # Write out the individual utf8 bytes making up \x{3030}. This
219b8851fccSafresh1    # avoids 'Wide char in print' warnings from test.pl. (We may still
220b8851fccSafresh1    # get that warning when compiling the prog itself, since the
221b8851fccSafresh1    # error it prints to stderr contains a wide char.)
222b8851fccSafresh1    utf8::encode($bad);
223b8851fccSafresh1
224b8851fccSafresh1    fresh_perl_like(qq{use utf8; "\$$bad"},
225b8851fccSafresh1        qr/
226b8851fccSafresh1            \A
227b8851fccSafresh1            ( \QWide character in print at - line 1.\E\n )?
228b8851fccSafresh1            \Qsyntax error at - line 1, near \E"\$.*"\n
229b8851fccSafresh1            \QExecution of - aborted due to compilation errors.\E\z
230b8851fccSafresh1        /xm,
231b8851fccSafresh1
232b8851fccSafresh1        {stderr => 1}, "RT# 124216");
233b8851fccSafresh1}
2349f11ffb7Safresh1
2359f11ffb7Safresh1SKIP: {
2369f11ffb7Safresh1
2379f11ffb7Safresh1    use Config;
2389f11ffb7Safresh1    if ($Config{uvsize} < 8) {
2399f11ffb7Safresh1        skip("test is only valid on 64-bit ints", 4);
2409f11ffb7Safresh1    }
2419f11ffb7Safresh1    else {
2429f11ffb7Safresh1        my $a;
2439f11ffb7Safresh1        my $b;
2449f11ffb7Safresh1
2459f11ffb7Safresh1        # This caused a memory fault [perl #128738]
2469f11ffb7Safresh1        $b = byte_utf8a_to_utf8n("\xFE\x82\x80\x80\x80\x80\x80"); # 0x80000000
2479f11ffb7Safresh1        eval "\$a = q ${b}abc${b}";
2489f11ffb7Safresh1        is $@, "",
2499f11ffb7Safresh1               "No errors in eval'ing a string with large code point delimiter";
2509f11ffb7Safresh1        is $a, 'abc',
2519f11ffb7Safresh1               "Got expected result in eval'ing a string with a large code point"
2529f11ffb7Safresh1            . " delimiter";
2539f11ffb7Safresh1
2549f11ffb7Safresh1        $b = byte_utf8a_to_utf8n("\xFE\x83\xBF\xBF\xBF\xBF\xBF"); # 0xFFFFFFFF
2559f11ffb7Safresh1        eval "\$a = q ${b}Hello, \\\\whirled!${b}";
2569f11ffb7Safresh1        is $@, "",
2579f11ffb7Safresh1               "No errors in eval'ing a string with large code point delimiter";
2589f11ffb7Safresh1        is $a, 'Hello, \whirled!',
2599f11ffb7Safresh1               "Got expected result in eval'ing a string with a large code point"
2609f11ffb7Safresh1            . " delimiter";
2619f11ffb7Safresh1    }
2629f11ffb7Safresh1}
2639f11ffb7Safresh1
264b46d8ef2Safresh1fresh_perl_is(<<'EOS', <<'EXPECT', {}, 'no panic in pad_findmy_pvn (#134061)');
265b46d8ef2Safresh1use utf8;
266b46d8ef2Safresh1eval "sort \x{100}%";
267b46d8ef2Safresh1die $@;
268b46d8ef2Safresh1EOS
269b46d8ef2Safresh1syntax error at (eval 1) line 1, at EOF
270*e0680481Safresh1Execution of (eval 1) aborted due to compilation errors.
271b46d8ef2Safresh1EXPECT
2729f11ffb7Safresh1
2739f11ffb7Safresh1# New tests go here ^^^^^
2749f11ffb7Safresh1
2759f11ffb7Safresh1# Keep this test last, as it will mess up line number reporting for any
2769f11ffb7Safresh1# subsequent tests.
2779f11ffb7Safresh1
2789f11ffb7Safresh1<<END;
2799f11ffb7Safresh1${
2809f11ffb7Safresh1#line 57
2819f11ffb7Safresh1qq ϟϟ }
2829f11ffb7Safresh1END
2839f11ffb7Safresh1is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';
2849f11ffb7Safresh1
2859f11ffb7Safresh1# Put new tests above the line number tests.
286