xref: /openbsd-src/gnu/usr.bin/perl/t/uni/variables.t (revision e068048151d29f2562a32185e21a8ba885482260)
191f110e0Safresh1#!./perl
291f110e0Safresh1
391f110e0Safresh1# Checks if the parser behaves correctly in edge case
491f110e0Safresh1# (including weird syntax errors)
591f110e0Safresh1
691f110e0Safresh1BEGIN {
7b8851fccSafresh1    chdir 't' if -d 't';
891f110e0Safresh1    require './test.pl';
9b8851fccSafresh1    skip_all_without_unicode_tables();
1091f110e0Safresh1}
1191f110e0Safresh1
1291f110e0Safresh1use 5.016;
1391f110e0Safresh1use utf8;
1491f110e0Safresh1use open qw( :utf8 :std );
1591f110e0Safresh1no warnings qw(misc reserved);
1691f110e0Safresh1
17b46d8ef2Safresh1plan (tests => 66880);
1891f110e0Safresh1
19b8851fccSafresh1# ${single:colon} should not be treated as a simple variable, but as a
20b8851fccSafresh1# block with a label inside.
2191f110e0Safresh1{
2291f110e0Safresh1    no strict;
2391f110e0Safresh1
2491f110e0Safresh1    local $@;
25b8851fccSafresh1    eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'";
26b8851fccSafresh1    is ${"\x{30cd}colon"}, 'label, not var',
27b8851fccSafresh1         '${\x{30cd}single:\x{30cd}colon} should be block-label';
2891f110e0Safresh1
2991f110e0Safresh1    local $@;
3091f110e0Safresh1    no utf8;
31b8851fccSafresh1    evalbytes '${single:colon} = "block/label, not var"';
32b8851fccSafresh1    is($::colon,
33b8851fccSafresh1         'block/label, not var',
3491f110e0Safresh1         '...same with ${single:colon}'
3591f110e0Safresh1        );
3691f110e0Safresh1}
3791f110e0Safresh1
3891f110e0Safresh1# ${yadda'etc} and ${yadda::etc} should both work under strict
3991f110e0Safresh1{
4091f110e0Safresh1    local $@;
4191f110e0Safresh1    eval q<use strict; ${flark::fleem}>;
4291f110e0Safresh1    is($@, '', q<${package::var} works>);
4391f110e0Safresh1
44*e0680481Safresh1    no warnings qw(syntax deprecated);
4591f110e0Safresh1    local $@;
4691f110e0Safresh1    eval q<use strict; ${fleem'flark}>;
4791f110e0Safresh1    is($@, '', q<...as does ${package'var}>);
4891f110e0Safresh1}
4991f110e0Safresh1
5091f110e0Safresh1# The first character in ${...} should respect the rules
5191f110e0Safresh1{
5291f110e0Safresh1   local $@;
5391f110e0Safresh1   use utf8;
5491f110e0Safresh1   eval '${☭asd} = 1';
5591f110e0Safresh1   like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special))
5691f110e0Safresh1}
5791f110e0Safresh1
5891f110e0Safresh1# Checking that at least some of the special variables work
59b46d8ef2Safresh1for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) {
606fb12b70Safresh1  SKIP: {
6191f110e0Safresh1    local $@;
6291f110e0Safresh1    evalbytes "\$$v;";
6391f110e0Safresh1    is $@, '', "No syntax error for \$$v";
6491f110e0Safresh1
6591f110e0Safresh1    local $@;
6691f110e0Safresh1    eval "use utf8; \$$v;";
67b8851fccSafresh1    is $@, '', "No syntax error for \$$v under 'use utf8'";
6891f110e0Safresh1  }
696fb12b70Safresh1}
7091f110e0Safresh1
7191f110e0Safresh1# Checking if the Latin-1 range behaves as expected, and that the behavior is the
7291f110e0Safresh1# same whenever under strict or not.
73b8851fccSafresh1for ( 0x0 .. 0xff ) {
74b8851fccSafresh1    my @warnings;
75b8851fccSafresh1    local $SIG {__WARN__} = sub {push @warnings, @_ };
76b8851fccSafresh1    my $ord = utf8::unicode_to_native($_);
77b8851fccSafresh1    my $chr = chr $ord;
78b8851fccSafresh1    my $syntax_error = 0;   # Do we expect this code point to generate a
79b8851fccSafresh1                            # syntax error?  Assume not, for now
80b8851fccSafresh1    my $deprecated = 0;
81b8851fccSafresh1    my $name;
82b8851fccSafresh1
83b8851fccSafresh1    # A different number of tests are run depending on the branches in this
84b8851fccSafresh1    # loop iteration.  This allows us to add skips to make the reported total
85b8851fccSafresh1    # the same for each iteration.
86b8851fccSafresh1    my $tests = 0;
87b8851fccSafresh1    my $max_tests = 6;
88b8851fccSafresh1
89b8851fccSafresh1    if ($chr =~ /[[:graph:]]/a) {
90b8851fccSafresh1        $name = "'$chr'";
91b8851fccSafresh1        $syntax_error = 1 if $chr eq '{';
92b8851fccSafresh1    }
93b8851fccSafresh1    elsif ($chr =~ /[[:space:]]/a) {
94b8851fccSafresh1        $name = sprintf "\\x%02x, an ASCII space character", $ord;
95b8851fccSafresh1        $syntax_error = 1;
96b8851fccSafresh1    }
97b8851fccSafresh1    elsif ($chr =~ /[[:cntrl:]]/a) {
98b8851fccSafresh1        $name = sprintf "\\x%02x, an ASCII control", $ord;
99b8851fccSafresh1        $syntax_error = 1;
100b8851fccSafresh1    }
101b8851fccSafresh1    elsif ($chr =~ /\pC/) {
102b8851fccSafresh1        if ($chr eq "\N{SHY}") {
103b8851fccSafresh1            $name = sprintf "\\x%02x, SHY", $ord;
104b8851fccSafresh1        }
105b8851fccSafresh1        else {
106b8851fccSafresh1            $name = sprintf "\\x%02x, a C1 control", $ord;
107b8851fccSafresh1        }
1089f11ffb7Safresh1        $syntax_error = 1;
109b8851fccSafresh1        $deprecated = ! $syntax_error;
110b8851fccSafresh1    }
111b8851fccSafresh1    elsif ($chr =~ /\p{XIDStart}/) {
112b8851fccSafresh1        $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord;
113b8851fccSafresh1    }
114b8851fccSafresh1    elsif ($chr =~ /\p{XPosixSpace}/) {
115b8851fccSafresh1        $name = sprintf "\\x%02x, a non-ASCII space character", $ord;
1169f11ffb7Safresh1        $syntax_error = 1;
117b8851fccSafresh1        $deprecated = ! $syntax_error;
118b8851fccSafresh1    }
119b8851fccSafresh1    else {
120b8851fccSafresh1        $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord;
121b8851fccSafresh1    }
12291f110e0Safresh1    no warnings 'closure';
123b8851fccSafresh1    my $esc = sprintf("%X", $ord);
12491f110e0Safresh1    utf8::downgrade($chr);
12591f110e0Safresh1    if ($chr !~ /\p{XIDS}/u) {
126b8851fccSafresh1        if ($syntax_error) {
127b8851fccSafresh1            evalbytes "\$$chr";
128b8851fccSafresh1            like($@, qr/ syntax\ error | Unrecognized\ character /x,
129b8851fccSafresh1                     "$name as a length-1 variable generates a syntax error");
130b8851fccSafresh1            $tests++;
131b8851fccSafresh1            utf8::upgrade($chr);
1329f11ffb7Safresh1            eval "no strict; \$$chr = 4;",
133b8851fccSafresh1            like($@, qr/ syntax\ error | Unrecognized\ character /x,
134b8851fccSafresh1                     "  ... and the same under 'use utf8'");
135b8851fccSafresh1            $tests++;
136b8851fccSafresh1        }
137b8851fccSafresh1        elsif ($chr =~ /[[:punct:][:digit:]]/a) {
138b46d8ef2Safresh1            next if ($chr eq '#' or $chr eq '*'); # RT 133583
139b8851fccSafresh1
140b8851fccSafresh1            # Unlike other variables, we dare not try setting the length-1
141b8851fccSafresh1            # variables that are ASCII punctuation and digits.  This is
142b8851fccSafresh1            # because many of these variables have meaning to the system, and
143b8851fccSafresh1            # setting them could have side effects or not work as expected
144b8851fccSafresh1            # (And using fresh_perl() doesn't always help.) For all these we
145b8851fccSafresh1            # just verify that they don't generate a syntax error.
146b8851fccSafresh1            local $@;
147b8851fccSafresh1            evalbytes "\$$chr;";
148b8851fccSafresh1            is $@, '', "$name as a length-1 variable doesn't generate a syntax error";
149b8851fccSafresh1            $tests++;
150b8851fccSafresh1            utf8::upgrade($chr);
151b8851fccSafresh1            evalbytes "no strict; use utf8; \$$chr;",
152b8851fccSafresh1            is $@, '', "  ... and the same under 'use utf8'";
153b8851fccSafresh1            $tests++;
154b8851fccSafresh1        }
155b8851fccSafresh1        else {
15691f110e0Safresh1            is evalbytes "no strict; \$$chr = 10",
15791f110e0Safresh1                10,
158b8851fccSafresh1                "$name is legal as a length-1 variable";
159b8851fccSafresh1            $tests++;
160b8851fccSafresh1            if ($chr =~ /[[:ascii:]]/) {
161b8851fccSafresh1                utf8::upgrade($chr);
162b8851fccSafresh1                is evalbytes "no strict; use utf8; \$$chr = 1",
163b8851fccSafresh1                    1,
164b8851fccSafresh1                    "  ... and is legal under 'use utf8'";
165b8851fccSafresh1                $tests++;
166b8851fccSafresh1            }
167b8851fccSafresh1            else {
16891f110e0Safresh1                utf8::upgrade($chr);
16991f110e0Safresh1                local $@;
17091f110e0Safresh1                eval "no strict; use utf8; \$$chr = 1";
17191f110e0Safresh1                like $@,
17291f110e0Safresh1                    qr/\QUnrecognized character \x{\E\L$esc/,
173b8851fccSafresh1                    "  ... but is illegal as a length-1 variable under 'use utf8'";
174b8851fccSafresh1                $tests++;
175b8851fccSafresh1            }
176b8851fccSafresh1        }
17791f110e0Safresh1    }
17891f110e0Safresh1    else {
17991f110e0Safresh1        {
18091f110e0Safresh1            no utf8;
18191f110e0Safresh1            local $@;
18291f110e0Safresh1            evalbytes "no strict; \$$chr = 1";
183b8851fccSafresh1            is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable");
184b8851fccSafresh1            $tests++;
18591f110e0Safresh1
186b8851fccSafresh1            if ($chr !~ /[[:ascii:]]/) {
18791f110e0Safresh1                local $@;
18891f110e0Safresh1                evalbytes "use strict; \$$chr = 1";
18991f110e0Safresh1                is($@,
19091f110e0Safresh1                    '',
191b8851fccSafresh1                    "  ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS"
19291f110e0Safresh1                );
193b8851fccSafresh1                $tests++;
19491f110e0Safresh1
19591f110e0Safresh1                local $@;
19691f110e0Safresh1                evalbytes "\$a$chr = 1";
19791f110e0Safresh1                like($@,
19891f110e0Safresh1                    qr/Unrecognized character /,
199b8851fccSafresh1                    "  ... but under 'no utf8', it's not allowed in length-2+ variables"
20091f110e0Safresh1                );
201b8851fccSafresh1                $tests++;
202b8851fccSafresh1            }
20391f110e0Safresh1        }
20491f110e0Safresh1        {
20591f110e0Safresh1            use utf8;
206b8851fccSafresh1            my $utf8 = $chr;
207b8851fccSafresh1            utf8::upgrade($utf8);
20891f110e0Safresh1            local $@;
209b8851fccSafresh1            eval "no strict; \$$utf8 = 1";
210b8851fccSafresh1            is($@, '', "  ... and under 'use utf8', 'no strict', is a valid length-1 variable");
211b8851fccSafresh1            $tests++;
21291f110e0Safresh1
21391f110e0Safresh1            local $@;
214b8851fccSafresh1            eval "use strict; \$$utf8 = 1";
215b8851fccSafresh1            if ($chr =~ /[ab]/) {   # These are special, for sort()
216b8851fccSafresh1                is($@, '', "  ... and under 'use utf8', 'use strict',"
217b8851fccSafresh1                    . " is a valid length-1 variable (\$a and \$b are special)");
218b8851fccSafresh1                $tests++;
21991f110e0Safresh1            }
220b8851fccSafresh1            else {
221b8851fccSafresh1                like($@,
222b8851fccSafresh1                    qr/Global symbol "\$$utf8" requires explicit package name/,
223b8851fccSafresh1                    "  ... and under utf8 has to be required under strict"
224b8851fccSafresh1                );
225b8851fccSafresh1                $tests++;
226b8851fccSafresh1            }
227b8851fccSafresh1        }
228b8851fccSafresh1    }
229b8851fccSafresh1
230b8851fccSafresh1    if (! $deprecated) {
231b8851fccSafresh1        if ($chr =~ /[#*]/) {
232b8851fccSafresh1
233b8851fccSafresh1            # Length-1 variables with these two characters used to be used by
234b8851fccSafresh1            # Perl, but now a warning is generated that they're gone.
235b8851fccSafresh1            # Ignore such warnings.
236b8851fccSafresh1            for (my $i = @warnings - 1; $i >= 0; $i--) {
237b8851fccSafresh1                splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
238b8851fccSafresh1            }
239b8851fccSafresh1        }
240b8851fccSafresh1        my $message = "  ... and doesn't generate any warnings";
241b8851fccSafresh1        $message = "  TODO $message" if    $ord == 0
242b8851fccSafresh1                                        || $chr =~ /\s/a;
243b8851fccSafresh1
244b8851fccSafresh1        if (! ok(@warnings == 0, $message)) {
245b8851fccSafresh1            note join "\n", @warnings;
246b8851fccSafresh1        }
247b8851fccSafresh1        $tests++;
248b8851fccSafresh1    }
249b8851fccSafresh1    elsif (! @warnings) {
250b8851fccSafresh1        fail("  ... and generates deprecation warnings (since is deprecated)");
251b8851fccSafresh1        $tests++;
252b8851fccSafresh1    }
253b8851fccSafresh1    else {
254b8851fccSafresh1        ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings),
255b8851fccSafresh1           "  ... and generates deprecation warnings (only)");
256b8851fccSafresh1        $tests++;
257b8851fccSafresh1    }
258b8851fccSafresh1
259b8851fccSafresh1    SKIP: {
260b8851fccSafresh1        die "Wrong max count for tests" if $tests > $max_tests;
261b8851fccSafresh1        skip("untaken tests", $max_tests - $tests) if $max_tests > $tests;
26291f110e0Safresh1    }
26391f110e0Safresh1}
26491f110e0Safresh1
26591f110e0Safresh1{
26691f110e0Safresh1    use utf8;
26791f110e0Safresh1    my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla
26891f110e0Safresh1    is($@, '', "ASCII character + combining character works as a variable name");
26991f110e0Safresh1    is($ret, 100, "  ... and returns the correct value");
27091f110e0Safresh1}
27191f110e0Safresh1
27291f110e0Safresh1# From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail
27391f110e0Safresh1for my $chr (
27491f110e0Safresh1      "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}",
27591f110e0Safresh1      "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}",
27691f110e0Safresh1      "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}",
27791f110e0Safresh1   )
27891f110e0Safresh1{
27991f110e0Safresh1   no warnings 'non_unicode';
28091f110e0Safresh1   my $esc = sprintf("%x", ord $chr);
28191f110e0Safresh1   local $@;
28291f110e0Safresh1   eval "\$$chr = 1; \$$chr";
28391f110e0Safresh1   like($@,
28491f110e0Safresh1        qr/\QUnrecognized character \x{$esc};/,
28591f110e0Safresh1        "\\x{$esc} is illegal for a length-one identifier"
28691f110e0Safresh1       );
28791f110e0Safresh1}
28891f110e0Safresh1
28991f110e0Safresh1for my $i (0x100..0xffff) {
29091f110e0Safresh1   my $chr = chr($i);
29191f110e0Safresh1   my $esc = sprintf("%x", $i);
29291f110e0Safresh1   local $@;
29391f110e0Safresh1   eval "my \$$chr = q<test>; \$$chr;";
29491f110e0Safresh1   if ( $chr =~ /^\p{_Perl_IDStart}$/ ) {
29591f110e0Safresh1      is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i));
29691f110e0Safresh1   }
29791f110e0Safresh1   else {
29891f110e0Safresh1      like($@,
29991f110e0Safresh1           qr/\QUnrecognized character \x{$esc};/,
30091f110e0Safresh1           "\\x{$esc} isn't XIDS, illegal as a length-1 variable",
30191f110e0Safresh1          )
30291f110e0Safresh1   }
30391f110e0Safresh1}
30491f110e0Safresh1
30591f110e0Safresh1{
30691f110e0Safresh1    # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz
30756d68f1eSafresh1    # https://github.com/Perl/perl5/issues/12841
30891f110e0Safresh1    no strict;
30991f110e0Safresh1
31091f110e0Safresh1    local $@;
31191f110e0Safresh1    eval <<'EOP';
31291f110e0Safresh1    q{$} =~ /(.)/;
31391f110e0Safresh1    is($$1, $$, q{$$1 parses as ${$1}});
31491f110e0Safresh1
31591f110e0Safresh1    $doof = "test";
31691f110e0Safresh1    $test = "Got here";
31791f110e0Safresh1    $::{+$$} = *doof;
31891f110e0Safresh1
31991f110e0Safresh1    is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} );
32091f110e0Safresh1EOP
32191f110e0Safresh1    is($@, '', q{$$1 parses correctly});
32291f110e0Safresh1
32391f110e0Safresh1    for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) {
32491f110e0Safresh1        my $esc = sprintf("\\x{%x}", ord $chr);
32591f110e0Safresh1        local $@;
32691f110e0Safresh1        eval <<"    EOP";
32791f110e0Safresh1            \$$chr = q{\$};
32891f110e0Safresh1            \$\$$chr;
32991f110e0Safresh1    EOP
33091f110e0Safresh1
33191f110e0Safresh1        like($@,
33291f110e0Safresh1             qr/syntax error|Unrecognized character/,
33391f110e0Safresh1             qq{\$\$$esc is a syntax error}
33491f110e0Safresh1        );
33591f110e0Safresh1    }
33691f110e0Safresh1}
33791f110e0Safresh1
33891f110e0Safresh1{
33991f110e0Safresh1    # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz
34056d68f1eSafresh1    # https://github.com/Perl/perl5/issues/12849
34191f110e0Safresh1    local $@;
34291f110e0Safresh1    my $var = 10;
34391f110e0Safresh1    eval ' ${  var  }';
34491f110e0Safresh1
34591f110e0Safresh1    is(
34691f110e0Safresh1        $@,
34791f110e0Safresh1        '',
34891f110e0Safresh1        '${  var  } works under strict'
34991f110e0Safresh1    );
35091f110e0Safresh1
35191f110e0Safresh1    {
35291f110e0Safresh1        no strict;
3536fb12b70Safresh1
354b8851fccSafresh1        for my $var ( '$', "^GLOBAL_PHASE", "^V" ) {
35591f110e0Safresh1            eval "\${ $var}";
35691f110e0Safresh1            is($@, '', "\${ $var} works" );
35791f110e0Safresh1            eval "\${$var }";
35891f110e0Safresh1            is($@, '', "\${$var } works" );
35991f110e0Safresh1            eval "\${ $var }";
36091f110e0Safresh1            is($@, '', "\${ $var } works" );
36191f110e0Safresh1        }
362b8851fccSafresh1        my $var = "\7LOBAL_PHASE";
363b8851fccSafresh1        eval "\${ $var}";
364b8851fccSafresh1        like($@, qr/Unrecognized character \\x07/,
365b8851fccSafresh1             "\${ $var} generates 'Unrecognized character' error" );
366b8851fccSafresh1        eval "\${$var }";
367b8851fccSafresh1        like($@, qr/Unrecognized character \\x07/,
368b8851fccSafresh1             "\${$var } generates 'Unrecognized character' error" );
369b8851fccSafresh1        eval "\${ $var }";
370b8851fccSafresh1        like($@, qr/Unrecognized character \\x07/,
371b8851fccSafresh1             "\${ $var } generates 'Unrecognized character' error" );
37291f110e0Safresh1    }
37391f110e0Safresh1}
3746fb12b70Safresh1
3756fb12b70Safresh1{
3766fb12b70Safresh1    is(
3776fb12b70Safresh1        "".eval "*{\nOIN}",
3786fb12b70Safresh1        "*main::OIN",
3796fb12b70Safresh1        "Newlines at the start of an identifier should be skipped over"
3806fb12b70Safresh1    );
3816fb12b70Safresh1
3826fb12b70Safresh1
383b8851fccSafresh1    SKIP: {
384b8851fccSafresh1        skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1)
385b8851fccSafresh1                                                                if $::IS_EBCDIC;
3866fb12b70Safresh1        is(
3876fb12b70Safresh1            "".eval "*{^JOIN}",
3886fb12b70Safresh1            "*main::\nOIN",
3896fb12b70Safresh1            "  ... but \$^J is still legal"
3906fb12b70Safresh1        );
3916fb12b70Safresh1    }
3926fb12b70Safresh1
393b8851fccSafresh1    my $ret = eval "\${\cT\n}";
394b8851fccSafresh1    like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message');
3956fb12b70Safresh1}
3966fb12b70Safresh1
3976fb12b70Safresh1{
3986fb12b70Safresh1    # Prior to 5.19.4, the following changed behavior depending
3996fb12b70Safresh1    # on the presence of the newline after '@{'.
4006fb12b70Safresh1    sub foo (&) { [1] }
4016fb12b70Safresh1    my %foo = (a=>2);
4026fb12b70Safresh1    my $ret = @{ foo { "a" } };
4036fb12b70Safresh1    is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}');
4046fb12b70Safresh1
4056fb12b70Safresh1    $ret = @{
4066fb12b70Safresh1            foo { "a" }
4076fb12b70Safresh1        };
4086fb12b70Safresh1    is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}');
4096fb12b70Safresh1
4106fb12b70Safresh1}
411