xref: /openbsd-src/gnu/usr.bin/perl/t/uni/variables.t (revision de8cc8edbc71bd3e3bc7fbffa27ba0e564c37d8b)
1#!./perl
2
3# Checks if the parser behaves correctly in edge case
4# (including weird syntax errors)
5
6BEGIN {
7    chdir 't' if -d 't';
8    require './test.pl';
9    skip_all_without_unicode_tables();
10}
11
12use 5.016;
13use utf8;
14use open qw( :utf8 :std );
15no warnings qw(misc reserved);
16
17plan (tests => 66880);
18
19# ${single:colon} should not be treated as a simple variable, but as a
20# block with a label inside.
21{
22    no strict;
23
24    local $@;
25    eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'";
26    is ${"\x{30cd}colon"}, 'label, not var',
27         '${\x{30cd}single:\x{30cd}colon} should be block-label';
28
29    local $@;
30    no utf8;
31    evalbytes '${single:colon} = "block/label, not var"';
32    is($::colon,
33         'block/label, not var',
34         '...same with ${single:colon}'
35        );
36}
37
38# ${yadda'etc} and ${yadda::etc} should both work under strict
39{
40    local $@;
41    eval q<use strict; ${flark::fleem}>;
42    is($@, '', q<${package::var} works>);
43
44    local $@;
45    eval q<use strict; ${fleem'flark}>;
46    is($@, '', q<...as does ${package'var}>);
47}
48
49# The first character in ${...} should respect the rules
50{
51   local $@;
52   use utf8;
53   eval '${☭asd} = 1';
54   like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special))
55}
56
57# Checking that at least some of the special variables work
58for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) {
59  SKIP: {
60    local $@;
61    evalbytes "\$$v;";
62    is $@, '', "No syntax error for \$$v";
63
64    local $@;
65    eval "use utf8; \$$v;";
66    is $@, '', "No syntax error for \$$v under 'use utf8'";
67  }
68}
69
70# Checking if the Latin-1 range behaves as expected, and that the behavior is the
71# same whenever under strict or not.
72for ( 0x0 .. 0xff ) {
73    my @warnings;
74    local $SIG {__WARN__} = sub {push @warnings, @_ };
75    my $ord = utf8::unicode_to_native($_);
76    my $chr = chr $ord;
77    my $syntax_error = 0;   # Do we expect this code point to generate a
78                            # syntax error?  Assume not, for now
79    my $deprecated = 0;
80    my $name;
81
82    # A different number of tests are run depending on the branches in this
83    # loop iteration.  This allows us to add skips to make the reported total
84    # the same for each iteration.
85    my $tests = 0;
86    my $max_tests = 6;
87
88    if ($chr =~ /[[:graph:]]/a) {
89        $name = "'$chr'";
90        $syntax_error = 1 if $chr eq '{';
91    }
92    elsif ($chr =~ /[[:space:]]/a) {
93        $name = sprintf "\\x%02x, an ASCII space character", $ord;
94        $syntax_error = 1;
95    }
96    elsif ($chr =~ /[[:cntrl:]]/a) {
97        $name = sprintf "\\x%02x, an ASCII control", $ord;
98        $syntax_error = 1;
99    }
100    elsif ($chr =~ /\pC/) {
101        if ($chr eq "\N{SHY}") {
102            $name = sprintf "\\x%02x, SHY", $ord;
103        }
104        else {
105            $name = sprintf "\\x%02x, a C1 control", $ord;
106        }
107        $syntax_error = 1;
108        $deprecated = ! $syntax_error;
109    }
110    elsif ($chr =~ /\p{XIDStart}/) {
111        $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord;
112    }
113    elsif ($chr =~ /\p{XPosixSpace}/) {
114        $name = sprintf "\\x%02x, a non-ASCII space character", $ord;
115        $syntax_error = 1;
116        $deprecated = ! $syntax_error;
117    }
118    else {
119        $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord;
120    }
121    no warnings 'closure';
122    my $esc = sprintf("%X", $ord);
123    utf8::downgrade($chr);
124    if ($chr !~ /\p{XIDS}/u) {
125        if ($syntax_error) {
126            evalbytes "\$$chr";
127            like($@, qr/ syntax\ error | Unrecognized\ character /x,
128                     "$name as a length-1 variable generates a syntax error");
129            $tests++;
130            utf8::upgrade($chr);
131            eval "no strict; \$$chr = 4;",
132            like($@, qr/ syntax\ error | Unrecognized\ character /x,
133                     "  ... and the same under 'use utf8'");
134            $tests++;
135        }
136        elsif ($chr =~ /[[:punct:][:digit:]]/a) {
137            next if ($chr eq '#' or $chr eq '*'); # RT 133583
138
139            # Unlike other variables, we dare not try setting the length-1
140            # variables that are ASCII punctuation and digits.  This is
141            # because many of these variables have meaning to the system, and
142            # setting them could have side effects or not work as expected
143            # (And using fresh_perl() doesn't always help.) For all these we
144            # just verify that they don't generate a syntax error.
145            local $@;
146            evalbytes "\$$chr;";
147            is $@, '', "$name as a length-1 variable doesn't generate a syntax error";
148            $tests++;
149            utf8::upgrade($chr);
150            evalbytes "no strict; use utf8; \$$chr;",
151            is $@, '', "  ... and the same under 'use utf8'";
152            $tests++;
153        }
154        else {
155            is evalbytes "no strict; \$$chr = 10",
156                10,
157                "$name is legal as a length-1 variable";
158            $tests++;
159            if ($chr =~ /[[:ascii:]]/) {
160                utf8::upgrade($chr);
161                is evalbytes "no strict; use utf8; \$$chr = 1",
162                    1,
163                    "  ... and is legal under 'use utf8'";
164                $tests++;
165            }
166            else {
167                utf8::upgrade($chr);
168                local $@;
169                eval "no strict; use utf8; \$$chr = 1";
170                like $@,
171                    qr/\QUnrecognized character \x{\E\L$esc/,
172                    "  ... but is illegal as a length-1 variable under 'use utf8'";
173                $tests++;
174            }
175        }
176    }
177    else {
178        {
179            no utf8;
180            local $@;
181            evalbytes "no strict; \$$chr = 1";
182            is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable");
183            $tests++;
184
185            if ($chr !~ /[[:ascii:]]/) {
186                local $@;
187                evalbytes "use strict; \$$chr = 1";
188                is($@,
189                    '',
190                    "  ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS"
191                );
192                $tests++;
193
194                local $@;
195                evalbytes "\$a$chr = 1";
196                like($@,
197                    qr/Unrecognized character /,
198                    "  ... but under 'no utf8', it's not allowed in length-2+ variables"
199                );
200                $tests++;
201            }
202        }
203        {
204            use utf8;
205            my $utf8 = $chr;
206            utf8::upgrade($utf8);
207            local $@;
208            eval "no strict; \$$utf8 = 1";
209            is($@, '', "  ... and under 'use utf8', 'no strict', is a valid length-1 variable");
210            $tests++;
211
212            local $@;
213            eval "use strict; \$$utf8 = 1";
214            if ($chr =~ /[ab]/) {   # These are special, for sort()
215                is($@, '', "  ... and under 'use utf8', 'use strict',"
216                    . " is a valid length-1 variable (\$a and \$b are special)");
217                $tests++;
218            }
219            else {
220                like($@,
221                    qr/Global symbol "\$$utf8" requires explicit package name/,
222                    "  ... and under utf8 has to be required under strict"
223                );
224                $tests++;
225            }
226        }
227    }
228
229    if (! $deprecated) {
230        if ($chr =~ /[#*]/) {
231
232            # Length-1 variables with these two characters used to be used by
233            # Perl, but now a warning is generated that they're gone.
234            # Ignore such warnings.
235            for (my $i = @warnings - 1; $i >= 0; $i--) {
236                splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/;
237            }
238        }
239        my $message = "  ... and doesn't generate any warnings";
240        $message = "  TODO $message" if    $ord == 0
241                                        || $chr =~ /\s/a;
242
243        if (! ok(@warnings == 0, $message)) {
244            note join "\n", @warnings;
245        }
246        $tests++;
247    }
248    elsif (! @warnings) {
249        fail("  ... and generates deprecation warnings (since is deprecated)");
250        $tests++;
251    }
252    else {
253        ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings),
254           "  ... and generates deprecation warnings (only)");
255        $tests++;
256    }
257
258    SKIP: {
259        die "Wrong max count for tests" if $tests > $max_tests;
260        skip("untaken tests", $max_tests - $tests) if $max_tests > $tests;
261    }
262}
263
264{
265    use utf8;
266    my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla
267    is($@, '', "ASCII character + combining character works as a variable name");
268    is($ret, 100, "  ... and returns the correct value");
269}
270
271# From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail
272for my $chr (
273      "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}",
274      "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}",
275      "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}",
276   )
277{
278   no warnings 'non_unicode';
279   my $esc = sprintf("%x", ord $chr);
280   local $@;
281   eval "\$$chr = 1; \$$chr";
282   like($@,
283        qr/\QUnrecognized character \x{$esc};/,
284        "\\x{$esc} is illegal for a length-one identifier"
285       );
286}
287
288for my $i (0x100..0xffff) {
289   my $chr = chr($i);
290   my $esc = sprintf("%x", $i);
291   local $@;
292   eval "my \$$chr = q<test>; \$$chr;";
293   if ( $chr =~ /^\p{_Perl_IDStart}$/ ) {
294      is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i));
295   }
296   else {
297      like($@,
298           qr/\QUnrecognized character \x{$esc};/,
299           "\\x{$esc} isn't XIDS, illegal as a length-1 variable",
300          )
301   }
302}
303
304{
305    # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz
306    # https://rt.perl.org/rt3/Public/Bug/Display.html?id=117101
307    no strict;
308
309    local $@;
310    eval <<'EOP';
311    q{$} =~ /(.)/;
312    is($$1, $$, q{$$1 parses as ${$1}});
313
314    $doof = "test";
315    $test = "Got here";
316    $::{+$$} = *doof;
317
318    is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} );
319EOP
320    is($@, '', q{$$1 parses correctly});
321
322    for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) {
323        my $esc = sprintf("\\x{%x}", ord $chr);
324        local $@;
325        eval <<"    EOP";
326            \$$chr = q{\$};
327            \$\$$chr;
328    EOP
329
330        like($@,
331             qr/syntax error|Unrecognized character/,
332             qq{\$\$$esc is a syntax error}
333        );
334    }
335}
336
337{
338    # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz
339    # https://rt.perl.org/rt3/Ticket/Display.html?id=117145
340    local $@;
341    my $var = 10;
342    eval ' ${  var  }';
343
344    is(
345        $@,
346        '',
347        '${  var  } works under strict'
348    );
349
350    {
351        no strict;
352
353        for my $var ( '$', "^GLOBAL_PHASE", "^V" ) {
354            eval "\${ $var}";
355            is($@, '', "\${ $var} works" );
356            eval "\${$var }";
357            is($@, '', "\${$var } works" );
358            eval "\${ $var }";
359            is($@, '', "\${ $var } works" );
360        }
361        my $var = "\7LOBAL_PHASE";
362        eval "\${ $var}";
363        like($@, qr/Unrecognized character \\x07/,
364             "\${ $var} generates 'Unrecognized character' error" );
365        eval "\${$var }";
366        like($@, qr/Unrecognized character \\x07/,
367             "\${$var } generates 'Unrecognized character' error" );
368        eval "\${ $var }";
369        like($@, qr/Unrecognized character \\x07/,
370             "\${ $var } generates 'Unrecognized character' error" );
371    }
372}
373
374{
375    is(
376        "".eval "*{\nOIN}",
377        "*main::OIN",
378        "Newlines at the start of an identifier should be skipped over"
379    );
380
381
382    SKIP: {
383        skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1)
384                                                                if $::IS_EBCDIC;
385        is(
386            "".eval "*{^JOIN}",
387            "*main::\nOIN",
388            "  ... but \$^J is still legal"
389        );
390    }
391
392    my $ret = eval "\${\cT\n}";
393    like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message');
394}
395
396{
397    # Prior to 5.19.4, the following changed behavior depending
398    # on the presence of the newline after '@{'.
399    sub foo (&) { [1] }
400    my %foo = (a=>2);
401    my $ret = @{ foo { "a" } };
402    is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}');
403
404    $ret = @{
405            foo { "a" }
406        };
407    is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}');
408
409}
410