1#!./perl 2 3# Checks if the parser behaves correctly in edge case 4# (including weird syntax errors) 5 6BEGIN { 7 chdir 't' if -d 't'; 8 require './test.pl'; 9 skip_all_without_unicode_tables(); 10} 11 12use 5.016; 13use utf8; 14use open qw( :utf8 :std ); 15no warnings qw(misc reserved); 16 17plan (tests => 66880); 18 19# ${single:colon} should not be treated as a simple variable, but as a 20# block with a label inside. 21{ 22 no strict; 23 24 local $@; 25 eval "\${\x{30cd}single:\x{30cd}colon} = 'label, not var'"; 26 is ${"\x{30cd}colon"}, 'label, not var', 27 '${\x{30cd}single:\x{30cd}colon} should be block-label'; 28 29 local $@; 30 no utf8; 31 evalbytes '${single:colon} = "block/label, not var"'; 32 is($::colon, 33 'block/label, not var', 34 '...same with ${single:colon}' 35 ); 36} 37 38# ${yadda'etc} and ${yadda::etc} should both work under strict 39{ 40 local $@; 41 eval q<use strict; ${flark::fleem}>; 42 is($@, '', q<${package::var} works>); 43 44 local $@; 45 eval q<use strict; ${fleem'flark}>; 46 is($@, '', q<...as does ${package'var}>); 47} 48 49# The first character in ${...} should respect the rules 50{ 51 local $@; 52 use utf8; 53 eval '${☭asd} = 1'; 54 like($@, qr/\QUnrecognized character/, q(the first character in ${...} isn't special)) 55} 56 57# Checking that at least some of the special variables work 58for my $v (qw( ^V ; < > ( ) {^GLOBAL_PHASE} ^W _ 1 4 0 ] ! @ / \ = )) { 59 SKIP: { 60 local $@; 61 evalbytes "\$$v;"; 62 is $@, '', "No syntax error for \$$v"; 63 64 local $@; 65 eval "use utf8; \$$v;"; 66 is $@, '', "No syntax error for \$$v under 'use utf8'"; 67 } 68} 69 70# Checking if the Latin-1 range behaves as expected, and that the behavior is the 71# same whenever under strict or not. 72for ( 0x0 .. 0xff ) { 73 my @warnings; 74 local $SIG {__WARN__} = sub {push @warnings, @_ }; 75 my $ord = utf8::unicode_to_native($_); 76 my $chr = chr $ord; 77 my $syntax_error = 0; # Do we expect this code point to generate a 78 # syntax error? Assume not, for now 79 my $deprecated = 0; 80 my $name; 81 82 # A different number of tests are run depending on the branches in this 83 # loop iteration. This allows us to add skips to make the reported total 84 # the same for each iteration. 85 my $tests = 0; 86 my $max_tests = 6; 87 88 if ($chr =~ /[[:graph:]]/a) { 89 $name = "'$chr'"; 90 $syntax_error = 1 if $chr eq '{'; 91 } 92 elsif ($chr =~ /[[:space:]]/a) { 93 $name = sprintf "\\x%02x, an ASCII space character", $ord; 94 $syntax_error = 1; 95 } 96 elsif ($chr =~ /[[:cntrl:]]/a) { 97 $name = sprintf "\\x%02x, an ASCII control", $ord; 98 $syntax_error = 1; 99 } 100 elsif ($chr =~ /\pC/) { 101 if ($chr eq "\N{SHY}") { 102 $name = sprintf "\\x%02x, SHY", $ord; 103 } 104 else { 105 $name = sprintf "\\x%02x, a C1 control", $ord; 106 } 107 $syntax_error = 1; 108 $deprecated = ! $syntax_error; 109 } 110 elsif ($chr =~ /\p{XIDStart}/) { 111 $name = sprintf "\\x%02x, a non-ASCII XIDS character", $ord; 112 } 113 elsif ($chr =~ /\p{XPosixSpace}/) { 114 $name = sprintf "\\x%02x, a non-ASCII space character", $ord; 115 $syntax_error = 1; 116 $deprecated = ! $syntax_error; 117 } 118 else { 119 $name = sprintf "\\x%02x, a non-ASCII, non-XIDS graphic character", $ord; 120 } 121 no warnings 'closure'; 122 my $esc = sprintf("%X", $ord); 123 utf8::downgrade($chr); 124 if ($chr !~ /\p{XIDS}/u) { 125 if ($syntax_error) { 126 evalbytes "\$$chr"; 127 like($@, qr/ syntax\ error | Unrecognized\ character /x, 128 "$name as a length-1 variable generates a syntax error"); 129 $tests++; 130 utf8::upgrade($chr); 131 eval "no strict; \$$chr = 4;", 132 like($@, qr/ syntax\ error | Unrecognized\ character /x, 133 " ... and the same under 'use utf8'"); 134 $tests++; 135 } 136 elsif ($chr =~ /[[:punct:][:digit:]]/a) { 137 next if ($chr eq '#' or $chr eq '*'); # RT 133583 138 139 # Unlike other variables, we dare not try setting the length-1 140 # variables that are ASCII punctuation and digits. This is 141 # because many of these variables have meaning to the system, and 142 # setting them could have side effects or not work as expected 143 # (And using fresh_perl() doesn't always help.) For all these we 144 # just verify that they don't generate a syntax error. 145 local $@; 146 evalbytes "\$$chr;"; 147 is $@, '', "$name as a length-1 variable doesn't generate a syntax error"; 148 $tests++; 149 utf8::upgrade($chr); 150 evalbytes "no strict; use utf8; \$$chr;", 151 is $@, '', " ... and the same under 'use utf8'"; 152 $tests++; 153 } 154 else { 155 is evalbytes "no strict; \$$chr = 10", 156 10, 157 "$name is legal as a length-1 variable"; 158 $tests++; 159 if ($chr =~ /[[:ascii:]]/) { 160 utf8::upgrade($chr); 161 is evalbytes "no strict; use utf8; \$$chr = 1", 162 1, 163 " ... and is legal under 'use utf8'"; 164 $tests++; 165 } 166 else { 167 utf8::upgrade($chr); 168 local $@; 169 eval "no strict; use utf8; \$$chr = 1"; 170 like $@, 171 qr/\QUnrecognized character \x{\E\L$esc/, 172 " ... but is illegal as a length-1 variable under 'use utf8'"; 173 $tests++; 174 } 175 } 176 } 177 else { 178 { 179 no utf8; 180 local $@; 181 evalbytes "no strict; \$$chr = 1"; 182 is($@, '', "$name under 'no utf8', 'no strict', is a valid length-1 variable"); 183 $tests++; 184 185 if ($chr !~ /[[:ascii:]]/) { 186 local $@; 187 evalbytes "use strict; \$$chr = 1"; 188 is($@, 189 '', 190 " ... and under 'no utf8' does not have to be required under strict, even though it matches XIDS" 191 ); 192 $tests++; 193 194 local $@; 195 evalbytes "\$a$chr = 1"; 196 like($@, 197 qr/Unrecognized character /, 198 " ... but under 'no utf8', it's not allowed in length-2+ variables" 199 ); 200 $tests++; 201 } 202 } 203 { 204 use utf8; 205 my $utf8 = $chr; 206 utf8::upgrade($utf8); 207 local $@; 208 eval "no strict; \$$utf8 = 1"; 209 is($@, '', " ... and under 'use utf8', 'no strict', is a valid length-1 variable"); 210 $tests++; 211 212 local $@; 213 eval "use strict; \$$utf8 = 1"; 214 if ($chr =~ /[ab]/) { # These are special, for sort() 215 is($@, '', " ... and under 'use utf8', 'use strict'," 216 . " is a valid length-1 variable (\$a and \$b are special)"); 217 $tests++; 218 } 219 else { 220 like($@, 221 qr/Global symbol "\$$utf8" requires explicit package name/, 222 " ... and under utf8 has to be required under strict" 223 ); 224 $tests++; 225 } 226 } 227 } 228 229 if (! $deprecated) { 230 if ($chr =~ /[#*]/) { 231 232 # Length-1 variables with these two characters used to be used by 233 # Perl, but now a warning is generated that they're gone. 234 # Ignore such warnings. 235 for (my $i = @warnings - 1; $i >= 0; $i--) { 236 splice @warnings, $i, 1 if $warnings[$i] =~ /is no longer supported/; 237 } 238 } 239 my $message = " ... and doesn't generate any warnings"; 240 $message = " TODO $message" if $ord == 0 241 || $chr =~ /\s/a; 242 243 if (! ok(@warnings == 0, $message)) { 244 note join "\n", @warnings; 245 } 246 $tests++; 247 } 248 elsif (! @warnings) { 249 fail(" ... and generates deprecation warnings (since is deprecated)"); 250 $tests++; 251 } 252 else { 253 ok((scalar @warnings == grep { $_ =~ /deprecated/ } @warnings), 254 " ... and generates deprecation warnings (only)"); 255 $tests++; 256 } 257 258 SKIP: { 259 die "Wrong max count for tests" if $tests > $max_tests; 260 skip("untaken tests", $max_tests - $tests) if $max_tests > $tests; 261 } 262} 263 264{ 265 use utf8; 266 my $ret = eval "my \$c\x{327} = 100; \$c\x{327}"; # c + cedilla 267 is($@, '', "ASCII character + combining character works as a variable name"); 268 is($ret, 100, " ... and returns the correct value"); 269} 270 271# From Tom Christiansen's 'highly illegal variable names are now accidentally legal' mail 272for my $chr ( 273 "\N{EM DASH}", "\x{F8FF}", "\N{POUND SIGN}", "\N{SOFT HYPHEN}", 274 "\N{THIN SPACE}", "\x{11_1111}", "\x{DC00}", "\N{COMBINING DIAERESIS}", 275 "\N{COMBINING ENCLOSING CIRCLE BACKSLASH}", 276 ) 277{ 278 no warnings 'non_unicode'; 279 my $esc = sprintf("%x", ord $chr); 280 local $@; 281 eval "\$$chr = 1; \$$chr"; 282 like($@, 283 qr/\QUnrecognized character \x{$esc};/, 284 "\\x{$esc} is illegal for a length-one identifier" 285 ); 286} 287 288for my $i (0x100..0xffff) { 289 my $chr = chr($i); 290 my $esc = sprintf("%x", $i); 291 local $@; 292 eval "my \$$chr = q<test>; \$$chr;"; 293 if ( $chr =~ /^\p{_Perl_IDStart}$/ ) { 294 is($@, '', sprintf("\\x{%04x} is XIDS, works as a length-1 variable", $i)); 295 } 296 else { 297 like($@, 298 qr/\QUnrecognized character \x{$esc};/, 299 "\\x{$esc} isn't XIDS, illegal as a length-1 variable", 300 ) 301 } 302} 303 304{ 305 # Bleadperl v5.17.9-109-g3283393 breaks ZEFRAM/Module-Runtime-0.013.tar.gz 306 # https://rt.perl.org/rt3/Public/Bug/Display.html?id=117101 307 no strict; 308 309 local $@; 310 eval <<'EOP'; 311 q{$} =~ /(.)/; 312 is($$1, $$, q{$$1 parses as ${$1}}); 313 314 $doof = "test"; 315 $test = "Got here"; 316 $::{+$$} = *doof; 317 318 is( $$$$1, $test, q{$$$$1 parses as ${${${$1}}}} ); 319EOP 320 is($@, '', q{$$1 parses correctly}); 321 322 for my $chr ( q{@}, "\N{U+FF10}", "\N{U+0300}" ) { 323 my $esc = sprintf("\\x{%x}", ord $chr); 324 local $@; 325 eval <<" EOP"; 326 \$$chr = q{\$}; 327 \$\$$chr; 328 EOP 329 330 like($@, 331 qr/syntax error|Unrecognized character/, 332 qq{\$\$$esc is a syntax error} 333 ); 334 } 335} 336 337{ 338 # bleadperl v5.17.9-109-g3283393 breaks JEREMY/File-Signature-1.009.tar.gz 339 # https://rt.perl.org/rt3/Ticket/Display.html?id=117145 340 local $@; 341 my $var = 10; 342 eval ' ${ var }'; 343 344 is( 345 $@, 346 '', 347 '${ var } works under strict' 348 ); 349 350 { 351 no strict; 352 353 for my $var ( '$', "^GLOBAL_PHASE", "^V" ) { 354 eval "\${ $var}"; 355 is($@, '', "\${ $var} works" ); 356 eval "\${$var }"; 357 is($@, '', "\${$var } works" ); 358 eval "\${ $var }"; 359 is($@, '', "\${ $var } works" ); 360 } 361 my $var = "\7LOBAL_PHASE"; 362 eval "\${ $var}"; 363 like($@, qr/Unrecognized character \\x07/, 364 "\${ $var} generates 'Unrecognized character' error" ); 365 eval "\${$var }"; 366 like($@, qr/Unrecognized character \\x07/, 367 "\${$var } generates 'Unrecognized character' error" ); 368 eval "\${ $var }"; 369 like($@, qr/Unrecognized character \\x07/, 370 "\${ $var } generates 'Unrecognized character' error" ); 371 } 372} 373 374{ 375 is( 376 "".eval "*{\nOIN}", 377 "*main::OIN", 378 "Newlines at the start of an identifier should be skipped over" 379 ); 380 381 382 SKIP: { 383 skip('Is $^U on EBCDIC 1047, BC; nothing works on 0037', 1) 384 if $::IS_EBCDIC; 385 is( 386 "".eval "*{^JOIN}", 387 "*main::\nOIN", 388 " ... but \$^J is still legal" 389 ); 390 } 391 392 my $ret = eval "\${\cT\n}"; 393 like($@, qr/\QUnrecognized character/, '${\n\cT\n} gives an error message'); 394} 395 396{ 397 # Prior to 5.19.4, the following changed behavior depending 398 # on the presence of the newline after '@{'. 399 sub foo (&) { [1] } 400 my %foo = (a=>2); 401 my $ret = @{ foo { "a" } }; 402 is($ret, $foo{a}, '@{ foo { "a" } } is parsed as @foo{a}'); 403 404 $ret = @{ 405 foo { "a" } 406 }; 407 is($ret, $foo{a}, '@{\nfoo { "a" } } is still parsed as @foo{a}'); 408 409} 410