1#!/usr/bin/perl -w 2 3# Try opening libperl.a with nm, and verifying it has the kind of 4# symbols we expect, and no symbols we should avoid. 5# 6# Fail softly, expect things only on known platforms: 7# - linux, x86 only (ppc linux has odd symbol tables) 8# - darwin (OS X), both x86 and ppc 9# - freebsd 10# and on other platforms, and if things seem odd, just give up (skip_all). 11# 12# Also, if the rarely-used builds options -DPERL_GLOBAL_STRUCT or 13# -DPERL_GLOBAL_STRUCT_PRIVATE are used, verify that they did what 14# they were meant to do, hide the global variables (see perlguts for 15# the details). 16# 17# Debugging tip: nm output (this script's input) can be faked by 18# giving one command line argument for this script: it should be 19# either the filename to read, or "-" for STDIN. You can also append 20# "@style" (where style is a supported nm style, like "gnu" or "darwin") 21# to this filename for "cross-parsing". 22# 23# Some terminology: 24# - "text" symbols are code 25# - "data" symbols are data (duh), with subdivisions: 26# - "bss": (Block-Started-by-Symbol: originally from IBM assembler...), 27# uninitialized data, which often even doesn't exist in the object 28# file as such, only its size does, which is then created on demand 29# by the loader 30# - "const": initialized read-only data, like string literals 31# - "common": uninitialized data unless initialized... 32# (the full story is too long for here, see "man nm") 33# - "data": initialized read-write data 34# (somewhat confusingly below: "data data", but it makes code simpler) 35# - "undefined": external symbol referred to by an object, 36# most likely a text symbol. Can be either a symbol defined by 37# a Perl object file but referred to by other Perl object files, 38# or a completely external symbol from libc, or other system libraries. 39 40BEGIN { 41 chdir 't' if -d 't'; 42 @INC = '../lib'; 43 require "./test.pl"; 44} 45 46use strict; 47 48use Config; 49 50if ($Config{cc} =~ /g\+\+/) { 51 # XXX Could use c++filt, maybe. 52 skip_all "on g++"; 53} 54 55my $libperl_a; 56 57for my $f (qw(../libperl.a libperl.a)) { 58 if (-f $f) { 59 $libperl_a = $f; 60 last; 61 } 62} 63 64unless (defined $libperl_a) { 65 skip_all "no libperl.a"; 66} 67 68print "# \$^O = $^O\n"; 69print "# \$Config{archname} = $Config{archname}\n"; 70print "# \$Config{cc} = $Config{cc}\n"; 71print "# libperl = $libperl_a\n"; 72 73my $nm; 74my $nm_opt = ''; 75my $nm_style; 76my $nm_fh; 77my $nm_err_tmp = "libperl$$"; 78 79END { 80 # this is still executed when we skip_all above, avoid a warning 81 unlink $nm_err_tmp if $nm_err_tmp; 82} 83 84my $fake_input; 85my $fake_style; 86 87if (@ARGV == 1) { 88 $fake_input = shift @ARGV; 89 print "# Faking nm output from $fake_input\n"; 90 if ($fake_input =~ s/\@(.+)$//) { 91 $fake_style = $1; 92 print "# Faking nm style from $fake_style\n"; 93 if ($fake_style eq 'gnu' || 94 $fake_style eq 'linux' || 95 $fake_style eq 'freebsd') { 96 $nm_style = 'gnu' 97 } elsif ($fake_style eq 'darwin' || $fake_style eq 'osx') { 98 $nm_style = 'darwin' 99 } else { 100 die "$0: Unknown explicit nm style '$fake_style'\n"; 101 } 102 } 103} 104 105unless (defined $nm_style) { 106 if ($^O eq 'linux') { 107 # The 'gnu' style could be equally well be called 'bsd' style, 108 # since the output format of the GNU binutils nm is really BSD. 109 $nm_style = 'gnu'; 110 } elsif ($^O eq 'freebsd') { 111 $nm_style = 'gnu'; 112 } elsif ($^O eq 'darwin') { 113 $nm_style = 'darwin'; 114 } 115} 116 117if (defined $nm_style) { 118 if ($nm_style eq 'gnu') { 119 $nm = '/usr/bin/nm'; 120 } elsif ($nm_style eq 'darwin') { 121 $nm = '/usr/bin/nm'; 122 # With the -m option we get better information than the BSD-like 123 # default: with the default, a lot of symbols get dumped into 'S' 124 # or 's', for example one cannot tell the difference between const 125 # and non-const data symbols. 126 $nm_opt = '-m'; 127 } else { 128 die "$0: Unexpected nm style '$nm_style'\n"; 129 } 130} 131 132if ($^O eq 'linux' && $Config{archname} !~ /^x86/) { 133 # For example in ppc most (but not all!) code symbols are placed 134 # in 'D' (data), not in ' T '. We cannot work under such conditions. 135 skip_all "linux but archname $Config{archname} not x86*"; 136} 137 138unless (defined $nm) { 139 skip_all "no nm"; 140} 141 142unless (defined $nm_style) { 143 skip_all "no nm style"; 144} 145 146print "# nm = $nm\n"; 147print "# nm_style = $nm_style\n"; 148print "# nm_opt = $nm_opt\n"; 149 150unless (-x $nm) { 151 skip_all "no executable nm $nm"; 152} 153 154if ($nm_style eq 'gnu' && !defined $fake_style) { 155 open(my $gnu_verify, "$nm --version|") or 156 skip_all "nm failed: $!"; 157 my $gnu_verified; 158 while (<$gnu_verify>) { 159 if (/^GNU nm/) { 160 $gnu_verified = 1; 161 last; 162 } 163 } 164 unless ($gnu_verified) { 165 skip_all "no GNU nm"; 166 } 167} 168 169if (defined $fake_input) { 170 if ($fake_input eq '-') { 171 open($nm_fh, "<&STDIN") or 172 skip_all "Duping STDIN failed: $!"; 173 } else { 174 open($nm_fh, "<", $fake_input) or 175 skip_all "Opening '$fake_input' failed: $!"; 176 } 177 undef $nm_err_tmp; # In this case there will be no nm errors. 178} else { 179 open($nm_fh, "$nm $nm_opt $libperl_a 2>$nm_err_tmp |") or 180 skip_all "$nm $nm_opt $libperl_a failed: $!"; 181} 182 183sub is_perlish_symbol { 184 $_[0] =~ /^(?:PL_|Perl|PerlIO)/; 185} 186 187# XXX Implement "internal test" for this script (option -t?) 188# to verify that the parsing does what it's intended to. 189 190sub nm_parse_gnu { 191 my $symbols = shift; 192 my $line = $_; 193 if (m{^(\w+\.o):$}) { 194 # object file name 195 $symbols->{obj}{$1}++; 196 $symbols->{o} = $1; 197 return; 198 } else { 199 die "$0: undefined current object: $line" 200 unless defined $symbols->{o}; 201 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 202 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 203 if (/^[Rr] (\w+)$/) { 204 # R: read only (const) 205 $symbols->{data}{const}{$1}{$symbols->{o}}++; 206 } elsif (/^r .+$/) { 207 # Skip local const (read only). 208 } elsif (/^([Tti]) (\w+)(\..+)?$/) { 209 $symbols->{text}{$2}{$symbols->{o}}{$1}++; 210 } elsif (/^C (\w+)$/) { 211 $symbols->{data}{common}{$1}{$symbols->{o}}++; 212 } elsif (/^[BbSs] (\w+)(\.\d+)?$/) { 213 # Bb: uninitialized data (bss) 214 # Ss: uninitialized data "for small objects" 215 $symbols->{data}{bss}{$1}{$symbols->{o}}++; 216 } elsif (/^D _LIB_VERSION$/) { 217 # Skip the _LIB_VERSION (not ours, probably libm) 218 } elsif (/^[DdGg] (\w+)$/) { 219 # Dd: initialized data 220 # Gg: initialized "for small objects" 221 $symbols->{data}{data}{$1}{$symbols->{o}}++; 222 } elsif (/^. \.?(\w+)$/) { 223 # Skip the unknown types. 224 print "# Unknown type: $line ($symbols->{o})\n"; 225 } 226 return; 227 } elsif (/^ {8}(?: {8})? U _?(\w+)$/) { 228 my ($symbol) = $1; 229 return if is_perlish_symbol($symbol); 230 $symbols->{undef}{$symbol}{$symbols->{o}}++; 231 return; 232 } 233 } 234 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 235} 236 237sub nm_parse_darwin { 238 my $symbols = shift; 239 my $line = $_; 240 if (m{^(?:.+)?libperl\.a\((\w+\.o)\):$}) { 241 # object file name 242 $symbols->{obj}{$1}++; 243 $symbols->{o} = $1; 244 return; 245 } else { 246 die "$0: undefined current object: $line" unless defined $symbols->{o}; 247 # 64-bit systems have 16 hexdigits, 32-bit systems have 8. 248 if (s/^[0-9a-f]{8}(?:[0-9a-f]{8})? //) { 249 # String literals can live in different sections 250 # depending on the compiler and os release, assumedly 251 # also linker flags. 252 if (/^\(__TEXT,__(?:const|cstring|literal\d+)\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 253 my ($symbol, $suffix) = ($1, $2); 254 # Ignore function-local constants like 255 # _Perl_av_extend_guts.oom_array_extend 256 return if defined $suffix && /__TEXT,__const/; 257 # Ignore the cstring unnamed strings. 258 return if $symbol =~ /^L\.str\d+$/; 259 $symbols->{data}{const}{$symbol}{$symbols->{o}}++; 260 } elsif (/^\(__TEXT,__text\) ((?:non-)?external) _(\w+)$/) { 261 my ($exp, $sym) = ($1, $2); 262 $symbols->{text}{$sym}{$symbols->{o}}{$exp =~ /^non/ ? 't' : 'T'}++; 263 } elsif (/^\(__DATA,__\w*?(const|data|bss|common)\w*\) (?:non-)?external _?(\w+)(\.\w+)?$/) { 264 my ($dtype, $symbol, $suffix) = ($1, $2, $3); 265 # Ignore function-local constants like 266 # _Perl_pp_gmtime.dayname 267 return if defined $suffix; 268 $symbols->{data}{$dtype}{$symbol}{$symbols->{o}}++; 269 } elsif (/^\(__DATA,__const\) non-external _\.memset_pattern\d*$/) { 270 # Skip this, whatever it is (some inlined leakage from 271 # darwin libc?) 272 } elsif (/^\(__TEXT,__eh_frame/) { 273 # Skip the eh_frame (exception handling) symbols. 274 return; 275 } elsif (/^\(__\w+,__\w+\) /) { 276 # Skip the unknown types. 277 print "# Unknown type: $line ($symbols->{o})\n"; 278 } 279 return; 280 } elsif (/^ {8}(?: {8})? \(undefined(?: \[lazy bound\])?\) external _?(.+)/) { 281 # darwin/ppc marks most undefined text symbols 282 # as "[lazy bound]". 283 my ($symbol) = $1; 284 return if is_perlish_symbol($symbol); 285 $symbols->{undef}{$symbol}{$symbols->{o}}++; 286 return; 287 } 288 } 289 print "# Unexpected nm output '$line' ($symbols->{o})\n"; 290} 291 292my $nm_parse; 293 294if ($nm_style eq 'gnu') { 295 $nm_parse = \&nm_parse_gnu; 296} elsif ($nm_style eq 'darwin') { 297 $nm_parse = \&nm_parse_darwin; 298} 299 300unless (defined $nm_parse) { 301 skip_all "no nm parser ($nm_style $nm_style, \$^O $^O)"; 302} 303 304my %symbols; 305 306while (<$nm_fh>) { 307 next if /^$/; 308 chomp; 309 $nm_parse->(\%symbols); 310} 311 312# use Data::Dumper; print Dumper(\%symbols); 313 314# Something went awfully wrong. Wrong nm? Wrong options? 315unless (keys %symbols) { 316 skip_all "no symbols\n"; 317} 318unless (exists $symbols{text}) { 319 skip_all "no text symbols\n"; 320} 321 322# These should always be true for everyone. 323 324ok($symbols{obj}{'pp.o'}, "has object pp.o"); 325ok($symbols{text}{'Perl_peep'}, "has text Perl_peep"); 326ok($symbols{text}{'Perl_pp_uc'}{'pp.o'}, "has text Perl_pp_uc in pp.o"); 327ok(exists $symbols{data}{const}, "has data const symbols"); 328ok($symbols{data}{const}{PL_no_mem}{'globals.o'}, "has PL_no_mem"); 329 330my $GS = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT\b/ ? 1 : 0; 331my $GSP = $Config{ccflags} =~ /-DPERL_GLOBAL_STRUCT_PRIVATE/ ? 1 : 0; 332 333print "# GS = $GS\n"; 334print "# GSP = $GSP\n"; 335 336my %data_symbols; 337 338for my $dtype (sort keys %{$symbols{data}}) { 339 for my $symbol (sort keys %{$symbols{data}{$dtype}}) { 340 $data_symbols{$symbol}++; 341 } 342} 343 344# The following tests differ between vanilla vs $GSP or $GS. 345 346if ($GSP) { 347 print "# -DPERL_GLOBAL_STRUCT_PRIVATE\n"; 348 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); 349 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); 350 351 ok(! exists $symbols{data}{bss}, "has no data bss symbols"); 352 ok(! exists $symbols{data}{data} || 353 # clang with ASAN seems to add this symbol to every object file: 354 !grep($_ ne '__unnamed_1', keys %{$symbols{data}{data}}), 355 "has no data data symbols"); 356 ok(! exists $symbols{data}{common}, "has no data common symbols"); 357 358 # -DPERL_GLOBAL_STRUCT_PRIVATE should NOT have 359 # the extra text symbol for accessing the vars 360 # (as opposed to "just" -DPERL_GLOBAL_STRUCT) 361 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); 362} elsif ($GS) { 363 print "# -DPERL_GLOBAL_STRUCT\n"; 364 ok(!exists $data_symbols{PL_hash_seed}, "has no PL_hash_seed"); 365 ok(!exists $data_symbols{PL_ppaddr}, "has no PL_ppaddr"); 366 367 ok(! exists $symbols{data}{bss}, "has no data bss symbols"); 368 369 # These PerlIO data symbols are left visible with 370 # -DPERL_GLOBAL_STRUCT (as opposed to -DPERL_GLOBAL_STRUCT_PRIVATE) 371 my @PerlIO = 372 qw( 373 PerlIO_byte 374 PerlIO_crlf 375 PerlIO_pending 376 PerlIO_perlio 377 PerlIO_raw 378 PerlIO_remove 379 PerlIO_stdio 380 PerlIO_unix 381 PerlIO_utf8 382 ); 383 384 # PL_magic_vtables is const with -DPERL_GLOBAL_STRUCT_PRIVATE but 385 # otherwise not const -- because of SWIG which wants to modify 386 # the table. Evil SWIG, eeevil. 387 388 # my_cxt_index is used with PERL_IMPLICIT_CONTEXT, which 389 # -DPERL_GLOBAL_STRUCT has turned on. 390 eq_array([sort keys %{$symbols{data}{data}}], 391 [sort('PL_VarsPtr', 392 @PerlIO, 393 'PL_magic_vtables', 394 'my_cxt_index')], 395 "data data symbols"); 396 397 # Only one data common symbol, our "supervariable". 398 eq_array([sort keys %{$symbols{data}{common}}], 399 ['PL_Vars'], 400 "data common symbols"); 401 402 ok($symbols{data}{data}{PL_VarsPtr}{'globals.o'}, "has PL_VarsPtr"); 403 ok($symbols{data}{common}{PL_Vars}{'globals.o'}, "has PL_Vars"); 404 405 # -DPERL_GLOBAL_STRUCT has extra text symbol for accessing the vars. 406 ok($symbols{text}{Perl_GetVars}{'util.o'}, "has Perl_GetVars"); 407} else { 408 print "# neither -DPERL_GLOBAL_STRUCT nor -DPERL_GLOBAL_STRUCT_PRIVATE\n"; 409 410 if ( !$symbols{data}{common} ) { 411 # This is likely because Perl was compiled with 412 # -Accflags="-fno-common" 413 $symbols{data}{common} = $symbols{data}{bss}; 414 } 415 416 ok($symbols{data}{common}{PL_hash_seed}{'globals.o'}, "has PL_hash_seed"); 417 ok($symbols{data}{data}{PL_ppaddr}{'globals.o'}, "has PL_ppaddr"); 418 419 # None of the GLOBAL_STRUCT* business here. 420 ok(! exists $symbols{data}{data}{PL_VarsPtr}, "has no PL_VarsPtr"); 421 ok(! exists $symbols{data}{common}{PL_Vars}, "has no PL_Vars"); 422 ok(! exists $symbols{text}{Perl_GetVars}, "has no Perl_GetVars"); 423} 424 425# See the comments in the beginning for what "undefined symbols" 426# really means. We *should* have many of those, that is a good thing. 427ok(keys %{$symbols{undef}}, "has undefined symbols"); 428 429# There are certain symbols we expect to see. 430 431# chmod, socket, getenv, sigaction, exp, time are system/library 432# calls that should each see at least one use. exp can be expl 433# if so configured. 434my %expected = ( 435 chmod => undef, # There is no Configure symbol for chmod. 436 socket => 'd_socket', 437 getenv => undef, # There is no Configure symbol for getenv, 438 sigaction => 'd_sigaction', 439 time => 'd_time', 440 ); 441 442if ($Config{uselongdouble} && $Config{longdblsize} > $Config{doublesize}) { 443 $expected{expl} = undef; # There is no Configure symbol for expl. 444} elsif ($Config{usequadmath}) { 445 $expected{expq} = undef; # There is no Configure symbol for expq. 446} else { 447 $expected{exp} = undef; # There is no Configure symbol for exp. 448} 449 450# DynaLoader will use dlopen, unless we are building static, 451# and it is used in the platforms we are supporting in this test. 452if ($Config{usedl} ) { 453 $expected{dlopen} = 'd_dlopen'; 454} 455 456for my $symbol (sort keys %expected) { 457 if (defined $expected{$symbol} && !$Config{$expected{$symbol}}) { 458 SKIP: { 459 skip("no $symbol"); 460 } 461 next; 462 } 463 my @o = exists $symbols{undef}{$symbol} ? 464 sort keys %{ $symbols{undef}{$symbol} } : (); 465 ok(@o, "uses $symbol (@o)"); 466} 467 468# There are certain symbols we expect NOT to see. 469# 470# gets is horribly unsafe. 471# 472# fgets should not be used (Perl has its own API, sv_gets), 473# even without perlio. 474# 475# tmpfile is unsafe. 476# 477# strcat, strcpy, strncat, strncpy are unsafe. 478# 479# sprintf and vsprintf should not be used because 480# Perl has its own safer and more portable implementations. 481# (One exception: for certain floating point outputs 482# the native sprintf is still used in some platforms, see below.) 483# 484# atoi has unsafe and undefined failure modes, and is affected by locale. 485# Its cousins include atol and atoll. 486# 487# strtol and strtoul are affected by locale. 488# Cousins include strtoq. 489# 490# system should not be used, use pp_system or my_popen. 491# 492 493my %unexpected; 494 495for my $str (qw(system)) { 496 $unexpected{$str} = "d_$str"; 497} 498 499for my $stdio (qw(gets fgets tmpfile sprintf vsprintf)) { 500 $unexpected{$stdio} = undef; # No Configure symbol for these. 501} 502for my $str (qw(strcat strcpy strncat strncpy)) { 503 $unexpected{$str} = undef; # No Configure symbol for these. 504} 505 506$unexpected{atoi} = undef; # No Configure symbol for atoi. 507$unexpected{atol} = undef; # No Configure symbol for atol. 508 509for my $str (qw(atoll strtol strtoul strtoq)) { 510 $unexpected{$str} = "d_$str"; 511} 512 513for my $symbol (sort keys %unexpected) { 514 if (defined $unexpected{$symbol} && !$Config{$unexpected{$symbol}}) { 515 SKIP: { 516 skip("no $symbol"); 517 } 518 next; 519 } 520 my @o = exists $symbols{undef}{$symbol} ? 521 sort keys %{ $symbols{undef}{$symbol} } : (); 522 # While sprintf() is bad in the general case, 523 # some platforms implement Gconvert via sprintf, in sv.o. 524 if ($symbol eq 'sprintf' && 525 $Config{d_Gconvert} =~ /^sprintf/ && 526 @o == 1 && $o[0] eq 'sv.o') { 527 SKIP: { 528 skip("uses sprintf for Gconvert in sv.o"); 529 } 530 } else { 531 is(@o, 0, "uses no $symbol (@o)"); 532 } 533} 534 535# Check that any text symbols named S_ are not exported. 536my $export_S_prefix = 0; 537for my $t (sort grep { /^S_/ } keys %{$symbols{text}}) { 538 for my $o (sort keys %{$symbols{text}{$t}}) { 539 if (exists $symbols{text}{$t}{$o}{T}) { 540 fail($t, "$t exported from $o"); 541 $export_S_prefix++; 542 } 543 } 544} 545is($export_S_prefix, 0, "no S_ exports"); 546 547if (defined $nm_err_tmp) { 548 if (open(my $nm_err_fh, $nm_err_tmp)) { 549 my $error; 550 while (<$nm_err_fh>) { 551 # OS X has weird error where nm warns about 552 # "no name list" but then outputs fine. 553 if (/nm: no name list/ && $^O eq 'darwin') { 554 print "# $^O ignoring $nm output: $_"; 555 next; 556 } 557 warn "$0: Unexpected $nm error: $_"; 558 $error++; 559 } 560 die "$0: Unexpected $nm errors\n" if $error; 561 } else { 562 warn "Failed to open '$nm_err_tmp': $!\n"; 563 } 564} 565 566done_testing(); 567