1#!/usr/bin/perl -w 2 3use Text::Tabs; 4# 5# Unconditionally regenerate: 6# 7# pod/perlintern.pod 8# pod/perlapi.pod 9# 10# from information stored in 11# 12# embed.fnc 13# plus all the core .c, .h, and .pod files listed in MANIFEST 14# plus %extra_input_pods 15 16my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 ); 17 18# Has an optional arg, which is the directory to chdir to before reading 19# MANIFEST and the files 20# 21# This script is invoked as part of 'make all' 22# 23# The generated pod consists of sections of related elements, functions, 24# macros, and variables. The keys of %valid_sections give the current legal 25# ones. Just add a new key to add a section. 26# 27# Throughout the files read by this script are lines like 28# 29# =for apidoc_section Section Name 30# =for apidoc_section $section_name_variable 31# 32# "Section Name" (after having been stripped of leading space) must be one of 33# the legal section names, or an error is thrown. $section_name_variable must 34# be one of the legal section name variables defined below; these expand to 35# legal section names. This form is used so that minor wording changes in 36# these titles can be confied to this file. All the names of the variables 37# end in '_scn'; this suffix is optional in the apidoc_section lines. 38# 39# All API elements defined between this line and the next 'apidoc_section' 40# line will go into the section "Section Name" (or $section_name_variable), 41# sorted by dictionary order within it. perlintern and perlapi are parallel 42# documents, each potentially with a section "Section Name". Each element is 43# marked as to which document it goes into. If there are none for a 44# particular section in perlapi, that section is omitted. 45# 46# Also, in .[ch] files, there may be 47# 48# =head1 Section Name 49# 50# lines in comments. These are also used by this program to switch to section 51# "Section Name". The difference is that if there are any lines after the 52# =head1, inside the same comment, and before any =for apidoc-ish lines, they 53# are used as a heading for section "Section Name" (in both perlintern and 54# perlapi). This includes any =head[2-5]. If more than one '=head1 Section 55# Name' line has content, they appear in the generated pod in an undefined 56# order. Note that you can't use a $section_name_variable in =head1 lines 57# 58# The next =head1, =for apidoc_section, or file end terminates what goes into 59# the current section 60# 61# The %valid_sections hash below also can have header content, which will 62# appear before any =head1 content. The hash can also have footer content 63# content, which will appear at the end of the section, after all the 64# elements. 65# 66# The lines that define the actual functions, etc are documented in embed.fnc, 67# because they have flags which must be kept in sync with that file. 68 69use strict; 70use warnings; 71 72my $nroff_min_indent = 4; # for non-heading lines 73# 80 column terminal - 2 for pager adding 2 columns; 74my $max_width = 80 - 2 - $nroff_min_indent; 75my $standard_indent = 4; # Any additional indentations 76 77if (@ARGV) { 78 my $workdir = shift; 79 chdir $workdir 80 or die "Couldn't chdir to '$workdir': $!"; 81} 82require './regen/regen_lib.pl'; 83require './regen/embed_lib.pl'; 84 85my %described_elsewhere; 86 87# 88# See database of global and static function prototypes in embed.fnc 89# This is used to generate prototype headers under various configurations, 90# export symbols lists for different platforms, and macros to provide an 91# implicit interpreter context argument. 92# 93 94my %docs; 95my %seen; 96my %funcflags; 97my %missing; 98my %missing_macros; 99 100my $link_text = "Described in"; 101 102my $description_indent = 4; 103my $usage_indent = 3; # + initial blank yields 4 total 104 105my $AV_scn = 'AV Handling'; 106my $callback_scn = 'Callback Functions'; 107my $casting_scn = 'Casting'; 108my $casing_scn = 'Character case changing'; 109my $classification_scn = 'Character classification'; 110my $names_scn = 'Character names'; 111my $scope_scn = 'Compile-time scope hooks'; 112my $compiler_scn = 'Compiler and Preprocessor information'; 113my $directives_scn = 'Compiler directives'; 114my $concurrency_scn = 'Concurrency'; 115my $COP_scn = 'COPs and Hint Hashes'; 116my $CV_scn = 'CV Handling'; 117my $custom_scn = 'Custom Operators'; 118my $debugging_scn = 'Debugging'; 119my $display_scn = 'Display functions'; 120my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning'; 121my $errno_scn = 'Errno'; 122my $exceptions_scn = 'Exception Handling (simple) Macros'; 123my $filesystem_scn = 'Filesystem configuration values'; 124my $filters_scn = 'Source Filters'; 125my $floating_scn = 'Floating point'; 126my $genconfig_scn = 'General Configuration'; 127my $globals_scn = 'Global Variables'; 128my $GV_scn = 'GV Handling and Stashes'; 129my $hook_scn = 'Hook manipulation'; 130my $HV_scn = 'HV Handling'; 131my $io_scn = 'Input/Output'; 132my $io_formats_scn = 'I/O Formats'; 133my $integer_scn = 'Integer'; 134my $lexer_scn = 'Lexer interface'; 135my $locale_scn = 'Locales'; 136my $magic_scn = 'Magic'; 137my $memory_scn = 'Memory Management'; 138my $MRO_scn = 'MRO'; 139my $multicall_scn = 'Multicall Functions'; 140my $numeric_scn = 'Numeric Functions'; 141 142# Now combined, as unclear which functions go where, but separate names kept 143# to avoid 1) other code changes; 2) in case it seems better to split again 144my $optrees_scn = 'Optrees'; 145my $optree_construction_scn = $optrees_scn; # Was 'Optree construction'; 146my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions' 147my $pack_scn = 'Pack and Unpack'; 148my $pad_scn = 'Pad Data Structures'; 149my $password_scn = 'Password and Group access'; 150my $reports_scn = 'Reports and Formats'; 151my $paths_scn = 'Paths to system commands'; 152my $prototypes_scn = 'Prototype information'; 153my $regexp_scn = 'REGEXP Functions'; 154my $signals_scn = 'Signals'; 155my $site_scn = 'Site configuration'; 156my $sockets_scn = 'Sockets configuration values'; 157my $stack_scn = 'Stack Manipulation Macros'; 158my $string_scn = 'String Handling'; 159my $SV_flags_scn = 'SV Flags'; 160my $SV_scn = 'SV Handling'; 161my $tainting_scn = 'Tainting'; 162my $time_scn = 'Time'; 163my $typedefs_scn = 'Typedef names'; 164my $unicode_scn = 'Unicode Support'; 165my $utility_scn = 'Utility Functions'; 166my $versioning_scn = 'Versioning'; 167my $warning_scn = 'Warning and Dieing'; 168my $XS_scn = 'XS'; 169 170# Kept separate at end 171my $undocumented_scn = 'Undocumented elements'; 172 173my %valid_sections = ( 174 $AV_scn => {}, 175 $callback_scn => {}, 176 $casting_scn => {}, 177 $casing_scn => {}, 178 $classification_scn => {}, 179 $scope_scn => {}, 180 $compiler_scn => {}, 181 $directives_scn => {}, 182 $concurrency_scn => {}, 183 $COP_scn => {}, 184 $CV_scn => { 185 header => <<~'EOT', 186 This section documents functions to manipulate CVs which are 187 code-values, meaning subroutines. For more information, see 188 L<perlguts>. 189 EOT 190 }, 191 192 $custom_scn => {}, 193 $debugging_scn => {}, 194 $display_scn => {}, 195 $embedding_scn => {}, 196 $errno_scn => {}, 197 $exceptions_scn => {}, 198 $filesystem_scn => { 199 header => <<~'EOT', 200 Also see L</List of capability HAS_foo symbols>. 201 EOT 202 }, 203 $filters_scn => {}, 204 $floating_scn => { 205 header => <<~'EOT', 206 Also L</List of capability HAS_foo symbols> lists capabilities 207 that arent in this section. For example C<HAS_ASINH>, for the 208 hyperbolic sine function. 209 EOT 210 }, 211 $genconfig_scn => { 212 header => <<~'EOT', 213 This section contains configuration information not otherwise 214 found in the more specialized sections of this document. At the 215 end is a list of C<#defines> whose name should be enough to tell 216 you what they do, and a list of #defines which tell you if you 217 need to C<#include> files to get the corresponding functionality. 218 EOT 219 220 footer => <<~EOT, 221 222 =head2 List of capability C<HAS_I<foo>> symbols 223 224 This is a list of those symbols that dont appear elsewhere in ths 225 document that indicate if the current platform has a certain 226 capability. Their names all begin with C<HAS_>. Only those 227 symbols whose capability is directly derived from the name are 228 listed here. All others have their meaning expanded out elsewhere 229 in this document. This (relatively) compact list is because we 230 think that the expansion would add little or no value and take up 231 a lot of space (because there are so many). If you think certain 232 ones should be expanded, send email to 233 L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>. 234 235 Each symbol here will be C<#define>d if and only if the platform 236 has the capability. If you need more detail, see the 237 corresponding entry in F<config.h>. For convenience, the list is 238 split so that the ones that indicate there is a reentrant version 239 of a capability are listed separately 240 241 __HAS_LIST__ 242 243 And, the reentrant capabilities: 244 245 __HAS_R_LIST__ 246 247 Example usage: 248 249 =over $standard_indent 250 251 #ifdef HAS_STRNLEN 252 use strnlen() 253 #else 254 use an alternative implementation 255 #endif 256 257 =back 258 259 =head2 List of C<#include> needed symbols 260 261 This list contains symbols that indicate if certain C<#include> 262 files are present on the platform. If your code accesses the 263 functionality that one of these is for, you will need to 264 C<#include> it if the symbol on this list is C<#define>d. For 265 more detail, see the corresponding entry in F<config.h>. 266 267 __INCLUDE_LIST__ 268 269 Example usage: 270 271 =over $standard_indent 272 273 #ifdef I_WCHAR 274 #include <wchar.h> 275 #endif 276 277 =back 278 EOT 279 }, 280 $globals_scn => {}, 281 $GV_scn => {}, 282 $hook_scn => {}, 283 $HV_scn => {}, 284 $io_scn => {}, 285 $io_formats_scn => { 286 header => <<~'EOT', 287 These are used for formatting the corresponding type For example, 288 instead of saying 289 290 Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv); 291 292 use 293 294 Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv); 295 296 This keeps you from having to know if, say an IV, needs to be 297 printed as C<%d>, C<%ld>, or something else. 298 EOT 299 }, 300 $integer_scn => {}, 301 $lexer_scn => {}, 302 $locale_scn => {}, 303 $magic_scn => {}, 304 $memory_scn => {}, 305 $MRO_scn => {}, 306 $multicall_scn => {}, 307 $numeric_scn => {}, 308 $optrees_scn => {}, 309 $optree_construction_scn => {}, 310 $optree_manipulation_scn => {}, 311 $pack_scn => {}, 312 $pad_scn => {}, 313 $password_scn => {}, 314 $paths_scn => {}, 315 $prototypes_scn => {}, 316 $regexp_scn => {}, 317 $reports_scn => { 318 header => <<~"EOT", 319 These are used in the simple report generation feature of Perl. 320 See L<perlform>. 321 EOT 322 }, 323 $signals_scn => {}, 324 $site_scn => { 325 header => <<~'EOT', 326 These variables give details as to where various libraries, 327 installation destinations, I<etc.>, go, as well as what various 328 installation options were selected 329 EOT 330 }, 331 $sockets_scn => {}, 332 $stack_scn => {}, 333 $string_scn => { 334 header => <<~EOT, 335 See also C<L</$unicode_scn>>. 336 EOT 337 }, 338 $SV_flags_scn => {}, 339 $SV_scn => {}, 340 $tainting_scn => {}, 341 $time_scn => {}, 342 $typedefs_scn => {}, 343 $unicode_scn => { 344 header => <<~EOT, 345 L<perlguts/Unicode Support> has an introduction to this API. 346 347 See also C<L</$classification_scn>>, 348 C<L</$casing_scn>>, 349 and C<L</$string_scn>>. 350 Various functions outside this section also work specially with 351 Unicode. Search for the string "utf8" in this document. 352 EOT 353 }, 354 $utility_scn => {}, 355 $versioning_scn => {}, 356 $warning_scn => {}, 357 $XS_scn => {}, 358); 359 360# Somewhat loose match for an apidoc line so we can catch minor typos. 361# Parentheses are used to capture portions so that below we verify 362# that things are the actual correct syntax. 363my $apidoc_re = qr/ ^ (\s*) # $1 364 (=?) # $2 365 (\s*) # $3 366 for (\s*) # $4 367 apidoc (_item)? # $5 368 (\s*) # $6 369 (.*?) # $7 370 \s* \n /x; 371# Only certain flags, dealing with display, are acceptable for apidoc_item 372my $display_flags = "fFnDopsTx"; 373 374sub check_api_doc_line ($$) { 375 my ($file, $in) = @_; 376 377 return unless $in =~ $apidoc_re; 378 379 my $is_item = defined $5; 380 my $is_in_proper_form = length $1 == 0 381 && length $2 > 0 382 && length $3 == 0 383 && length $4 > 0 384 && length $7 > 0 385 && ( length $6 > 0 386 || ($is_item && substr($7, 0, 1) eq '|')); 387 my $proto_in_file = $7; 388 my $proto = $proto_in_file; 389 $proto = "||$proto" if $proto !~ /\|/; 390 my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto; 391 392 $name && $is_in_proper_form or die <<EOS; 393Bad apidoc at $file line $.: 394 $in 395Expected: 396 =for apidoc flags|returntype|name|arg|arg|... 397 =for apidoc flags|returntype|name 398 =for apidoc name 399(or 'apidoc_item') 400EOS 401 402 die "Only [$display_flags] allowed in apidoc_item:\n$in" 403 if $is_item && $flags =~ /[^$display_flags]/; 404 405 return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args); 406} 407 408sub embed_override($) { 409 my ($element_name) = shift; 410 411 # If the entry is also in embed.fnc, it should be defined 412 # completely there, but not here 413 my $embed_docref = delete $funcflags{$element_name}; 414 415 return unless $embed_docref and %$embed_docref; 416 417 my $flags = $embed_docref->{'flags'}; 418 warn "embed.fnc entry '$element_name' missing 'd' flag" 419 unless $flags =~ /d/; 420 421 return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*); 422} 423 424# The section that is in effect at the beginning of the given file. If not 425# listed here, an apidoc_section line must precede any apidoc lines. 426# This allows the files listed here that generally are single-purpose, to not 427# have to worry about the autodoc section 428my %initial_file_section = ( 429 'av.c' => $AV_scn, 430 'av.h' => $AV_scn, 431 'cv.h' => $CV_scn, 432 'deb.c' => $debugging_scn, 433 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn, 434 'doio.c' => $io_scn, 435 'gv.c' => $GV_scn, 436 'gv.h' => $GV_scn, 437 'hv.h' => $HV_scn, 438 'locale.c' => $locale_scn, 439 'malloc.c' => $memory_scn, 440 'numeric.c' => $numeric_scn, 441 'opnames.h' => $optree_construction_scn, 442 'pad.h'=> $pad_scn, 443 'patchlevel.h' => $versioning_scn, 444 'perlio.h' => $io_scn, 445 'pod/perlapio.pod' => $io_scn, 446 'pod/perlcall.pod' => $callback_scn, 447 'pod/perlembed.pod' => $embedding_scn, 448 'pod/perlfilter.pod' => $filters_scn, 449 'pod/perliol.pod' => $io_scn, 450 'pod/perlmroapi.pod' => $MRO_scn, 451 'pod/perlreguts.pod' => $regexp_scn, 452 'pp_pack.c' => $pack_scn, 453 'pp_sort.c' => $SV_scn, 454 'regcomp.c' => $regexp_scn, 455 'regexp.h' => $regexp_scn, 456 'sv.h' => $SV_scn, 457 'sv.c' => $SV_scn, 458 'sv_inline.h' => $SV_scn, 459 'taint.c' => $tainting_scn, 460 'unicode_constants.h' => $unicode_scn, 461 'utf8.c' => $unicode_scn, 462 'utf8.h' => $unicode_scn, 463 'vutil.c' => $versioning_scn, 464 ); 465 466sub autodoc ($$) { # parse a file and extract documentation info 467 my($fh,$file) = @_; 468 my($in, $line_num, $header, $section); 469 470 $section = $initial_file_section{$file} 471 if defined $initial_file_section{$file}; 472 473 my $file_is_C = $file =~ / \. [ch] $ /x; 474 475 # Count lines easier 476 my $get_next_line = sub { $line_num++; return <$fh> }; 477 478 # Read the file 479 while ($in = $get_next_line->()) { 480 last unless defined $in; 481 482 next unless ( $in =~ / ^ =for [ ]+ apidoc /x 483 # =head1 lines only have effect in C files 484 || ($file_is_C && $in =~ /^=head1/)); 485 486 # Here, the line introduces a portion of the input that we care about. 487 # Either it is for an API element, or heading text which we expect 488 # will be used for elements later in the file 489 490 my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file); 491 my (@args, @items); 492 493 # If the line starts a new section ... 494 if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) { 495 496 $section = $1; 497 if ($section =~ / ^ \$ /x) { 498 $section .= '_scn' unless $section =~ / _scn $ /; 499 $section = eval "$section"; 500 die "Unknown \$section variable '$section' in $file: $@" if $@; 501 } 502 die "Unknown section name '$section' in $file near line $.\n" 503 unless defined $valid_sections{$section}; 504 505 } 506 elsif ($in=~ /^ =for [ ]+ apidoc \B /x) { # Otherwise better be a 507 # plain apidoc line 508 die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/; 509 die "apidoc_item doesn't immediately follow an apidoc entry: '$in'"; 510 } 511 else { # Plain apidoc 512 513 ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args) 514 = check_api_doc_line($file, $in); 515 # Override this line with any info in embed.fnc 516 my ($embed_flags, $embed_ret_type, @embed_args) 517 = embed_override($element_name); 518 if ($embed_ret_type) { 519 warn "embed.fnc entry overrides redundant information in" 520 . " '$proto_in_file' in $file" 521 if $flags || $ret_type || @args; 522 $flags = $embed_flags; 523 $ret_type = $embed_ret_type; 524 @args = @embed_args; 525 } 526 elsif ($flags !~ /[my]/) { # Not in embed.fnc, is missing if not 527 # a macro or typedef 528 $missing{$element_name} = $file; 529 } 530 531 die "flag '$1' is not legal (for function $element_name (from $file))" 532 if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy] ) /x; 533 534 die "'u' flag must also have 'm' or 'y' flags' for $element_name" 535 if $flags =~ /u/ && $flags !~ /[my]/; 536 warn ("'$element_name' not \\w+ in '$proto_in_file' in $file") 537 if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x; 538 539 if (exists $seen{$element_name} && $flags !~ /h/) { 540 die ("'$element_name' in $file was already documented in $seen{$element_name}"); 541 } 542 else { 543 $seen{$element_name} = $file; 544 } 545 } 546 547 # Here we have processed the initial line in the heading text or API 548 # element, and have saved the important information from it into the 549 # corresponding variables. Now accumulate the text that applies to it 550 # up to a terminating line, which is one of: 551 # 1) =cut 552 # 2) =head (in a C file only =head1) 553 # 3) an end comment line in a C file: m:^\s*\*/: 554 # 4) =for apidoc... (except apidoc_item lines) 555 $text = ""; 556 my $head_ender_num = ($file_is_C) ? 1 : ""; 557 while (defined($in = $get_next_line->())) { 558 559 last if $in =~ /^=cut/x; 560 last if $in =~ /^=head$head_ender_num/; 561 562 if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) { 563 564 # End of comment line in C files is a fall-back terminator, 565 # but warn only if there actually is some accumulated text 566 warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/; 567 last; 568 } 569 570 if ($in !~ / ^ =for [ ]+ apidoc /x) { 571 $text .= $in; 572 next; 573 } 574 575 # Here, the line is an apidoc line. All but apidoc_item terminate 576 # the text being accumulated. 577 last if $in =~ / ^ =for [ ]+ apidoc_section /x; 578 579 my ($item_name, $item_flags, $item_ret_type, $is_item, 580 $item_proto, @item_args) = check_api_doc_line($file, $in); 581 last unless $is_item; 582 583 # Here, is an apidoc_item_line; They can only come within apidoc 584 # paragraphs. 585 die "Unexpected api_doc_item line '$item_proto'" 586 unless $element_name; 587 588 # We accept blank lines between these, but nothing else; 589 die "apidoc_item lines must immediately follow apidoc lines for " 590 . " '$element_name' in $file" 591 if $text =~ /\S/; 592 # Override this line with any info in embed.fnc 593 my ($embed_flags, $embed_ret_type, @embed_args) 594 = embed_override($item_name); 595 if ($embed_ret_type) { 596 warn "embed.fnc entry overrides redundant information in" 597 . " '$item_proto' in $file" 598 if $item_flags || $item_ret_type || @item_args; 599 600 $item_flags = $embed_flags; 601 $item_ret_type = $embed_ret_type; 602 @item_args = @embed_args; 603 } 604 605 # Use the base entry flags if none for this item; otherwise add in 606 # any non-display base entry flags. 607 if ($item_flags) { 608 $item_flags .= $flags =~ s/[$display_flags]//rg; 609 } 610 else { 611 $item_flags = $flags; 612 } 613 $item_ret_type = $ret_type unless $item_ret_type; 614 @item_args = @args unless @item_args; 615 push @items, { name => $item_name, 616 ret_type => $item_ret_type, 617 flags => $item_flags, 618 args => [ @item_args ], 619 }; 620 621 # This line shows that this element is documented. 622 delete $funcflags{$item_name}; 623 } 624 625 # Here, are done accumulating the text for this item. Trim it 626 $text =~ s/ ^ \s* //x; 627 $text =~ s/ \s* $ //x; 628 $text .= "\n" if $text ne ""; 629 630 # And treat all-spaces as nothing at all 631 undef $text unless $text =~ /\S/; 632 633 if ($element_name) { 634 635 # Here, we have accumulated into $text, the pod for $element_name 636 my $where = $flags =~ /A/ ? 'api' : 'guts'; 637 638 die "No =for apidoc_section nor =head1 in $file for '$element_name'\n" 639 unless defined $section; 640 if (exists $docs{$where}{$section}{$element_name}) { 641 warn "$0: duplicate API entry for '$element_name' in" 642 . " $where/$section\n"; 643 next; 644 } 645 646 # Override the text with just a link if the flags call for that 647 my $is_link_only = ($flags =~ /h/); 648 if ($is_link_only) { 649 if ($file_is_C) { 650 die "Can't currently handle link with items to it:\n$in" if @items; 651 redo; # Don't put anything if C source 652 } 653 654 # Here, is an 'h' flag in pod. We add a reference to the pod (and 655 # nothing else) to perlapi/intern. (It would be better to add a 656 # reference to the correct =item,=header, but something that makes 657 # it harder is that it that might be a duplicate, like '=item *'; 658 # so that is a future enhancement XXX. Another complication is 659 # there might be more than one deserving candidates.) 660 my $podname = $file =~ s!.*/!!r; # Rmv directory name(s) 661 $podname =~ s/\.pod//; 662 $text = "Described in L<$podname>.\n"; 663 664 # Don't output a usage example for linked to documentation if 665 # it is trivial (has no arguments) and we aren't to add a 666 # semicolon 667 $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/; 668 669 # Keep track of all the pod files that we refer to. 670 push $described_elsewhere{$podname}->@*, $podname; 671 } 672 673 $docs{$where}{$section}{$element_name}{flags} = $flags; 674 $docs{$where}{$section}{$element_name}{pod} = $text; 675 $docs{$where}{$section}{$element_name}{file} = $file; 676 $docs{$where}{$section}{$element_name}{ret_type} = $ret_type; 677 push $docs{$where}{$section}{$element_name}{args}->@*, @args; 678 push $docs{$where}{$section}{$element_name}{items}->@*, @items; 679 } 680 elsif ($text) { 681 $valid_sections{$section}{header} = "" unless 682 defined $valid_sections{$section}{header}; 683 $valid_sections{$section}{header} .= "\n$text"; 684 } 685 686 # We already have the first line of what's to come in $in 687 redo; 688 689 } # End of loop through input 690} 691 692my %configs; 693my @has_defs; 694my @has_r_defs; # Reentrant symbols 695my @include_defs; 696 697sub parse_config_h { 698 use re '/aa'; # Everthing is ASCII in this file 699 700 # Process config.h 701 my $config_h = 'config.h'; 702 $config_h = 'win32/config.h' unless -e $config_h; 703 die "Can't find $config_h" unless -e $config_h; 704 open my $fh, '<', $config_h or die "Can't open $config_h: $!"; 705 while (<$fh>) { 706 707 # Look for lines like /* FOO_BAR: 708 # By convention all config.h descriptions begin like that 709 if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) { 710 my $name = $1; 711 712 # Here we are starting the description for $name in config.h. We 713 # accumulate the entire description for it into @description. 714 # Flowing text from one input line to another is appended into the 715 # same array element to make a single flowing line element, but 716 # verbatim lines are kept as separate elements in @description. 717 # This will facilitate later doing pattern matching without regard 718 # to line boundaries on non-verbatim text. 719 720 die "Multiple config.h entries for '$name'" 721 if defined $configs{$name}{description}; 722 723 # Get first line of description 724 $_ = <$fh>; 725 726 # Each line in the description begins with blanks followed by '/*' 727 # and some spaces. 728 die "Unexpected config.h initial line for $name: '$_'" 729 unless s/ ^ ( \s* \* \s* ) //x; 730 my $initial_text = $1; 731 732 # Initialize the description with this first line (after having 733 # stripped the prefix text) 734 my @description = $_; 735 736 # The first line is used as a template for how much indentation 737 # each normal succeeding line has. Lines indented further 738 # will be considered as intended to be verbatim. But, empty lines 739 # likely won't have trailing blanks, so just strip the whole thing 740 # for them. 741 my $strip_initial_qr = qr! \s* \* \s* $ 742 | \Q$initial_text\E 743 !x; 744 $configs{$name}{verbatim} = 0; 745 746 # Read in the remainder of the description 747 while (<$fh>) { 748 last if s| ^ \s* \* / ||x; # A '*/' ends it 749 750 die "Unexpected config.h description line for $name: '$_'" 751 unless s/$strip_initial_qr//; 752 753 # Fix up the few flawed lines in config.h wherein a new 754 # sentence begins with a tab (and maybe a space after that). 755 # Although none of them currently do, let it recognize 756 # something like 757 # 758 # "... text"). The next sentence ... 759 # 760 s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1 $2/xg; 761 762 # If this line has extra indentation or looks to have columns, 763 # it should be treated as verbatim. Columns are indicated by 764 # use of interior: tabs, 3 spaces in a row, or even 2 spaces 765 # not preceded by punctuation. 766 if ($_ !~ m/ ^ \s 767 | \S (?: \t 768 | \s{3} 769 | (*nlb:[[:punct:]]) \s{2} 770 ) 771 /x) 772 { 773 # But here, is not a verbatim line. Add an empty line if 774 # this is the first non-verbatim after a run of verbatims 775 if ($description[-1] =~ /^\s/) { 776 push @description, "\n", $_; 777 } 778 else { # Otherwise, append this flowing line to the 779 # current flowing line 780 $description[-1] .= $_; 781 } 782 } 783 else { 784 $configs{$name}{verbatim} = 1; 785 786 # The first verbatim line in a run of them is separated by an 787 # empty line from the flowing lines above it 788 push @description, "\n" if $description[-1] =~ /^\S/; 789 790 $_ = Text::Tabs::expand($_); 791 792 # Only a single space so less likely to wrap 793 s/ ^ \s* / /x; 794 795 push @description, $_; 796 } 797 } 798 799 push $configs{$name}{description}->@*, @description 800 801 } # Not a description; see if it is a macro definition. 802 elsif (m! ^ 803 (?: / \* )? # Optional commented-out 804 # indication 805 \# \s* define \s+ ( \w+ ) # $1 is the name 806 ( \s* ) # $2 indicates if args or not 807 ( .*? ) # $3 is any definition 808 (?: / \s* \* \* / )? # Optional trailing /**/ or / **/ 809 $ 810 !x) 811 { 812 my $name = $1; 813 814 # There can be multiple definitions for a name. We want to know 815 # if any of them has arguments, and if any has a body. 816 $configs{$name}{has_args} //= $2 eq ""; 817 $configs{$name}{has_args} ||= $2 eq ""; 818 $configs{$name}{has_defn} //= $3 ne ""; 819 $configs{$name}{has_defn} ||= $3 ne ""; 820 } 821 } 822 823 # We now have stored the description and information about every #define 824 # in the file. The description is in a form convenient to operate on to 825 # convert to pod. Do that now. 826 foreach my $name (keys %configs) { 827 next unless defined $configs{$name}{description}; 828 829 # All adjacent non-verbatim lines of the description are appended 830 # together in a single element in the array. This allows the patterns 831 # to work across input line boundaries. 832 833 my $pod = ""; 834 while (defined ($_ = shift $configs{$name}{description}->@*)) { 835 chomp; 836 837 if (/ ^ \S /x) { # Don't edit verbatim lines 838 839 # Enclose known file/path names not already so enclosed 840 # with <...>. (Some entries in config.h are already 841 # '<path/to/file>') 842 my $file_name_qr = qr! [ \w / ]+ \. 843 (?: c | h | xs | p [lm] | pmc | PL 844 | sh | SH | exe ) \b 845 !xx; 846 my $path_name_qr = qr! (?: / \w+ )+ !x; 847 for my $re ($file_name_qr, $path_name_qr) { 848 s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx; 849 } 850 851 # Enclose <... file/path names with F<...> (but no double 852 # angle brackets) 853 for my $re ($file_name_qr, $path_name_qr) { 854 s! < ( $re ) > !F<$1>!gxx; 855 } 856 857 # Explain metaconfig units 858 s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx; 859 860 # Convert "See foo" to "See C<L</foo>>" if foo is described in 861 # this file. Also create a link to the known file INSTALL. 862 # And, to be more general, handle "See also foo and bar", and 863 # "See also foo, bar, and baz" 864 while (m/ \b [Ss]ee \s+ 865 (?: also \s+ )? ( \w+ ) 866 (?: , \s+ ( \w+ ) )? 867 (?: ,? \s+ and \s+ ( \w+ ) )? /xg) { 868 my @links = $1; 869 push @links, $2 if defined $2; 870 push @links, $3 if defined $3; 871 foreach my $link (@links) { 872 if ($link eq 'INSTALL') { 873 s/ \b INSTALL \b /C<L<INSTALL>>/xg; 874 } 875 elsif (grep { $link =~ / \b $_ \b /x } keys %configs) { 876 s| \b $link \b |C<L</$link>>|xg; 877 $configs{$link}{linked} = 1; 878 $configs{$name}{linked} = 1; 879 } 880 } 881 } 882 883 # Enclose what we think are symbols with C<...>. 884 no warnings 'experimental::vlb'; 885 s/ (*nlb:<) 886 ( 887 # Any word followed immediately with parens or 888 # brackets 889 \b \w+ (?: \( [^)]* \) # parameter list 890 | \[ [^]]* \] # or array reference 891 ) 892 | (*plb: ^ | \s ) -D \w+ # Also -Dsymbols. 893 | \b (?: struct | union ) \s \w+ 894 895 # Words that contain underscores (which are 896 # definitely not text) or three uppercase letters in 897 # a row. Length two ones, like IV, aren't enclosed, 898 # because they often don't look as nice. 899 | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b 900 ) 901 (*nla:>) 902 /C<$1>/xg; 903 904 # These include foo when the name is HAS_foo. This is a 905 # heuristic which works in most cases. 906 if ($name =~ / ^ HAS_ (.*) /x) { 907 my $symbol = lc $1; 908 909 # Don't include path components, nor things already in 910 # <>, or with trailing '(', '[' 911 s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg; 912 } 913 } 914 915 $pod .= "$_\n"; 916 } 917 delete $configs{$name}{description}; 918 919 $configs{$name}{pod} = $pod; 920 } 921 922 # Now have converted the description to pod. We also now have enough 923 # information that we can do cross checking to find definitions without 924 # corresponding pod, and see if they are mentioned in some description; 925 # otherwise they aren't documented. 926 NAME: 927 foreach my $name (keys %configs) { 928 929 # A definition without pod 930 if (! defined $configs{$name}{pod}) { 931 932 # Leading/trailing underscore means internal to config.h, e.g., 933 # _GNU_SOURCE 934 next if $name =~ / ^ _ /x; 935 next if $name =~ / _ $ /x; 936 937 # MiXeD case names are internal to config.h; the first 4 938 # characters are sufficient to determine this 939 next if $name =~ / ^ [[:upper:]] [[:lower:]] 940 [[:upper:]] [[:lower:]] 941 /x; 942 943 # Here, not internal to config.h. Look to see if this symbol is 944 # mentioned in the pod of some other. If so, assume it is 945 # documented. 946 foreach my $check_name (keys %configs) { 947 my $this_element = $configs{$check_name}; 948 my $this_pod = $this_element->{pod}; 949 if (defined $this_pod) { 950 next NAME if $this_pod =~ / \b $name \b /x; 951 } 952 } 953 954 warn "$name has no documentation\n"; 955 $missing_macros{$name} = 'config.h'; 956 957 next; 958 } 959 960 my $has_defn = $configs{$name}{has_defn}; 961 my $has_args = $configs{$name}{has_args}; 962 963 # Check if any section already has an entry for this element. 964 # If so, it better be a placeholder, in which case we replace it 965 # with this entry. 966 foreach my $section (keys $docs{'api'}->%*) { 967 if (exists $docs{'api'}{$section}{$name}) { 968 my $was = $docs{'api'}{$section}{$name}->{pod}; 969 $was = "" unless $was; 970 chomp $was; 971 if ($was ne "" && $was !~ m/$link_text/) { 972 die "Multiple descriptions for $name\n" 973 . "$section contained '$was'"; 974 } 975 $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod}; 976 $configs{$name}{section} = $section; 977 last; 978 } 979 } 980 981 my $handled = 0; # Haven't handled this yet 982 983 if (defined $configs{$name}{'section'}) { 984 # This has been taken care of elsewhere. 985 $handled = 1; 986 } 987 else { 988 my $flags = ""; 989 if ($has_defn && ! $has_args) { 990 $configs{$name}{args} = 1; 991 } 992 993 # Symbols of the form I_FOO are for #include files. They have 994 # special usage information 995 if ($name =~ / ^ I_ ( .* ) /x) { 996 my $file = lc $1 . '.h'; 997 $configs{$name}{usage} = <<~"EOT"; 998 #ifdef $name 999 #include <$file> 1000 #endif 1001 EOT 1002 } 1003 1004 # Compute what section this variable should go into. This 1005 # heuristic was determined by manually inspecting the current 1006 # things in config.h, and should be adjusted as necessary as 1007 # deficiencies are found. 1008 # 1009 # This is the default section for macros with a definiton but 1010 # no arguments, meaning it is replaced unconditionally 1011 # 1012 my $sb = qr/ _ | \b /x; # segment boundary 1013 my $dash_or_spaces = qr/ - | \s+ /x; 1014 my $pod = $configs{$name}{pod}; 1015 if ($name =~ / ^ USE_ /x) { 1016 $configs{$name}{'section'} = $site_scn; 1017 } 1018 elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x) 1019 { 1020 $configs{$name}{'section'} = $time_scn; 1021 } 1022 elsif ( $name =~ / ^ [[:alpha:]]+ f $ /x 1023 && $configs{$name}{pod} =~ m/ \b format \b /ix) 1024 { 1025 $configs{$name}{'section'} = $io_formats_scn; 1026 } 1027 elsif ($name =~ / DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV 1028 | $sb CASTFLAGS $sb 1029 | QUADMATH 1030 | $sb (?: IS )? NAN 1031 | $sb (?: IS )? FINITE 1032 /x) 1033 { 1034 $configs{$name}{'section'} = 1035 $floating_scn; 1036 } 1037 elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) { 1038 $configs{$name}{'section'} = $filesystem_scn; 1039 } 1040 elsif ( $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x 1041 || $configs{$name}{pod} =~ m/ \b align /x) 1042 { 1043 $configs{$name}{'section'} = $compiler_scn; 1044 } 1045 elsif ($name =~ / ^ [IU] [ \d V ] 1046 | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx) 1047 { 1048 $configs{$name}{'section'} = $integer_scn; 1049 } 1050 elsif ($name =~ / $sb t $sb /x) { 1051 $configs{$name}{'section'} = $typedefs_scn; 1052 $flags .= 'y'; 1053 } 1054 elsif ( $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x 1055 && $configs{$name}{pod} =~ m/ \b format \b /ix) 1056 { 1057 $configs{$name}{'section'} = $io_formats_scn; 1058 } 1059 elsif ($name =~ / BACKTRACE /x) { 1060 $configs{$name}{'section'} = $debugging_scn; 1061 } 1062 elsif ($name =~ / ALLOC $sb /x) { 1063 $configs{$name}{'section'} = $memory_scn; 1064 } 1065 elsif ( $name =~ / STDIO | FCNTL | EOF | FFLUSH 1066 | $sb FILE $sb 1067 | $sb DIR $sb 1068 | $sb LSEEK 1069 | $sb INO $sb 1070 | $sb OPEN 1071 | $sb CLOSE 1072 | ^ DIR 1073 | ^ INO $sb 1074 | DIR $ 1075 | FILENAMES 1076 /x 1077 || $configs{$name}{pod} =~ m! I/O | stdio 1078 | file \s+ descriptor 1079 | file \s* system 1080 | statfs 1081 !x) 1082 { 1083 $configs{$name}{'section'} = $filesystem_scn; 1084 } 1085 elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) { 1086 $configs{$name}{'section'} = $signals_scn; 1087 } 1088 elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) { 1089 $configs{$name}{'section'} = $prototypes_scn; 1090 } 1091 elsif ( $name =~ / ^ LOC_ /x 1092 || $configs{$name}{pod} =~ /full path/i) 1093 { 1094 $configs{$name}{'section'} = $paths_scn; 1095 } 1096 elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) { 1097 $configs{$name}{'section'} = $locale_scn; 1098 } 1099 elsif ($configs{$name}{pod} =~ / GCC | C99 | C\+\+ /xi) { 1100 $configs{$name}{'section'} = $compiler_scn; 1101 } 1102 elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x) 1103 { 1104 $configs{$name}{'section'} = $password_scn; 1105 } 1106 elsif ($name =~ / SOCKET | $sb SOCK /x) { 1107 $configs{$name}{'section'} = $sockets_scn; 1108 } 1109 elsif ( $name =~ / THREAD | MULTIPLICITY /x 1110 || $configs{$name}{pod} =~ m/ \b pthread /ix) 1111 { 1112 $configs{$name}{'section'} = $concurrency_scn; 1113 } 1114 elsif ($name =~ / PERL | ^ PRIV | SITE | ARCH | BIN 1115 | VENDOR | ^ USE 1116 /x) 1117 { 1118 $configs{$name}{'section'} = $site_scn; 1119 } 1120 elsif ( $pod =~ / \b floating $dash_or_spaces point \b /ix 1121 || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix 1122 || $pod =~ / \b doubles \b /ix 1123 || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix) 1124 { 1125 $configs{$name}{'section'} = 1126 $floating_scn; 1127 } 1128 else { 1129 # Above are the specific sections. The rest go into a 1130 # grab-bag of general configuration values. However, we put 1131 # two classes of them into lists of their names, without their 1132 # descriptions, when we think that the description doesn't add 1133 # any real value. One list contains the #include variables: 1134 # the description is basically boiler plate for each of these. 1135 # The other list contains the very many things that are of the 1136 # form HAS_foo, and \bfoo\b is contained in its description, 1137 # and there is no verbatim text in the pod or links to/from it 1138 # (which would add value). That means that it is likely the 1139 # intent of the variable can be gleaned from just its name, 1140 # and unlikely the description adds signficant value, so just 1141 # listing them suffices. Giving their descriptions would 1142 # expand this pod significantly with little added value. 1143 if ( ! $has_defn 1144 && ! $configs{$name}{verbatim} 1145 && ! $configs{$name}{linked}) 1146 { 1147 if ($name =~ / ^ I_ ( .* ) /x) { 1148 push @include_defs, $name; 1149 next; 1150 } 1151 elsif ($name =~ / ^ HAS_ ( .* ) /x) { 1152 my $canonical_name = $1; 1153 $canonical_name =~ s/_//g; 1154 1155 my $canonical_pod = $configs{$name}{pod}; 1156 $canonical_pod =~ s/_//g; 1157 1158 if ($canonical_pod =~ / \b $canonical_name \b /xi) { 1159 if ($name =~ / $sb R $sb /x) { 1160 push @has_r_defs, $name; 1161 } 1162 else { 1163 push @has_defs, $name; 1164 } 1165 next; 1166 } 1167 } 1168 } 1169 1170 $configs{$name}{'section'} = $genconfig_scn; 1171 } 1172 1173 my $section = $configs{$name}{'section'}; 1174 die "Internal error: '$section' not in \%valid_sections" 1175 unless grep { $_ eq $section } keys %valid_sections; 1176 $flags .= 'AdmnT'; 1177 $flags .= 'U' unless defined $configs{$name}{usage}; 1178 1179 # All the information has been gathered; save it 1180 $docs{'api'}{$section}{$name}{flags} = $flags; 1181 $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod}; 1182 $docs{'api'}{$section}{$name}{ret_type} = ""; 1183 $docs{'api'}{$section}{$name}{file} = 'config.h'; 1184 $docs{'api'}{$section}{$name}{usage} 1185 = $configs{$name}{usage} if defined $configs{$name}{usage}; 1186 push $docs{'api'}{$section}{$name}{args}->@*, (); 1187 push $docs{'api'}{$section}{$name}{items}->@*, (); 1188 } 1189 } 1190} 1191 1192sub format_pod_indexes($) { 1193 my $entries_ref = shift; 1194 1195 # Output the X<> references to the names, packed since they don't get 1196 # displayed, but not too many per line so that when someone is editing the 1197 # file, it doesn't run on 1198 1199 my $text =""; 1200 my $line_length = 0; 1201 for my $name (sort dictionary_order $entries_ref->@*) { 1202 my $entry = "X<$name>"; 1203 my $entry_length = length $entry; 1204 1205 # Don't loop forever if we have a verrry long name, and don't go too 1206 # far to the right. 1207 if ($line_length > 0 && $line_length + $entry_length > $max_width) { 1208 $text .= "\n"; 1209 $line_length = 0; 1210 } 1211 1212 $text .= $entry; 1213 $line_length += $entry_length; 1214 } 1215 1216 return $text; 1217} 1218 1219sub docout ($$$) { # output the docs for one function group 1220 my($fh, $element_name, $docref) = @_; 1221 1222 # Trim trailing space 1223 $element_name =~ s/\s*$//; 1224 1225 my $flags = $docref->{flags}; 1226 my $pod = $docref->{pod} // ""; 1227 my $file = $docref->{file}; 1228 1229 my @items = $docref->{items}->@*; 1230 1231 # Make the main element the first of the items. This allows uniform 1232 # treatment below 1233 unshift @items, { name => $element_name, 1234 flags => $flags, 1235 ret_type => $docref->{ret_type}, 1236 args => [ $docref->{args}->@* ], 1237 }; 1238 1239 warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/; 1240 1241 print $fh "\n=over $description_indent\n"; 1242 print $fh "\n=item C<$_->{name}>\n" for @items; 1243 1244 # If we're printing only a link to an element, this isn't the major entry, 1245 # so no X<> here. 1246 if ($flags !~ /h/) { 1247 print $fh "X<$_->{name}>" for @items; 1248 print $fh "\n"; 1249 } 1250 1251 my @deprecated; 1252 my @experimental; 1253 for my $item (@items) { 1254 push @deprecated, "C<$item->{name}>" if $item->{flags} =~ /D/; 1255 push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/; 1256 } 1257 1258 for my $which (\@deprecated, \@experimental) { 1259 if ($which->@*) { 1260 my $is; 1261 my $it; 1262 my $list; 1263 1264 if ($which->@* == 1) { 1265 $is = 'is'; 1266 $it = 'it'; 1267 $list = $which->[0]; 1268 } 1269 elsif ($which->@* == @items) { 1270 $is = 'are'; 1271 $it = 'them'; 1272 $list = (@items == 2) 1273 ? "both forms" 1274 : "all these forms"; 1275 } 1276 else { 1277 $is = 'are'; 1278 $it = 'them'; 1279 my $final = pop $which->@*; 1280 $list = "the " . join ", ", $which->@*; 1281 $list .= "," if $which->@* > 1; 1282 $list .= " and $final forms"; 1283 } 1284 1285 if ($which == \@deprecated) { 1286 print $fh <<~"EOT"; 1287 1288 C<B<DEPRECATED!>> It is planned to remove $list 1289 from a future release of Perl. Do not use $it for 1290 new code; remove $it from existing code. 1291 EOT 1292 } 1293 else { 1294 print $fh <<~"EOT"; 1295 1296 NOTE: $list $is B<experimental> and may change or be 1297 removed without notice. 1298 EOT 1299 } 1300 } 1301 } 1302 1303 chomp $pod; # Make sure prints pod with a single trailing \n 1304 print $fh "\n", $pod, "\n"; 1305 1306 for my $item (@items) { 1307 my $item_flags = $item->{flags}; 1308 my $item_name = $item->{name}; 1309 1310 print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n" 1311 if $item_flags =~ /O/; 1312 # Is Perl_, but no #define foo # Perl_foo 1313 if ( ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/) 1314 1315 # Can't handle threaded varargs 1316 || ($item_flags =~ /f/ && $item_flags !~ /T/)) 1317 { 1318 $item->{name} = "Perl_$item_name"; 1319 print $fh <<~"EOT"; 1320 1321 NOTE: C<$item_name> must be explicitly called as 1322 C<$item->{name}> 1323 EOT 1324 print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/; 1325 print $fh ".\n"; 1326 } 1327 } 1328 1329 if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough 1330 # to never warrant a usage line 1331 warn("U and s flags are incompatible") 1332 if $flags =~ /U/ && $flags =~ /s/; 1333 # nothing 1334 } else { 1335 1336 print $fh "\n=over $usage_indent\n"; 1337 1338 if (defined $docref->{usage}) { # An override of the usage section 1339 print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n"; 1340 } 1341 else { 1342 1343 # Add the thread context formal parameter on expanded-out names 1344 for my $item (@items) { 1345 unshift $item->{args}->@*, (($item->{args}->@*) 1346 ? "pTHX_" 1347 : "pTHX") 1348 if $item->{flags} !~ /T/ 1349 && $item->{name} =~ /^Perl_/; 1350 } 1351 1352 # Look through all the items in this entry. If all have the same 1353 # return type and arguments (including thread context), only the 1354 # main entry is displayed. 1355 # Also, find the longest return type and longest name so that if 1356 # multiple ones are shown, they can be vertically aligned nicely 1357 my $need_individual_usage = 0; 1358 my $longest_name_length = length $items[0]->{name}; 1359 my $base_ret_type = $items[0]->{ret_type}; 1360 my $longest_ret = length $base_ret_type; 1361 my @base_args = $items[0]->{args}->@*; 1362 my $base_thread_context = $items[0]->{flags} =~ /T/; 1363 for (my $i = 1; $i < @items; $i++) { 1364 no warnings 'experimental::smartmatch'; 1365 my $item = $items[$i]; 1366 $need_individual_usage = 1 1367 if $item->{ret_type} ne $base_ret_type 1368 || ! ($item->{args}->@* ~~ @base_args) 1369 || ( $item->{flags} =~ /T/ 1370 != $base_thread_context); 1371 my $ret_length = length $item->{ret_type}; 1372 $longest_ret = $ret_length if $ret_length > $longest_ret; 1373 my $name_length = length $item->{name}; 1374 $longest_name_length = $name_length 1375 if $name_length > $longest_name_length; 1376 } 1377 1378 # If we're only showing one entry, only its length matters. 1379 $longest_name_length = length($items[0]->{name}) 1380 unless $need_individual_usage; 1381 print $fh "\n"; 1382 1383 my $indent = 1; # 1 is sufficient for verbatim; =over is used 1384 # for more 1385 my $ret_name_sep_length = 2; # spaces between return type and name 1386 my $name_indent = $indent + $longest_ret; 1387 $name_indent += $ret_name_sep_length if $longest_ret; 1388 1389 my $this_max_width = 1390 $max_width - $description_indent - $usage_indent; 1391 1392 for my $item (@items) { 1393 my $ret_type = $item->{ret_type}; 1394 my @args = $item->{args}->@*; 1395 my $name = $item->{name}; 1396 my $item_flags = $item->{flags}; 1397 1398 # The return type 1399 print $fh (" " x $indent), $ret_type; 1400 1401 print $fh " " x ( $ret_name_sep_length 1402 + $longest_ret - length $ret_type); 1403 print $fh $name; 1404 1405 if ($item_flags =~ /n/) { # no args 1406 warn("$file: $element_name: n flag without m") 1407 unless $item_flags =~ /m/; 1408 warn("$file: $name: n flag but apparently has args") 1409 if @args; 1410 } 1411 else { 1412 # +1 for the '(' 1413 my $arg_indent = $name_indent + $longest_name_length + 1; 1414 1415 # Align the argument lists of the items 1416 print $fh " " x ($longest_name_length - length($name)); 1417 print $fh "("; 1418 1419 # Display as many of the arguments on the same line as 1420 # will fit. 1421 my $total_length = $arg_indent; 1422 my $first_line = 1; 1423 for (my $i = 0; $i < @args; $i++) { 1424 my $arg = $args[$i]; 1425 my $arg_length = length($arg); 1426 1427 # All but the first arg are preceded by a blank 1428 my $use_blank = $i > 0; 1429 1430 # +1 here and below because either the argument has a 1431 # trailing comma or trailing ')' 1432 $total_length += $arg_length + $use_blank + 1; 1433 1434 # We want none of the arguments to be positioned so 1435 # they extend too far to the right. Ideally, they 1436 # should all start in the same column as the arguments 1437 # on the first line of the function display do. But, if 1438 # necessary, outdent them so that they all start in 1439 # another column, with the longest ending at the right 1440 # margin, like so: 1441 # void function_name(pTHX_ short1, 1442 # short2, 1443 # very_long_argument, 1444 # short3) 1445 if ($total_length > $this_max_width) { 1446 1447 # If this is the first continuation line, 1448 # calculate the longest argument; this will be the 1449 # one we may have to outdent for. 1450 if ($first_line) { 1451 $first_line = 0; 1452 1453 # We will need at least as much as the current 1454 # argument 1455 my $longest_arg_length = $arg_length 1456 + $use_blank + 1; 1457 1458 # Look through the rest of the args to see if 1459 # any are longer than this one. 1460 for (my $j = $i + 1; $j < @args; $j++) { 1461 1462 # Include the trailing ',' or ')' in the 1463 # length. No need to concern ourselves 1464 # with a leading blank, as the argument 1465 # would be positioned first on the next 1466 # line 1467 my $peek_arg_length = length ($args[$j]) 1468 + 1; 1469 $longest_arg_length = $peek_arg_length 1470 if $peek_arg_length > $longest_arg_length; 1471 } 1472 1473 # Calculate the new indent if necessary. 1474 $arg_indent = 1475 $this_max_width - $longest_arg_length 1476 if $arg_indent + $longest_arg_length 1477 > $this_max_width; 1478 } 1479 1480 print $fh "\n", (" " x $arg_indent); 1481 $total_length = $arg_indent + $arg_length + 1; 1482 $use_blank = 0; 1483 } 1484 1485 # Display this argument 1486 print $fh " " if $use_blank; 1487 print $fh $arg; 1488 print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_'; 1489 1490 } # End of loop through args 1491 1492 print $fh ")"; 1493 } 1494 1495 print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;" 1496 print $fh "\n"; 1497 1498 # Only the first entry is normally displayed 1499 last unless $need_individual_usage; 1500 } 1501 } 1502 1503 print $fh "\n=back\n"; 1504 } 1505 1506 print $fh "\n=back\n"; 1507 print $fh "\n=for hackers\nFound in file $file\n"; 1508} 1509 1510sub construct_missings_section { 1511 my ($pod_name, $missings_ref) = @_; 1512 my $text = ""; 1513 1514 return $text unless $missings_ref->@*; 1515 1516 $text .= <<~EOT; 1517 1518 =head1 $undocumented_scn 1519 1520 EOT 1521 if ($pod_name eq 'perlapi') { 1522 $text .= <<~'EOT'; 1523 The following functions have been flagged as part of the public 1524 API, but are currently undocumented. Use them at your own risk, 1525 as the interfaces are subject to change. Functions that are not 1526 listed in this document are not intended for public use, and 1527 should NOT be used under any circumstances. 1528 1529 If you feel you need to use one of these functions, first send 1530 email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>. 1531 It may be that there is a good reason for the function not being 1532 documented, and it should be removed from this list; or it may 1533 just be that no one has gotten around to documenting it. In the 1534 latter case, you will be asked to submit a patch to document the 1535 function. Once your patch is accepted, it will indicate that the 1536 interface is stable (unless it is explicitly marked otherwise) and 1537 usable by you. 1538 EOT 1539 } 1540 else { 1541 $text .= <<~'EOT'; 1542 The following functions are currently undocumented. If you use 1543 one of them, you may wish to consider creating and submitting 1544 documentation for it. 1545 EOT 1546 } 1547 1548 $text .= "\n" . format_pod_indexes($missings_ref); 1549 1550 # Sort the elements. 1551 my @missings = sort dictionary_order $missings_ref->@*; 1552 1553 1554 $text .= "\n"; 1555 1556 use integer; 1557 1558 # Look through all the elements in the list and see how many columns we 1559 # could place them in the output what will fit in the available width. 1560 my $min_spacer = 2; # Need this much space between columns 1561 my $columns; 1562 my $rows; 1563 my @col_widths; 1564 1565 COLUMN: 1566 # We start with more columns, and work down until we find a number that 1567 # can accommodate all the data. This algorithm doesn't require the 1568 # resulting columns to all have the same width. This can allow for 1569 # as tight of packing as the data will possibly allow. 1570 for ($columns = 7; $columns >= 1; $columns--) { 1571 1572 # For this many columns, we will need this many rows (final row might 1573 # not be completely filled) 1574 $rows = (@missings + $columns - 1) / $columns; 1575 1576 # We only need to execute this final iteration to calculate the number 1577 # of rows, as we can't get fewer than a single column. 1578 last if $columns == 1; 1579 1580 my $row_width = 1; # For 1 space indent 1581 my $i = 0; # Which missing element 1582 1583 # For each column ... 1584 for my $col (0 .. $columns - 1) { 1585 1586 # Calculate how wide the column needs to be, which is based on the 1587 # widest element in it 1588 $col_widths[$col] = 0; 1589 1590 # Look through all the rows to find the widest element 1591 for my $row (0 .. $rows - 1) { 1592 1593 # Skip if this row doesn't have an entry for this column 1594 last if $i >= @missings; 1595 1596 # This entry occupies this many bytes. 1597 my $this_width = length $missings[$i]; 1598 1599 # All but the final column need a spacer between it and the 1600 # next column over. 1601 $this_width += $min_spacer if $col < $columns - 1; 1602 1603 1604 # This column will need to have enough width to accommodate 1605 # this element 1606 if ($this_width > $col_widths[$col]) { 1607 1608 # We can't have this many columns if the total width 1609 # exceeds the available; bail now and try fewer columns 1610 next COLUMN if $row_width + $this_width > $max_width; 1611 1612 $col_widths[$col] = $this_width; 1613 } 1614 1615 $i++; # The next row will contain the next item 1616 } 1617 1618 $row_width += $col_widths[$col]; 1619 next COLUMN if $row_width > $max_width; 1620 } 1621 1622 # If we get this far, this many columns works 1623 last; 1624 } 1625 1626 # Here, have calculated the number of rows ($rows) and columns ($columns) 1627 # required to list the elements. @col_widths contains the width of each 1628 # column. 1629 1630 $text .= "\n"; 1631 1632 # Assemble the output 1633 for my $row (0 .. $rows - 1) { 1634 for my $col (0 .. $columns - 1) { 1635 $text .= " " if $col == 0; # Indent one to mark as verbatim 1636 1637 my $index = $row + $rows * $col; # Convert 2 dimensions to 1 1638 1639 # Skip if this row doesn't have an entry for this column 1640 next if $index >= @missings; 1641 1642 my $element = $missings[$index]; 1643 $text .= $element; 1644 1645 # Add alignment spaces for all but final column 1646 $text .= " " x ($col_widths[$col] - length $element) 1647 if $col < $columns - 1; 1648 } 1649 1650 $text .= "\n"; # End of row 1651 } 1652 1653 return $text; 1654} 1655 1656sub dictionary_order { 1657 # Do a case-insensitive dictionary sort, with only alphabetics 1658 # significant, falling back to using everything for determinancy 1659 return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r)) 1660 || uc($a) cmp uc($b) 1661 || $a cmp $b; 1662} 1663 1664sub output { 1665 my ($podname, $header, $dochash, $missings_ref, $footer) = @_; 1666 # 1667 # strip leading '|' from each line which had been used to hide 1668 # pod from pod checkers. 1669 s/^\|//gm for $header, $footer; 1670 1671 my $fh = open_new("pod/$podname.pod", undef, 1672 {by => "$0 extracting documentation", 1673 from => 'the C source files'}, 1); 1674 1675 print $fh $header, "\n"; 1676 1677 for my $section_name (sort dictionary_order keys %valid_sections) { 1678 my $section_info = $dochash->{$section_name}; 1679 1680 # We allow empty sections in perlintern. 1681 if (! $section_info && $podname eq 'perlapi') { 1682 warn "Empty section '$section_name'; skipped"; 1683 next; 1684 } 1685 1686 print $fh "\n=head1 $section_name\n"; 1687 1688 if ($podname eq 'perlapi') { 1689 print $fh "\n", $valid_sections{$section_name}{header}, "\n" 1690 if defined $valid_sections{$section_name}{header}; 1691 1692 # Output any heading-level documentation and delete so won't get in 1693 # the way later 1694 if (exists $section_info->{""}) { 1695 print $fh "\n", $section_info->{""}, "\n"; 1696 delete $section_info->{""}; 1697 } 1698 } 1699 1700 if ($section_info && keys $section_info->%*) { 1701 for my $function_name (sort dictionary_order keys %$section_info) { 1702 docout($fh, $function_name, $section_info->{$function_name}); 1703 } 1704 } 1705 else { 1706 print $fh "\nThere are only public API items currently in $section_name\n"; 1707 } 1708 1709 print $fh "\n", $valid_sections{$section_name}{footer}, "\n" 1710 if $podname eq 'perlapi' 1711 && defined $valid_sections{$section_name}{footer}; 1712 } 1713 1714 print $fh construct_missings_section($podname, $missings_ref); 1715 1716 print $fh "\n$footer\n=cut\n"; 1717 1718 read_only_bottom_close_and_rename($fh); 1719} 1720 1721foreach (@{(setup_embed())[0]}) { 1722 next if @$_ < 2; 1723 my ($flags, $ret_type, $func, @args) = @$_; 1724 s/\b(?:NN|NULLOK)\b\s+//g for @args; 1725 1726 $funcflags{$func} = { 1727 flags => $flags, 1728 ret_type => $ret_type, 1729 args => \@args, 1730 }; 1731} 1732 1733# glob() picks up docs from extra .c or .h files that may be in unclean 1734# development trees. 1735open my $fh, '<', 'MANIFEST' 1736 or die "Can't open MANIFEST: $!"; 1737while (my $line = <$fh>) { 1738 next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/; 1739 1740 # Don't pick up pods from these. 1741 next if $file =~ m! ^ ( cpan | dist | ext ) / !x 1742 && ! defined $extra_input_pods{$file}; 1743 1744 open F, '<', $file or die "Cannot open $file for docs: $!\n"; 1745 autodoc(\*F,$file); 1746 close F or die "Error closing $file: $!\n"; 1747} 1748close $fh or die "Error whilst reading MANIFEST: $!"; 1749 1750parse_config_h(); 1751 1752for (sort keys %funcflags) { 1753 next unless $funcflags{$_}{flags} =~ /d/; 1754 next if $funcflags{$_}{flags} =~ /h/; 1755 warn "no docs for $_\n"; 1756} 1757 1758foreach (sort keys %missing) { 1759 warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc"; 1760} 1761 1762# List of funcs in the public API that aren't also marked as core-only, 1763# experimental nor deprecated. 1764my @missing_api = grep $funcflags{$_}{flags} =~ /A/ 1765 && $funcflags{$_}{flags} !~ /[xD]/ 1766 && !$docs{api}{$_}, keys %funcflags; 1767push @missing_api, keys %missing_macros; 1768 1769my @other_places = ( qw(perlclib ), keys %described_elsewhere ); 1770my $places_other_than_intern = join ", ", 1771 map { "L<$_>" } sort dictionary_order 'perlapi', @other_places; 1772my $places_other_than_api = join ", ", 1773 map { "L<$_>" } sort dictionary_order 'perlintern', @other_places; 1774 1775# The S< > makes things less densely packed, hence more readable 1776my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs; 1777my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs; 1778$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/; 1779$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/; 1780 1781my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs; 1782$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/; 1783 1784my $section_list = join "\n\n", map { "=item L</$_>" } 1785 sort(dictionary_order keys %valid_sections), 1786 $undocumented_scn; # Keep last 1787 1788output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_"); 1789|=encoding UTF-8 1790| 1791|=head1 NAME 1792| 1793|perlapi - autogenerated documentation for the perl public API 1794| 1795|=head1 DESCRIPTION 1796|X<Perl API> X<API> X<api> 1797| 1798|This file contains most of the documentation of the perl public API, as 1799|generated by F<embed.pl>. Specifically, it is a listing of functions, 1800|macros, flags, and variables that may be used by extension writers. Besides 1801|L<perlintern> and F<config.h>, some items are listed here as being actually 1802|documented in another pod. 1803| 1804|L<At the end|/$undocumented_scn> is a list of functions which have yet 1805|to be documented. Patches welcome! The interfaces of these are subject to 1806|change without notice. 1807| 1808|Some of the functions documented here are consolidated so that a single entry 1809|serves for multiple functions which all do basically the same thing, but have 1810|some slight differences. For example, one form might process magic, while 1811|another doesn't. The name of each variation is listed at the top of the 1812|single entry. But if all have the same signature (arguments and return type) 1813|except for their names, only the usage for the base form is shown. If any 1814|one of the forms has a different signature (such as returning C<const> or 1815|not) every function's signature is explicitly displayed. 1816| 1817|Anything not listed here or in the other mentioned pods is not part of the 1818|public API, and should not be used by extension writers at all. For these 1819|reasons, blindly using functions listed in F<proto.h> is to be avoided when 1820|writing extensions. 1821| 1822|In Perl, unlike C, a string of characters may generally contain embedded 1823|C<NUL> characters. Sometimes in the documentation a Perl string is referred 1824|to as a "buffer" to distinguish it from a C string, but sometimes they are 1825|both just referred to as strings. 1826| 1827|Note that all Perl API global variables must be referenced with the C<PL_> 1828|prefix. Again, those not listed here are not to be used by extension writers, 1829|and can be changed or removed without notice; same with macros. 1830|Some macros are provided for compatibility with the older, 1831|unadorned names, but this support may be disabled in a future release. 1832| 1833|Perl was originally written to handle US-ASCII only (that is characters 1834|whose ordinal numbers are in the range 0 - 127). 1835|And documentation and comments may still use the term ASCII, when 1836|sometimes in fact the entire range from 0 - 255 is meant. 1837| 1838|The non-ASCII characters below 256 can have various meanings, depending on 1839|various things. (See, most notably, L<perllocale>.) But usually the whole 1840|range can be referred to as ISO-8859-1. Often, the term "Latin-1" (or 1841|"Latin1") is used as an equivalent for ISO-8859-1. But some people treat 1842|"Latin1" as referring just to the characters in the range 128 through 255, or 1843|sometimes from 160 through 255. 1844|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters. 1845| 1846|Note that Perl can be compiled and run under either ASCII or EBCDIC (See 1847|L<perlebcdic>). Most of the documentation (and even comments in the code) 1848|ignore the EBCDIC possibility. 1849|For almost all purposes the differences are transparent. 1850|As an example, under EBCDIC, 1851|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so 1852|whenever this documentation refers to C<utf8> 1853|(and variants of that name, including in function names), 1854|it also (essentially transparently) means C<UTF-EBCDIC>. 1855|But the ordinals of characters differ between ASCII, EBCDIC, and 1856|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different 1857|number of bytes than in UTF-8. 1858| 1859|The organization of this document is tentative and subject to change. 1860|Suggestions and patches welcome 1861|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>. 1862| 1863|The sections in this document currently are 1864| 1865|=over $standard_indent 1866 1867|$section_list 1868| 1869|=back 1870| 1871|The listing below is alphabetical, case insensitive. 1872_EOB_ 1873|=head1 AUTHORS 1874| 1875|Until May 1997, this document was maintained by Jeff Okamoto 1876|<okamoto\@corp.hp.com>. It is now maintained as part of Perl itself. 1877| 1878|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie, 1879|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil 1880|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer, 1881|Stephen McCamant, and Gurusamy Sarathy. 1882| 1883|API Listing originally by Dean Roehrich <roehrich\@cray.com>. 1884| 1885|Updated to be autogenerated from comments in the source by Benjamin Stuhl. 1886| 1887|=head1 SEE ALSO 1888| 1889|F<config.h>, $places_other_than_api 1890_EOE_ 1891 1892# List of non-static internal functions 1893my @missing_guts = 1894 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags; 1895 1896output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_"); 1897|=head1 NAME 1898| 1899|perlintern - autogenerated documentation of purely B<internal> 1900|Perl functions 1901| 1902|=head1 DESCRIPTION 1903|X<internal Perl functions> X<interpreter functions> 1904| 1905|This file is the autogenerated documentation of functions in the 1906|Perl interpreter that are documented using Perl's internal documentation 1907|format but are not marked as part of the Perl API. In other words, 1908|B<they are not for use in extensions>! 1909 1910|It has the same sections as L<perlapi>, though some may be empty. 1911| 1912_EOB_ 1913| 1914|=head1 AUTHORS 1915| 1916|The autodocumentation system was originally added to the Perl core by 1917|Benjamin Stuhl. Documentation is by whoever was kind enough to 1918|document their functions. 1919| 1920|=head1 SEE ALSO 1921| 1922|F<config.h>, $places_other_than_intern 1923_EOE_ 1924