xref: /openbsd-src/gnu/usr.bin/perl/autodoc.pl (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1#!/usr/bin/perl -w
2
3use Text::Tabs;
4#
5# Unconditionally regenerate:
6#
7#    pod/perlintern.pod
8#    pod/perlapi.pod
9#
10# from information stored in
11#
12#    embed.fnc
13#    plus all the core .c, .h, and .pod files listed in MANIFEST
14#    plus %extra_input_pods
15
16my %extra_input_pods = ( 'dist/ExtUtils-ParseXS/lib/perlxs.pod' => 1 );
17
18# Has an optional arg, which is the directory to chdir to before reading
19# MANIFEST and the files
20#
21# This script is invoked as part of 'make all'
22#
23# The generated pod consists of sections of related elements, functions,
24# macros, and variables.  The keys of %valid_sections give the current legal
25# ones.  Just add a new key to add a section.
26#
27# Throughout the files read by this script are lines like
28#
29# =for apidoc_section Section Name
30# =for apidoc_section $section_name_variable
31#
32# "Section Name" (after having been stripped of leading space) must be one of
33# the legal section names, or an error is thrown.  $section_name_variable must
34# be one of the legal section name variables defined below; these expand to
35# legal section names.  This form is used so that minor wording changes in
36# these titles can be confied to this file.  All the names of the variables
37# end in '_scn'; this suffix is optional in the apidoc_section lines.
38#
39# All API elements defined between this line and the next 'apidoc_section'
40# line will go into the section "Section Name" (or $section_name_variable),
41# sorted by dictionary order within it.  perlintern and perlapi are parallel
42# documents, each potentially with a section "Section Name".  Each element is
43# marked as to which document it goes into.  If there are none for a
44# particular section in perlapi, that section is omitted.
45#
46# Also, in .[ch] files, there may be
47#
48# =head1 Section Name
49#
50# lines in comments.  These are also used by this program to switch to section
51# "Section Name".  The difference is that if there are any lines after the
52# =head1, inside the same comment, and before any =for apidoc-ish lines, they
53# are used as a heading for section "Section Name" (in both perlintern and
54# perlapi).  This includes any =head[2-5].  If more than one '=head1 Section
55# Name' line has content, they appear in the generated pod in an undefined
56# order.  Note that you can't use a $section_name_variable in =head1 lines
57#
58# The next =head1, =for apidoc_section, or file end terminates what goes into
59# the current section
60#
61# The %valid_sections hash below also can have header content, which will
62# appear before any =head1 content.  The hash can also have footer content
63# content, which will appear at the end of the section, after all the
64# elements.
65#
66# The lines that define the actual functions, etc are documented in embed.fnc,
67# because they have flags which must be kept in sync with that file.
68
69use strict;
70use warnings;
71
72my $nroff_min_indent = 4;   # for non-heading lines
73# 80 column terminal - 2 for pager adding 2 columns;
74my $max_width = 80 - 2 - $nroff_min_indent;
75my $standard_indent = 4;  # Any additional indentations
76
77if (@ARGV) {
78    my $workdir = shift;
79    chdir $workdir
80        or die "Couldn't chdir to '$workdir': $!";
81}
82require './regen/regen_lib.pl';
83require './regen/embed_lib.pl';
84
85my %described_elsewhere;
86
87#
88# See database of global and static function prototypes in embed.fnc
89# This is used to generate prototype headers under various configurations,
90# export symbols lists for different platforms, and macros to provide an
91# implicit interpreter context argument.
92#
93
94my %docs;
95my %seen;
96my %funcflags;
97my %missing;
98my %missing_macros;
99
100my $link_text = "Described in";
101
102my $description_indent = 4;
103my $usage_indent = 3;   # + initial blank yields 4 total
104
105my $AV_scn = 'AV Handling';
106my $callback_scn = 'Callback Functions';
107my $casting_scn = 'Casting';
108my $casing_scn = 'Character case changing';
109my $classification_scn = 'Character classification';
110my $names_scn = 'Character names';
111my $scope_scn = 'Compile-time scope hooks';
112my $compiler_scn = 'Compiler and Preprocessor information';
113my $directives_scn = 'Compiler directives';
114my $concurrency_scn = 'Concurrency';
115my $COP_scn = 'COPs and Hint Hashes';
116my $CV_scn = 'CV Handling';
117my $custom_scn = 'Custom Operators';
118my $debugging_scn = 'Debugging';
119my $display_scn = 'Display functions';
120my $embedding_scn = 'Embedding, Threads, and Interpreter Cloning';
121my $errno_scn = 'Errno';
122my $exceptions_scn = 'Exception Handling (simple) Macros';
123my $filesystem_scn = 'Filesystem configuration values';
124my $filters_scn = 'Source Filters';
125my $floating_scn = 'Floating point';
126my $genconfig_scn = 'General Configuration';
127my $globals_scn = 'Global Variables';
128my $GV_scn = 'GV Handling and Stashes';
129my $hook_scn = 'Hook manipulation';
130my $HV_scn = 'HV Handling';
131my $io_scn = 'Input/Output';
132my $io_formats_scn = 'I/O Formats';
133my $integer_scn = 'Integer';
134my $lexer_scn = 'Lexer interface';
135my $locale_scn = 'Locales';
136my $magic_scn = 'Magic';
137my $memory_scn = 'Memory Management';
138my $MRO_scn = 'MRO';
139my $multicall_scn = 'Multicall Functions';
140my $numeric_scn = 'Numeric Functions';
141
142# Now combined, as unclear which functions go where, but separate names kept
143# to avoid 1) other code changes; 2) in case it seems better to split again
144my $optrees_scn = 'Optrees';
145my $optree_construction_scn = $optrees_scn; # Was 'Optree construction';
146my $optree_manipulation_scn = $optrees_scn; # Was 'Optree Manipulation Functions'
147my $pack_scn = 'Pack and Unpack';
148my $pad_scn = 'Pad Data Structures';
149my $password_scn = 'Password and Group access';
150my $reports_scn = 'Reports and Formats';
151my $paths_scn = 'Paths to system commands';
152my $prototypes_scn = 'Prototype information';
153my $regexp_scn = 'REGEXP Functions';
154my $signals_scn = 'Signals';
155my $site_scn = 'Site configuration';
156my $sockets_scn = 'Sockets configuration values';
157my $stack_scn = 'Stack Manipulation Macros';
158my $string_scn = 'String Handling';
159my $SV_flags_scn = 'SV Flags';
160my $SV_scn = 'SV Handling';
161my $tainting_scn = 'Tainting';
162my $time_scn = 'Time';
163my $typedefs_scn = 'Typedef names';
164my $unicode_scn = 'Unicode Support';
165my $utility_scn = 'Utility Functions';
166my $versioning_scn = 'Versioning';
167my $warning_scn = 'Warning and Dieing';
168my $XS_scn = 'XS';
169
170# Kept separate at end
171my $undocumented_scn = 'Undocumented elements';
172
173my %valid_sections = (
174    $AV_scn => {},
175    $callback_scn => {},
176    $casting_scn => {},
177    $casing_scn => {},
178    $classification_scn => {},
179    $scope_scn => {},
180    $compiler_scn => {},
181    $directives_scn => {},
182    $concurrency_scn => {},
183    $COP_scn => {},
184    $CV_scn => {
185        header => <<~'EOT',
186            This section documents functions to manipulate CVs which are
187            code-values, meaning subroutines.  For more information, see
188            L<perlguts>.
189            EOT
190    },
191
192    $custom_scn => {},
193    $debugging_scn => {},
194    $display_scn => {},
195    $embedding_scn => {},
196    $errno_scn => {},
197    $exceptions_scn => {},
198    $filesystem_scn => {
199        header => <<~'EOT',
200            Also see L</List of capability HAS_foo symbols>.
201            EOT
202        },
203    $filters_scn => {},
204    $floating_scn => {
205        header => <<~'EOT',
206            Also L</List of capability HAS_foo symbols> lists capabilities
207            that arent in this section.  For example C<HAS_ASINH>, for the
208            hyperbolic sine function.
209            EOT
210        },
211    $genconfig_scn => {
212        header => <<~'EOT',
213            This section contains configuration information not otherwise
214            found in the more specialized sections of this document.  At the
215            end is a list of C<#defines> whose name should be enough to tell
216            you what they do, and a list of #defines which tell you if you
217            need to C<#include> files to get the corresponding functionality.
218            EOT
219
220        footer => <<~EOT,
221
222            =head2 List of capability C<HAS_I<foo>> symbols
223
224            This is a list of those symbols that dont appear elsewhere in ths
225            document that indicate if the current platform has a certain
226            capability.  Their names all begin with C<HAS_>.  Only those
227            symbols whose capability is directly derived from the name are
228            listed here.  All others have their meaning expanded out elsewhere
229            in this document.  This (relatively) compact list is because we
230            think that the expansion would add little or no value and take up
231            a lot of space (because there are so many).  If you think certain
232            ones should be expanded, send email to
233            L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
234
235            Each symbol here will be C<#define>d if and only if the platform
236            has the capability.  If you need more detail, see the
237            corresponding entry in F<config.h>.  For convenience, the list is
238            split so that the ones that indicate there is a reentrant version
239            of a capability are listed separately
240
241            __HAS_LIST__
242
243            And, the reentrant capabilities:
244
245            __HAS_R_LIST__
246
247            Example usage:
248
249            =over $standard_indent
250
251             #ifdef HAS_STRNLEN
252               use strnlen()
253             #else
254               use an alternative implementation
255             #endif
256
257            =back
258
259            =head2 List of C<#include> needed symbols
260
261            This list contains symbols that indicate if certain C<#include>
262            files are present on the platform.  If your code accesses the
263            functionality that one of these is for, you will need to
264            C<#include> it if the symbol on this list is C<#define>d.  For
265            more detail, see the corresponding entry in F<config.h>.
266
267            __INCLUDE_LIST__
268
269            Example usage:
270
271            =over $standard_indent
272
273             #ifdef I_WCHAR
274               #include <wchar.h>
275             #endif
276
277            =back
278            EOT
279      },
280    $globals_scn => {},
281    $GV_scn => {},
282    $hook_scn => {},
283    $HV_scn => {},
284    $io_scn => {},
285    $io_formats_scn => {
286        header => <<~'EOT',
287            These are used for formatting the corresponding type For example,
288            instead of saying
289
290             Perl_newSVpvf(pTHX_ "Create an SV with a %d in it\n", iv);
291
292            use
293
294             Perl_newSVpvf(pTHX_ "Create an SV with a " IVdf " in it\n", iv);
295
296            This keeps you from having to know if, say an IV, needs to be
297            printed as C<%d>, C<%ld>, or something else.
298            EOT
299      },
300    $integer_scn => {},
301    $lexer_scn => {},
302    $locale_scn => {},
303    $magic_scn => {},
304    $memory_scn => {},
305    $MRO_scn => {},
306    $multicall_scn => {},
307    $numeric_scn => {},
308    $optrees_scn => {},
309    $optree_construction_scn => {},
310    $optree_manipulation_scn => {},
311    $pack_scn => {},
312    $pad_scn => {},
313    $password_scn => {},
314    $paths_scn => {},
315    $prototypes_scn => {},
316    $regexp_scn => {},
317    $reports_scn => {
318        header => <<~"EOT",
319            These are used in the simple report generation feature of Perl.
320            See L<perlform>.
321            EOT
322      },
323    $signals_scn => {},
324    $site_scn => {
325        header => <<~'EOT',
326            These variables give details as to where various libraries,
327            installation destinations, I<etc.>, go, as well as what various
328            installation options were selected
329            EOT
330      },
331    $sockets_scn => {},
332    $stack_scn => {},
333    $string_scn => {
334        header => <<~EOT,
335            See also C<L</$unicode_scn>>.
336            EOT
337      },
338    $SV_flags_scn => {},
339    $SV_scn => {},
340    $tainting_scn => {},
341    $time_scn => {},
342    $typedefs_scn => {},
343    $unicode_scn => {
344        header => <<~EOT,
345            L<perlguts/Unicode Support> has an introduction to this API.
346
347            See also C<L</$classification_scn>>,
348            C<L</$casing_scn>>,
349            and C<L</$string_scn>>.
350            Various functions outside this section also work specially with
351            Unicode.  Search for the string "utf8" in this document.
352            EOT
353      },
354    $utility_scn => {},
355    $versioning_scn => {},
356    $warning_scn => {},
357    $XS_scn => {},
358);
359
360# Somewhat loose match for an apidoc line so we can catch minor typos.
361# Parentheses are used to capture portions so that below we verify
362# that things are the actual correct syntax.
363my $apidoc_re = qr/ ^ (\s*)            # $1
364                      (=?)             # $2
365                      (\s*)            # $3
366                      for (\s*)        # $4
367                      apidoc (_item)?  # $5
368                      (\s*)            # $6
369                      (.*?)            # $7
370                      \s* \n /x;
371# Only certain flags, dealing with display, are acceptable for apidoc_item
372my $display_flags = "fFnDopsTx";
373
374sub check_api_doc_line ($$) {
375    my ($file, $in) = @_;
376
377    return unless $in =~ $apidoc_re;
378
379    my $is_item = defined $5;
380    my $is_in_proper_form = length $1 == 0
381                         && length $2 > 0
382                         && length $3 == 0
383                         && length $4 > 0
384                         && length $7 > 0
385                         && (    length $6 > 0
386                             || ($is_item && substr($7, 0, 1) eq '|'));
387    my $proto_in_file = $7;
388    my $proto = $proto_in_file;
389    $proto = "||$proto" if $proto !~ /\|/;
390    my ($flags, $ret_type, $name, @args) = split /\s*\|\s*/, $proto;
391
392    $name && $is_in_proper_form or die <<EOS;
393Bad apidoc at $file line $.:
394  $in
395Expected:
396  =for apidoc flags|returntype|name|arg|arg|...
397  =for apidoc flags|returntype|name
398  =for apidoc name
399(or 'apidoc_item')
400EOS
401
402    die "Only [$display_flags] allowed in apidoc_item:\n$in"
403                            if $is_item && $flags =~ /[^$display_flags]/;
404
405    return ($name, $flags, $ret_type, $is_item, $proto_in_file, @args);
406}
407
408sub embed_override($) {
409    my ($element_name) = shift;
410
411    # If the entry is also in embed.fnc, it should be defined
412    # completely there, but not here
413    my $embed_docref = delete $funcflags{$element_name};
414
415    return unless $embed_docref and %$embed_docref;
416
417    my $flags = $embed_docref->{'flags'};
418    warn "embed.fnc entry '$element_name' missing 'd' flag"
419                                            unless $flags =~ /d/;
420
421    return ($flags, $embed_docref->{'ret_type'}, $embed_docref->{args}->@*);
422}
423
424# The section that is in effect at the beginning of the given file.  If not
425# listed here, an apidoc_section line must precede any apidoc lines.
426# This allows the files listed here that generally are single-purpose, to not
427# have to worry about the autodoc section
428my %initial_file_section = (
429                            'av.c' => $AV_scn,
430                            'av.h' => $AV_scn,
431                            'cv.h' => $CV_scn,
432                            'deb.c' => $debugging_scn,
433                            'dist/ExtUtils-ParseXS/lib/perlxs.pod' => $XS_scn,
434                            'doio.c' => $io_scn,
435                            'gv.c' => $GV_scn,
436                            'gv.h' => $GV_scn,
437                            'hv.h' => $HV_scn,
438                            'locale.c' => $locale_scn,
439                            'malloc.c' => $memory_scn,
440                            'numeric.c' => $numeric_scn,
441                            'opnames.h' => $optree_construction_scn,
442                            'pad.h'=> $pad_scn,
443                            'patchlevel.h' => $versioning_scn,
444                            'perlio.h' => $io_scn,
445                            'pod/perlapio.pod' => $io_scn,
446                            'pod/perlcall.pod' => $callback_scn,
447                            'pod/perlembed.pod' => $embedding_scn,
448                            'pod/perlfilter.pod' => $filters_scn,
449                            'pod/perliol.pod' => $io_scn,
450                            'pod/perlmroapi.pod' => $MRO_scn,
451                            'pod/perlreguts.pod' => $regexp_scn,
452                            'pp_pack.c' => $pack_scn,
453                            'pp_sort.c' => $SV_scn,
454                            'regcomp.c' => $regexp_scn,
455                            'regexp.h' => $regexp_scn,
456                            'sv.h' => $SV_scn,
457                            'sv.c' => $SV_scn,
458                            'sv_inline.h' => $SV_scn,
459                            'taint.c' => $tainting_scn,
460                            'unicode_constants.h' => $unicode_scn,
461                            'utf8.c' => $unicode_scn,
462                            'utf8.h' => $unicode_scn,
463                            'vutil.c' => $versioning_scn,
464                           );
465
466sub autodoc ($$) { # parse a file and extract documentation info
467    my($fh,$file) = @_;
468    my($in, $line_num, $header, $section);
469
470    $section = $initial_file_section{$file}
471                                    if defined $initial_file_section{$file};
472
473    my $file_is_C = $file =~ / \. [ch] $ /x;
474
475    # Count lines easier
476    my $get_next_line = sub { $line_num++; return <$fh> };
477
478    # Read the file
479    while ($in = $get_next_line->()) {
480        last unless defined $in;
481
482        next unless (    $in =~ / ^ =for [ ]+ apidoc /x
483                                      # =head1 lines only have effect in C files
484                     || ($file_is_C && $in =~ /^=head1/));
485
486        # Here, the line introduces a portion of the input that we care about.
487        # Either it is for an API element, or heading text which we expect
488        # will be used for elements later in the file
489
490        my ($text, $element_name, $flags, $ret_type, $is_item, $proto_in_file);
491        my (@args, @items);
492
493        # If the line starts a new section ...
494        if ($in=~ /^ = (?: for [ ]+ apidoc_section | head1 ) [ ]+ (.*) /x) {
495
496            $section = $1;
497            if ($section =~ / ^ \$ /x) {
498                $section .= '_scn' unless $section =~ / _scn $ /;
499                $section = eval "$section";
500                die "Unknown \$section variable '$section' in $file: $@" if $@;
501            }
502            die "Unknown section name '$section' in $file near line $.\n"
503                                    unless defined $valid_sections{$section};
504
505        }
506        elsif ($in=~ /^ =for [ ]+ apidoc \B /x) {   # Otherwise better be a
507                                                    # plain apidoc line
508            die "Unkown apidoc-type line '$in'" unless $in=~ /^=for apidoc_item/;
509            die "apidoc_item doesn't immediately follow an apidoc entry: '$in'";
510        }
511        else {  # Plain apidoc
512
513            ($element_name, $flags, $ret_type, $is_item, $proto_in_file, @args)
514                                                = check_api_doc_line($file, $in);
515            # Override this line with any info in embed.fnc
516            my ($embed_flags, $embed_ret_type, @embed_args)
517                                                = embed_override($element_name);
518            if ($embed_ret_type) {
519                warn "embed.fnc entry overrides redundant information in"
520                    . " '$proto_in_file' in $file"
521                                               if $flags || $ret_type || @args;
522                $flags = $embed_flags;
523                $ret_type = $embed_ret_type;
524                @args = @embed_args;
525            }
526            elsif ($flags !~ /[my]/)  { # Not in embed.fnc, is missing if not
527                                        # a macro or typedef
528                $missing{$element_name} = $file;
529            }
530
531            die "flag '$1' is not legal (for function $element_name (from $file))"
532                        if $flags =~ / ( [^AabCDdEeFfGhiIMmNnTOoPpRrSsUuWXxy] ) /x;
533
534            die "'u' flag must also have 'm' or 'y' flags' for $element_name"
535                                            if $flags =~ /u/ && $flags !~ /[my]/;
536            warn ("'$element_name' not \\w+ in '$proto_in_file' in $file")
537                        if $flags !~ /N/ && $element_name !~ / ^ [_[:alpha:]] \w* $ /x;
538
539            if (exists $seen{$element_name} && $flags !~ /h/) {
540                die ("'$element_name' in $file was already documented in $seen{$element_name}");
541            }
542            else {
543                $seen{$element_name} = $file;
544            }
545        }
546
547        # Here we have processed the initial line in the heading text or API
548        # element, and have saved the important information from it into the
549        # corresponding variables.  Now accumulate the text that applies to it
550        # up to a terminating line, which is one of:
551        # 1) =cut
552        # 2) =head (in a C file only =head1)
553        # 3) an end comment line in a C file: m:^\s*\*/:
554        # 4) =for apidoc... (except apidoc_item lines)
555        $text = "";
556        my $head_ender_num = ($file_is_C) ? 1 : "";
557        while (defined($in = $get_next_line->())) {
558
559            last if $in =~ /^=cut/x;
560            last if $in =~ /^=head$head_ender_num/;
561
562            if ($file_is_C && $in =~ m: ^ \s* \* / $ :x) {
563
564                # End of comment line in C files is a fall-back terminator,
565                # but warn only if there actually is some accumulated text
566                warn "=cut missing? $file:$line_num:$in" if $text =~ /\S/;
567                last;
568            }
569
570            if ($in !~ / ^ =for [ ]+ apidoc /x) {
571                $text .= $in;
572                next;
573            }
574
575            # Here, the line is an apidoc line.  All but apidoc_item terminate
576            # the text being accumulated.
577            last if $in =~ / ^ =for [ ]+ apidoc_section /x;
578
579            my ($item_name, $item_flags, $item_ret_type, $is_item,
580                    $item_proto, @item_args) = check_api_doc_line($file, $in);
581            last unless $is_item;
582
583            # Here, is an apidoc_item_line; They can only come within apidoc
584            # paragraphs.
585            die "Unexpected api_doc_item line '$item_proto'"
586                                                        unless $element_name;
587
588            # We accept blank lines between these, but nothing else;
589            die "apidoc_item lines must immediately follow apidoc lines for "
590              . " '$element_name' in $file"
591                                                            if $text =~ /\S/;
592            # Override this line with any info in embed.fnc
593            my ($embed_flags, $embed_ret_type, @embed_args)
594                                                = embed_override($item_name);
595            if ($embed_ret_type) {
596                warn "embed.fnc entry overrides redundant information in"
597                    . " '$item_proto' in $file"
598                                if $item_flags || $item_ret_type || @item_args;
599
600                $item_flags = $embed_flags;
601                $item_ret_type = $embed_ret_type;
602                @item_args = @embed_args;
603            }
604
605            # Use the base entry flags if none for this item; otherwise add in
606            # any non-display base entry flags.
607            if ($item_flags) {
608                $item_flags .= $flags =~ s/[$display_flags]//rg;
609            }
610            else {
611                $item_flags = $flags;
612            }
613            $item_ret_type = $ret_type unless $item_ret_type;
614            @item_args = @args unless @item_args;
615            push @items, { name     => $item_name,
616                           ret_type => $item_ret_type,
617                           flags    => $item_flags,
618                           args     => [ @item_args ],
619                         };
620
621            # This line shows that this element is documented.
622            delete $funcflags{$item_name};
623        }
624
625        # Here, are done accumulating the text for this item.  Trim it
626        $text =~ s/ ^ \s* //x;
627        $text =~ s/ \s* $ //x;
628        $text .= "\n" if $text ne "";
629
630        # And treat all-spaces as nothing at all
631        undef $text unless $text =~ /\S/;
632
633        if ($element_name) {
634
635            # Here, we have accumulated into $text, the pod for $element_name
636            my $where = $flags =~ /A/ ? 'api' : 'guts';
637
638            die "No =for apidoc_section nor =head1 in $file for '$element_name'\n"
639                                                    unless defined $section;
640            if (exists $docs{$where}{$section}{$element_name}) {
641                warn "$0: duplicate API entry for '$element_name' in"
642                    . " $where/$section\n";
643                next;
644            }
645
646            # Override the text with just a link if the flags call for that
647            my $is_link_only = ($flags =~ /h/);
648            if ($is_link_only) {
649                if ($file_is_C) {
650                    die "Can't currently handle link with items to it:\n$in" if @items;
651                    redo;    # Don't put anything if C source
652                }
653
654                # Here, is an 'h' flag in pod.  We add a reference to the pod (and
655                # nothing else) to perlapi/intern.  (It would be better to add a
656                # reference to the correct =item,=header, but something that makes
657                # it harder is that it that might be a duplicate, like '=item *';
658                # so that is a future enhancement XXX.  Another complication is
659                # there might be more than one deserving candidates.)
660                my $podname = $file =~ s!.*/!!r;    # Rmv directory name(s)
661                $podname =~ s/\.pod//;
662                $text = "Described in L<$podname>.\n";
663
664                # Don't output a usage example for linked to documentation if
665                # it is trivial (has no arguments) and we aren't to add a
666                # semicolon
667                $flags .= 'U' if $flags =~ /n/ && $flags !~ /[Us]/;
668
669                # Keep track of all the pod files that we refer to.
670                push $described_elsewhere{$podname}->@*, $podname;
671            }
672
673            $docs{$where}{$section}{$element_name}{flags} = $flags;
674            $docs{$where}{$section}{$element_name}{pod} = $text;
675            $docs{$where}{$section}{$element_name}{file} = $file;
676            $docs{$where}{$section}{$element_name}{ret_type} = $ret_type;
677            push $docs{$where}{$section}{$element_name}{args}->@*, @args;
678            push $docs{$where}{$section}{$element_name}{items}->@*, @items;
679        }
680        elsif ($text) {
681            $valid_sections{$section}{header} = "" unless
682                                    defined $valid_sections{$section}{header};
683            $valid_sections{$section}{header} .= "\n$text";
684        }
685
686        # We already have the first line of what's to come in $in
687        redo;
688
689    } # End of loop through input
690}
691
692my %configs;
693my @has_defs;
694my @has_r_defs;     # Reentrant symbols
695my @include_defs;
696
697sub parse_config_h {
698    use re '/aa';   # Everthing is ASCII in this file
699
700    # Process config.h
701    my $config_h = 'config.h';
702    $config_h = 'win32/config.h' unless -e $config_h;
703    die "Can't find $config_h" unless -e $config_h;
704    open my $fh, '<', $config_h or die "Can't open $config_h: $!";
705    while (<$fh>) {
706
707        # Look for lines like /* FOO_BAR:
708        # By convention all config.h descriptions begin like that
709        if (m[ ^ /\* [ ] ( [[:alpha:]] \w+ ) : \s* $ ]ax) {
710            my $name = $1;
711
712            # Here we are starting the description for $name in config.h.  We
713            # accumulate the entire description for it into @description.
714            # Flowing text from one input line to another is appended into the
715            # same array element to make a single flowing line element, but
716            # verbatim lines are kept as separate elements in @description.
717            # This will facilitate later doing pattern matching without regard
718            # to line boundaries on non-verbatim text.
719
720            die "Multiple config.h entries for '$name'"
721                                        if defined $configs{$name}{description};
722
723            # Get first line of description
724            $_ = <$fh>;
725
726            # Each line in the description begins with blanks followed by '/*'
727            # and some spaces.
728            die "Unexpected config.h initial line for $name: '$_'"
729                                            unless s/ ^ ( \s* \* \s* ) //x;
730            my $initial_text = $1;
731
732            # Initialize the description with this first line (after having
733            # stripped the prefix text)
734            my @description = $_;
735
736            # The first line is used as a template for how much indentation
737            # each normal succeeding line has.  Lines indented further
738            # will be considered as intended to be verbatim.  But, empty lines
739            # likely won't have trailing blanks, so just strip the whole thing
740            # for them.
741            my $strip_initial_qr = qr!   \s* \* \s* $
742                                    | \Q$initial_text\E
743                                    !x;
744            $configs{$name}{verbatim} = 0;
745
746            # Read in the remainder of the description
747            while (<$fh>) {
748                last if s| ^ \s* \* / ||x;  # A '*/' ends it
749
750                die "Unexpected config.h description line for $name: '$_'"
751                                                unless s/$strip_initial_qr//;
752
753                # Fix up the few flawed lines in config.h wherein a new
754                # sentence begins with a tab (and maybe a space after that).
755                # Although none of them currently do, let it recognize
756                # something like
757                #
758                #   "... text").  The next sentence ...
759                #
760                s/ ( \w "? \)? \. ) \t \s* ( [[:alpha:]] ) /$1  $2/xg;
761
762                # If this line has extra indentation or looks to have columns,
763                # it should be treated as verbatim.  Columns are indicated by
764                # use of interior: tabs, 3 spaces in a row, or even 2 spaces
765                # not preceded by punctuation.
766                if ($_ !~ m/  ^ \s
767                              | \S (?:                    \t
768                                    |                     \s{3}
769                                    |  (*nlb:[[:punct:]]) \s{2}
770                                   )
771                           /x)
772                {
773                    # But here, is not a verbatim line.  Add an empty line if
774                    # this is the first non-verbatim after a run of verbatims
775                    if ($description[-1] =~ /^\s/) {
776                        push @description, "\n", $_;
777                    }
778                    else {  # Otherwise, append this flowing line to the
779                            # current flowing line
780                        $description[-1] .= $_;
781                    }
782                }
783                else {
784                    $configs{$name}{verbatim} = 1;
785
786                    # The first verbatim line in a run of them is separated by an
787                    # empty line from the flowing lines above it
788                    push @description, "\n" if $description[-1] =~ /^\S/;
789
790                    $_ = Text::Tabs::expand($_);
791
792                    # Only a single space so less likely to wrap
793                    s/ ^ \s* / /x;
794
795                    push @description, $_;
796                }
797            }
798
799            push $configs{$name}{description}->@*, @description
800
801        }   # Not a description; see if it is a macro definition.
802        elsif (m! ^
803                  (?: / \* )?                   # Optional commented-out
804                                                # indication
805                      \# \s* define \s+ ( \w+ ) # $1 is the name
806                  (   \s* )                     # $2 indicates if args or not
807                  (   .*? )                     # $3 is any definition
808                  (?: / \s* \* \* / )?          # Optional trailing /**/ or / **/
809                  $
810                !x)
811        {
812            my $name = $1;
813
814            # There can be multiple definitions for a name.  We want to know
815            # if any of them has arguments, and if any has a body.
816            $configs{$name}{has_args} //= $2 eq "";
817            $configs{$name}{has_args} ||= $2 eq "";
818            $configs{$name}{has_defn} //= $3 ne "";
819            $configs{$name}{has_defn} ||= $3 ne "";
820        }
821    }
822
823    # We now have stored the description and information about every #define
824    # in the file.  The description is in a form convenient to operate on to
825    # convert to pod.  Do that now.
826    foreach my $name (keys %configs) {
827        next unless defined $configs{$name}{description};
828
829        # All adjacent non-verbatim lines of the description are appended
830        # together in a single element in the array.  This allows the patterns
831        # to work across input line boundaries.
832
833        my $pod = "";
834        while (defined ($_ = shift $configs{$name}{description}->@*)) {
835            chomp;
836
837            if (/ ^ \S /x) {  # Don't edit verbatim lines
838
839                # Enclose known file/path names not already so enclosed
840                # with <...>.  (Some entries in config.h are already
841                # '<path/to/file>')
842                my $file_name_qr = qr! [ \w / ]+ \.
843                                    (?: c | h | xs | p [lm] | pmc | PL
844                                        | sh | SH | exe ) \b
845                                    !xx;
846                my $path_name_qr = qr! (?: / \w+ )+ !x;
847                for my $re ($file_name_qr, $path_name_qr) {
848                    s! (*nlb:[ < \w / ]) ( $re ) !<$1>!gxx;
849                }
850
851                # Enclose <... file/path names with F<...> (but no double
852                # angle brackets)
853                for my $re ($file_name_qr, $path_name_qr) {
854                    s! < ( $re ) > !F<$1>!gxx;
855                }
856
857                # Explain metaconfig units
858                s/ ( \w+ \. U \b ) /$1 (part of metaconfig)/gx;
859
860                # Convert "See foo" to "See C<L</foo>>" if foo is described in
861                # this file.  Also create a link to the known file INSTALL.
862                # And, to be more general, handle "See also foo and bar", and
863                # "See also foo, bar, and baz"
864                while (m/ \b [Ss]ee \s+
865                         (?: also \s+ )?    ( \w+ )
866                         (?: ,  \s+         ( \w+ ) )?
867                         (?: ,? \s+ and \s+ ( \w+ ) )? /xg) {
868                    my @links = $1;
869                    push @links, $2 if defined $2;
870                    push @links, $3 if defined $3;
871                    foreach my $link (@links) {
872                        if ($link eq 'INSTALL') {
873                            s/ \b INSTALL \b /C<L<INSTALL>>/xg;
874                        }
875                        elsif (grep { $link =~ / \b $_ \b /x } keys %configs) {
876                            s| \b $link \b |C<L</$link>>|xg;
877                            $configs{$link}{linked} = 1;
878                            $configs{$name}{linked} = 1;
879                        }
880                    }
881                }
882
883                # Enclose what we think are symbols with C<...>.
884                no warnings 'experimental::vlb';
885                s/ (*nlb:<)
886                   (
887                        # Any word followed immediately with parens or
888                        # brackets
889                        \b \w+ (?: \( [^)]* \)    # parameter list
890                                 | \[ [^]]* \]    # or array reference
891                               )
892                    | (*plb: ^ | \s ) -D \w+    # Also -Dsymbols.
893                    | \b (?: struct | union ) \s \w+
894
895                        # Words that contain underscores (which are
896                        # definitely not text) or three uppercase letters in
897                        # a row.  Length two ones, like IV, aren't enclosed,
898                        # because they often don't look as nice.
899                    | \b \w* (?: _ | [[:upper:]]{3,} ) \w* \b
900                   )
901                    (*nla:>)
902                 /C<$1>/xg;
903
904                # These include foo when the name is HAS_foo.  This is a
905                # heuristic which works in most cases.
906                if ($name =~ / ^ HAS_ (.*) /x) {
907                    my $symbol = lc $1;
908
909                    # Don't include path components, nor things already in
910                    # <>, or with trailing '(', '['
911                    s! \b (*nlb:[/<]) $symbol (*nla:[[/>(]) \b !C<$symbol>!xg;
912                }
913            }
914
915            $pod .=  "$_\n";
916        }
917        delete $configs{$name}{description};
918
919        $configs{$name}{pod} = $pod;
920    }
921
922    # Now have converted the description to pod.  We also now have enough
923    # information that we can do cross checking to find definitions without
924    # corresponding pod, and see if they are mentioned in some description;
925    # otherwise they aren't documented.
926  NAME:
927    foreach my $name (keys %configs) {
928
929        # A definition without pod
930        if (! defined $configs{$name}{pod}) {
931
932            # Leading/trailing underscore means internal to config.h, e.g.,
933            # _GNU_SOURCE
934            next if $name =~ / ^ _ /x;
935            next if $name =~ / _ $ /x;
936
937            # MiXeD case names are internal to config.h; the first 4
938            # characters are sufficient to determine this
939            next if $name =~ / ^ [[:upper:]] [[:lower:]]
940                                 [[:upper:]] [[:lower:]]
941                            /x;
942
943            # Here, not internal to config.h.  Look to see if this symbol is
944            # mentioned in the pod of some other.  If so, assume it is
945            # documented.
946            foreach my $check_name (keys %configs) {
947                my $this_element = $configs{$check_name};
948                my $this_pod = $this_element->{pod};
949                if (defined $this_pod) {
950                    next NAME if $this_pod =~ / \b $name \b /x;
951                }
952            }
953
954            warn "$name has no documentation\n";
955            $missing_macros{$name} = 'config.h';
956
957            next;
958        }
959
960        my $has_defn = $configs{$name}{has_defn};
961        my $has_args = $configs{$name}{has_args};
962
963        # Check if any section already has an entry for this element.
964        # If so, it better be a placeholder, in which case we replace it
965        # with this entry.
966        foreach my $section (keys $docs{'api'}->%*) {
967            if (exists $docs{'api'}{$section}{$name}) {
968                my $was = $docs{'api'}{$section}{$name}->{pod};
969                $was = "" unless $was;
970                chomp $was;
971                if ($was ne "" && $was !~ m/$link_text/) {
972                    die "Multiple descriptions for $name\n"
973                        . "$section contained '$was'";
974                }
975                $docs{'api'}{$section}{$name}->{pod} = $configs{$name}{pod};
976                $configs{$name}{section} = $section;
977                last;
978            }
979        }
980
981        my $handled = 0;    # Haven't handled this yet
982
983        if (defined $configs{$name}{'section'}) {
984            # This has been taken care of elsewhere.
985            $handled = 1;
986        }
987        else {
988            my $flags = "";
989            if ($has_defn && ! $has_args) {
990                $configs{$name}{args} = 1;
991            }
992
993            # Symbols of the form I_FOO are for #include files.  They have
994            # special usage information
995            if ($name =~ / ^ I_ ( .* ) /x) {
996                my $file = lc $1 . '.h';
997                $configs{$name}{usage} = <<~"EOT";
998                    #ifdef $name
999                        #include <$file>
1000                    #endif
1001                    EOT
1002            }
1003
1004            # Compute what section this variable should go into.  This
1005            # heuristic was determined by manually inspecting the current
1006            # things in config.h, and should be adjusted as necessary as
1007            # deficiencies are found.
1008            #
1009            # This is the default section for macros with a definiton but
1010            # no arguments, meaning it is replaced unconditionally
1011            #
1012            my $sb = qr/ _ | \b /x; # segment boundary
1013            my $dash_or_spaces = qr/ - | \s+ /x;
1014            my $pod = $configs{$name}{pod};
1015            if ($name =~ / ^ USE_ /x) {
1016                $configs{$name}{'section'} = $site_scn;
1017            }
1018            elsif ($name =~ / SLEEP | (*nlb:SYS_) TIME | TZ | $sb TM $sb /x)
1019            {
1020                $configs{$name}{'section'} = $time_scn;
1021            }
1022            elsif (   $name =~ / ^ [[:alpha:]]+ f $ /x
1023                   && $configs{$name}{pod} =~ m/ \b format \b /ix)
1024            {
1025                $configs{$name}{'section'} = $io_formats_scn;
1026            }
1027            elsif ($name =~ /  DOUBLE | FLOAT | LONGDBL | LDBL | ^ NV
1028                            | $sb CASTFLAGS $sb
1029                            | QUADMATH
1030                            | $sb (?: IS )? NAN
1031                            | $sb (?: IS )? FINITE
1032                            /x)
1033            {
1034                $configs{$name}{'section'} =
1035                                    $floating_scn;
1036            }
1037            elsif ($name =~ / (?: POS | OFF | DIR ) 64 /x) {
1038                $configs{$name}{'section'} = $filesystem_scn;
1039            }
1040            elsif (   $name =~ / $sb (?: BUILTIN | CPP ) $sb | ^ CPP /x
1041                   || $configs{$name}{pod} =~ m/ \b align /x)
1042            {
1043                $configs{$name}{'section'} = $compiler_scn;
1044            }
1045            elsif ($name =~ / ^ [IU] [ \d V ]
1046                            | ^ INT | SHORT | LONG | QUAD | 64 | 32 /xx)
1047            {
1048                $configs{$name}{'section'} = $integer_scn;
1049            }
1050            elsif ($name =~ / $sb t $sb /x) {
1051                $configs{$name}{'section'} = $typedefs_scn;
1052                $flags .= 'y';
1053            }
1054            elsif (   $name =~ / ^ PERL_ ( PRI | SCN ) | $sb FORMAT $sb /x
1055                    && $configs{$name}{pod} =~ m/ \b format \b /ix)
1056            {
1057                $configs{$name}{'section'} = $io_formats_scn;
1058            }
1059            elsif ($name =~ / BACKTRACE /x) {
1060                $configs{$name}{'section'} = $debugging_scn;
1061            }
1062            elsif ($name =~ / ALLOC $sb /x) {
1063                $configs{$name}{'section'} = $memory_scn;
1064            }
1065            elsif (   $name =~ /   STDIO | FCNTL | EOF | FFLUSH
1066                                | $sb FILE $sb
1067                                | $sb DIR $sb
1068                                | $sb LSEEK
1069                                | $sb INO $sb
1070                                | $sb OPEN
1071                                | $sb CLOSE
1072                                | ^ DIR
1073                                | ^ INO $sb
1074                                | DIR $
1075                                | FILENAMES
1076                                /x
1077                    || $configs{$name}{pod} =~ m!  I/O | stdio
1078                                                | file \s+ descriptor
1079                                                | file \s* system
1080                                                | statfs
1081                                                !x)
1082            {
1083                $configs{$name}{'section'} = $filesystem_scn;
1084            }
1085            elsif ($name =~ / ^ SIG | SIGINFO | signal /ix) {
1086                $configs{$name}{'section'} = $signals_scn;
1087            }
1088            elsif ($name =~ / $sb ( PROTO (?: TYPE)? S? ) $sb /x) {
1089                $configs{$name}{'section'} = $prototypes_scn;
1090            }
1091            elsif (   $name =~ / ^ LOC_ /x
1092                    || $configs{$name}{pod} =~ /full path/i)
1093            {
1094                $configs{$name}{'section'} = $paths_scn;
1095            }
1096            elsif ($name =~ / $sb LC_ | LOCALE | langinfo /xi) {
1097                $configs{$name}{'section'} = $locale_scn;
1098            }
1099            elsif ($configs{$name}{pod} =~ /  GCC | C99 | C\+\+ /xi) {
1100                $configs{$name}{'section'} = $compiler_scn;
1101            }
1102            elsif ($name =~ / PASSW (OR)? D | ^ PW | ( PW | GR ) ENT /x)
1103            {
1104                $configs{$name}{'section'} = $password_scn;
1105            }
1106            elsif ($name =~ /  SOCKET | $sb SOCK /x) {
1107                $configs{$name}{'section'} = $sockets_scn;
1108            }
1109            elsif (   $name =~ / THREAD | MULTIPLICITY /x
1110                    || $configs{$name}{pod} =~ m/ \b pthread /ix)
1111            {
1112                $configs{$name}{'section'} = $concurrency_scn;
1113            }
1114            elsif ($name =~ /  PERL | ^ PRIV | SITE | ARCH | BIN
1115                                | VENDOR | ^ USE
1116                            /x)
1117            {
1118                $configs{$name}{'section'} = $site_scn;
1119            }
1120            elsif (   $pod =~ / \b floating $dash_or_spaces point \b /ix
1121                   || $pod =~ / \b (double | single) $dash_or_spaces precision \b /ix
1122                   || $pod =~ / \b doubles \b /ix
1123                   || $pod =~ / \b (?: a | the | long ) \s+ (?: double | NV ) \b /ix)
1124            {
1125                $configs{$name}{'section'} =
1126                                    $floating_scn;
1127            }
1128            else {
1129                # Above are the specific sections.  The rest go into a
1130                # grab-bag of general configuration values.  However, we put
1131                # two classes of them into lists of their names, without their
1132                # descriptions, when we think that the description doesn't add
1133                # any real value.  One list contains the #include variables:
1134                # the description is basically boiler plate for each of these.
1135                # The other list contains the very many things that are of the
1136                # form HAS_foo, and \bfoo\b is contained in its description,
1137                # and there is no verbatim text in the pod or links to/from it
1138                # (which would add value).  That means that it is likely the
1139                # intent of the variable can be gleaned from just its name,
1140                # and unlikely the description adds signficant value, so just
1141                # listing them suffices.  Giving their descriptions would
1142                # expand this pod significantly with little added value.
1143                if (   ! $has_defn
1144                    && ! $configs{$name}{verbatim}
1145                    && ! $configs{$name}{linked})
1146                {
1147                    if ($name =~ / ^ I_ ( .* ) /x) {
1148                        push @include_defs, $name;
1149                        next;
1150                    }
1151                    elsif ($name =~ / ^ HAS_ ( .* ) /x) {
1152                        my $canonical_name = $1;
1153                        $canonical_name =~ s/_//g;
1154
1155                        my $canonical_pod = $configs{$name}{pod};
1156                        $canonical_pod =~ s/_//g;
1157
1158                        if ($canonical_pod =~ / \b $canonical_name \b /xi) {
1159                            if ($name =~ / $sb R $sb /x) {
1160                                push @has_r_defs, $name;
1161                            }
1162                            else {
1163                                push @has_defs, $name;
1164                            }
1165                            next;
1166                        }
1167                    }
1168                }
1169
1170                $configs{$name}{'section'} = $genconfig_scn;
1171            }
1172
1173            my $section = $configs{$name}{'section'};
1174            die "Internal error: '$section' not in \%valid_sections"
1175                            unless grep { $_ eq $section } keys %valid_sections;
1176            $flags .= 'AdmnT';
1177            $flags .= 'U' unless defined $configs{$name}{usage};
1178
1179            # All the information has been gathered; save it
1180            $docs{'api'}{$section}{$name}{flags} = $flags;
1181            $docs{'api'}{$section}{$name}{pod} = $configs{$name}{pod};
1182            $docs{'api'}{$section}{$name}{ret_type} = "";
1183            $docs{'api'}{$section}{$name}{file} = 'config.h';
1184            $docs{'api'}{$section}{$name}{usage}
1185                = $configs{$name}{usage} if defined $configs{$name}{usage};
1186            push $docs{'api'}{$section}{$name}{args}->@*, ();
1187            push $docs{'api'}{$section}{$name}{items}->@*, ();
1188        }
1189    }
1190}
1191
1192sub format_pod_indexes($) {
1193    my $entries_ref = shift;
1194
1195    # Output the X<> references to the names, packed since they don't get
1196    # displayed, but not too many per line so that when someone is editing the
1197    # file, it doesn't run on
1198
1199    my $text ="";
1200    my $line_length = 0;
1201    for my $name (sort dictionary_order $entries_ref->@*) {
1202        my $entry = "X<$name>";
1203        my $entry_length = length $entry;
1204
1205        # Don't loop forever if we have a verrry long name, and don't go too
1206        # far to the right.
1207        if ($line_length > 0 && $line_length + $entry_length > $max_width) {
1208            $text .= "\n";
1209            $line_length = 0;
1210        }
1211
1212        $text .= $entry;
1213        $line_length += $entry_length;
1214    }
1215
1216    return $text;
1217}
1218
1219sub docout ($$$) { # output the docs for one function group
1220    my($fh, $element_name, $docref) = @_;
1221
1222    # Trim trailing space
1223    $element_name =~ s/\s*$//;
1224
1225    my $flags = $docref->{flags};
1226    my $pod = $docref->{pod} // "";
1227    my $file = $docref->{file};
1228
1229    my @items = $docref->{items}->@*;
1230
1231    # Make the main element the first of the items.  This allows uniform
1232    # treatment below
1233    unshift @items, {   name => $element_name,
1234                        flags => $flags,
1235                        ret_type => $docref->{ret_type},
1236                        args => [ $docref->{args}->@* ],
1237                    };
1238
1239    warn("Empty pod for $element_name (from $file)") unless $pod =~ /\S/;
1240
1241    print $fh "\n=over $description_indent\n";
1242    print $fh "\n=item C<$_->{name}>\n" for @items;
1243
1244    # If we're printing only a link to an element, this isn't the major entry,
1245    # so no X<> here.
1246    if ($flags !~ /h/) {
1247        print $fh "X<$_->{name}>" for @items;
1248        print $fh "\n";
1249    }
1250
1251    my @deprecated;
1252    my @experimental;
1253    for my $item (@items) {
1254        push @deprecated,   "C<$item->{name}>" if $item->{flags} =~ /D/;
1255        push @experimental, "C<$item->{name}>" if $item->{flags} =~ /x/;
1256    }
1257
1258    for my $which (\@deprecated, \@experimental) {
1259        if ($which->@*) {
1260            my $is;
1261            my $it;
1262            my $list;
1263
1264            if ($which->@* == 1) {
1265                $is = 'is';
1266                $it = 'it';
1267                $list = $which->[0];
1268            }
1269            elsif ($which->@* == @items) {
1270                $is = 'are';
1271                $it = 'them';
1272                $list = (@items == 2)
1273                         ? "both forms"
1274                         : "all these forms";
1275            }
1276            else {
1277                $is = 'are';
1278                $it = 'them';
1279                my $final = pop $which->@*;
1280                $list = "the " . join ", ", $which->@*;
1281                $list .= "," if $which->@* > 1;
1282                $list .= " and $final forms";
1283            }
1284
1285            if ($which == \@deprecated) {
1286                print $fh <<~"EOT";
1287
1288                    C<B<DEPRECATED!>>  It is planned to remove $list
1289                    from a future release of Perl.  Do not use $it for
1290                    new code; remove $it from existing code.
1291                    EOT
1292            }
1293            else {
1294                print $fh <<~"EOT";
1295
1296                    NOTE: $list $is B<experimental> and may change or be
1297                    removed without notice.
1298                    EOT
1299            }
1300        }
1301    }
1302
1303    chomp $pod;     # Make sure prints pod with a single trailing \n
1304    print $fh "\n", $pod, "\n";
1305
1306    for my $item (@items) {
1307        my $item_flags = $item->{flags};
1308        my $item_name = $item->{name};
1309
1310        print $fh "\nNOTE: the C<perl_$item_name()> form is B<deprecated>.\n"
1311                                                    if $item_flags =~ /O/;
1312        # Is Perl_, but no #define foo # Perl_foo
1313        if (   ($item_flags =~ /p/ && $item_flags =~ /o/ && $item_flags !~ /M/)
1314
1315                # Can't handle threaded varargs
1316            || ($item_flags =~ /f/ && $item_flags !~ /T/))
1317        {
1318            $item->{name} = "Perl_$item_name";
1319            print $fh <<~"EOT";
1320
1321                NOTE: C<$item_name> must be explicitly called as
1322                C<$item->{name}>
1323                EOT
1324            print $fh "with an C<aTHX_> parameter" if $item_flags !~ /T/;
1325            print $fh ".\n";
1326        }
1327    }
1328
1329    if ($flags =~ /[Uy]/) { # no usage; typedefs are considered simple enough
1330                            # to never warrant a usage line
1331        warn("U and s flags are incompatible")
1332                                            if $flags =~ /U/ && $flags =~ /s/;
1333        # nothing
1334    } else {
1335
1336        print $fh "\n=over $usage_indent\n";
1337
1338        if (defined $docref->{usage}) {     # An override of the usage section
1339            print $fh "\n", ($docref->{usage} =~ s/^/ /mrg), "\n";
1340        }
1341        else {
1342
1343            # Add the thread context formal parameter on expanded-out names
1344            for my $item (@items) {
1345                unshift $item->{args}->@*, (($item->{args}->@*)
1346                                            ? "pTHX_"
1347                                            : "pTHX")
1348                                                   if $item->{flags} !~ /T/
1349                                                   && $item->{name} =~ /^Perl_/;
1350            }
1351
1352            # Look through all the items in this entry.  If all have the same
1353            # return type and arguments (including thread context), only the
1354            # main entry is displayed.
1355            # Also, find the longest return type and longest name so that if
1356            # multiple ones are shown, they can be vertically aligned nicely
1357            my $need_individual_usage = 0;
1358            my $longest_name_length = length $items[0]->{name};
1359            my $base_ret_type = $items[0]->{ret_type};
1360            my $longest_ret = length $base_ret_type;
1361            my @base_args = $items[0]->{args}->@*;
1362            my $base_thread_context = $items[0]->{flags} =~ /T/;
1363            for (my $i = 1; $i < @items; $i++) {
1364                no warnings 'experimental::smartmatch';
1365                my $item = $items[$i];
1366                $need_individual_usage = 1
1367                                    if    $item->{ret_type} ne $base_ret_type
1368                                    || ! ($item->{args}->@* ~~ @base_args)
1369                                    ||   (   $item->{flags} =~ /T/
1370                                          != $base_thread_context);
1371                my $ret_length = length $item->{ret_type};
1372                $longest_ret = $ret_length if $ret_length > $longest_ret;
1373                my $name_length = length $item->{name};
1374                $longest_name_length = $name_length
1375                                        if $name_length > $longest_name_length;
1376            }
1377
1378            # If we're only showing one entry, only its length matters.
1379            $longest_name_length = length($items[0]->{name})
1380                                                unless $need_individual_usage;
1381            print $fh "\n";
1382
1383            my $indent = 1;     # 1 is sufficient for verbatim; =over is used
1384                                # for more
1385            my $ret_name_sep_length = 2; # spaces between return type and name
1386            my $name_indent = $indent + $longest_ret;
1387            $name_indent += $ret_name_sep_length if $longest_ret;
1388
1389            my $this_max_width =
1390                               $max_width - $description_indent - $usage_indent;
1391
1392            for my $item (@items) {
1393                my $ret_type = $item->{ret_type};
1394                my @args = $item->{args}->@*;
1395                my $name = $item->{name};
1396                my $item_flags = $item->{flags};
1397
1398                # The return type
1399                print $fh (" " x $indent), $ret_type;
1400
1401                print $fh " " x (  $ret_name_sep_length
1402                                 + $longest_ret - length $ret_type);
1403                print $fh $name;
1404
1405                if ($item_flags =~ /n/) { # no args
1406                    warn("$file: $element_name: n flag without m")
1407                                                    unless $item_flags =~ /m/;
1408                    warn("$file: $name: n flag but apparently has args")
1409                                                                    if @args;
1410                }
1411                else {
1412                    # +1 for the '('
1413                    my $arg_indent = $name_indent + $longest_name_length + 1;
1414
1415                    # Align the argument lists of the items
1416                    print $fh " " x ($longest_name_length - length($name));
1417                    print $fh "(";
1418
1419                    # Display as many of the arguments on the same line as
1420                    # will fit.
1421                    my $total_length = $arg_indent;
1422                    my $first_line = 1;
1423                    for (my $i = 0; $i < @args; $i++) {
1424                        my $arg = $args[$i];
1425                        my $arg_length = length($arg);
1426
1427                        # All but the first arg are preceded by a blank
1428                        my $use_blank = $i > 0;
1429
1430                        # +1 here and below because either the argument has a
1431                        # trailing comma or trailing ')'
1432                        $total_length += $arg_length + $use_blank + 1;
1433
1434                        # We want none of the arguments to be positioned so
1435                        # they extend too far to the right.  Ideally, they
1436                        # should all start in the same column as the arguments
1437                        # on the first line of the function display do.  But, if
1438                        # necessary, outdent them so that they all start in
1439                        # another column, with the longest ending at the right
1440                        # margin, like so:
1441                        #                   void  function_name(pTHX_ short1,
1442                        #                                    short2,
1443                        #                                    very_long_argument,
1444                        #                                    short3)
1445                        if ($total_length > $this_max_width) {
1446
1447                            # If this is the first continuation line,
1448                            # calculate the longest argument; this will be the
1449                            # one we may have to outdent for.
1450                            if ($first_line) {
1451                                $first_line = 0;
1452
1453                                # We will need at least as much as the current
1454                                # argument
1455                                my $longest_arg_length = $arg_length
1456                                                       + $use_blank + 1;
1457
1458                                # Look through the rest of the args to see if
1459                                # any are longer than this one.
1460                                for (my $j = $i + 1; $j < @args; $j++) {
1461
1462                                    # Include the trailing ',' or ')' in the
1463                                    # length.  No need to concern ourselves
1464                                    # with a leading blank, as the argument
1465                                    # would be positioned first on the next
1466                                    # line
1467                                    my $peek_arg_length = length ($args[$j])
1468                                                        + 1;
1469                                    $longest_arg_length = $peek_arg_length
1470                                      if $peek_arg_length > $longest_arg_length;
1471                                }
1472
1473                                # Calculate the new indent if necessary.
1474                                $arg_indent =
1475                                        $this_max_width - $longest_arg_length
1476                                        if $arg_indent + $longest_arg_length
1477                                                            > $this_max_width;
1478                            }
1479
1480                            print $fh "\n", (" " x $arg_indent);
1481                            $total_length = $arg_indent + $arg_length + 1;
1482                            $use_blank = 0;
1483                        }
1484
1485                        # Display this argument
1486                        print $fh " " if $use_blank;
1487                        print $fh $arg;
1488                        print $fh "," if $i < @args - 1 && $args[$i] ne 'pTHX_';
1489
1490                    } # End of loop through args
1491
1492                    print $fh ")";
1493                }
1494
1495                print $fh ";" if $item_flags =~ /s/; # semicolon: "dTHR;"
1496                print $fh "\n";
1497
1498                # Only the first entry is normally displayed
1499                last unless $need_individual_usage;
1500            }
1501        }
1502
1503        print $fh "\n=back\n";
1504    }
1505
1506    print $fh "\n=back\n";
1507    print $fh "\n=for hackers\nFound in file $file\n";
1508}
1509
1510sub construct_missings_section {
1511    my ($pod_name, $missings_ref) = @_;
1512    my $text = "";
1513
1514    return $text unless $missings_ref->@*;
1515
1516    $text .= <<~EOT;
1517
1518        =head1 $undocumented_scn
1519
1520        EOT
1521    if ($pod_name eq 'perlapi') {
1522        $text .= <<~'EOT';
1523            The following functions have been flagged as part of the public
1524            API, but are currently undocumented.  Use them at your own risk,
1525            as the interfaces are subject to change.  Functions that are not
1526            listed in this document are not intended for public use, and
1527            should NOT be used under any circumstances.
1528
1529            If you feel you need to use one of these functions, first send
1530            email to L<perl5-porters@perl.org|mailto:perl5-porters@perl.org>.
1531            It may be that there is a good reason for the function not being
1532            documented, and it should be removed from this list; or it may
1533            just be that no one has gotten around to documenting it.  In the
1534            latter case, you will be asked to submit a patch to document the
1535            function.  Once your patch is accepted, it will indicate that the
1536            interface is stable (unless it is explicitly marked otherwise) and
1537            usable by you.
1538            EOT
1539    }
1540    else {
1541        $text .= <<~'EOT';
1542            The following functions are currently undocumented.  If you use
1543            one of them, you may wish to consider creating and submitting
1544            documentation for it.
1545            EOT
1546    }
1547
1548    $text .= "\n" . format_pod_indexes($missings_ref);
1549
1550    # Sort the elements.
1551    my @missings = sort dictionary_order $missings_ref->@*;
1552
1553
1554    $text .= "\n";
1555
1556    use integer;
1557
1558    # Look through all the elements in the list and see how many columns we
1559    # could place them in the output what will fit in the available width.
1560    my $min_spacer = 2;     # Need this much space between columns
1561    my $columns;
1562    my $rows;
1563    my @col_widths;
1564
1565  COLUMN:
1566    # We start with more columns, and work down until we find a number that
1567    # can accommodate all the data.  This algorithm doesn't require the
1568    # resulting columns to all have the same width.  This can allow for
1569    # as tight of packing as the data will possibly allow.
1570    for ($columns = 7; $columns >= 1; $columns--) {
1571
1572        # For this many columns, we will need this many rows (final row might
1573        # not be completely filled)
1574        $rows = (@missings + $columns - 1) / $columns;
1575
1576        # We only need to execute this final iteration to calculate the number
1577        # of rows, as we can't get fewer than a single column.
1578        last if $columns == 1;
1579
1580        my $row_width = 1;  # For 1 space indent
1581        my $i = 0;  # Which missing element
1582
1583        # For each column ...
1584        for my $col (0 .. $columns - 1) {
1585
1586            # Calculate how wide the column needs to be, which is based on the
1587            # widest element in it
1588            $col_widths[$col] = 0;
1589
1590            # Look through all the rows to find the widest element
1591            for my $row (0 .. $rows - 1) {
1592
1593                # Skip if this row doesn't have an entry for this column
1594                last if $i >= @missings;
1595
1596                # This entry occupies this many bytes.
1597                my $this_width = length $missings[$i];
1598
1599                # All but the final column need a spacer between it and the
1600                # next column over.
1601                $this_width += $min_spacer if $col < $columns - 1;
1602
1603
1604                # This column will need to have enough width to accommodate
1605                # this element
1606                if ($this_width > $col_widths[$col]) {
1607
1608                    # We can't have this many columns if the total width
1609                    # exceeds the available; bail now and try fewer columns
1610                    next COLUMN if $row_width + $this_width > $max_width;
1611
1612                    $col_widths[$col] = $this_width;
1613                }
1614
1615                $i++;   # The next row will contain the next item
1616            }
1617
1618            $row_width += $col_widths[$col];
1619            next COLUMN if $row_width > $max_width;
1620        }
1621
1622        # If we get this far, this many columns works
1623        last;
1624    }
1625
1626    # Here, have calculated the number of rows ($rows) and columns ($columns)
1627    # required to list the elements.  @col_widths contains the width of each
1628    # column.
1629
1630    $text .= "\n";
1631
1632    # Assemble the output
1633    for my $row (0 .. $rows - 1) {
1634        for my $col (0 .. $columns - 1) {
1635            $text .= " " if $col == 0;  # Indent one to mark as verbatim
1636
1637            my $index = $row + $rows * $col;  # Convert 2 dimensions to 1
1638
1639            # Skip if this row doesn't have an entry for this column
1640            next if $index >= @missings;
1641
1642            my $element = $missings[$index];
1643            $text .= $element;
1644
1645            # Add alignment spaces for all but final column
1646            $text .= " " x ($col_widths[$col] - length $element)
1647                                                        if $col < $columns - 1;
1648        }
1649
1650        $text .= "\n";  # End of row
1651    }
1652
1653    return $text;
1654}
1655
1656sub dictionary_order {
1657    # Do a case-insensitive dictionary sort, with only alphabetics
1658    # significant, falling back to using everything for determinancy
1659    return (uc($a =~ s/[[:^alpha:]]//r) cmp uc($b =~ s/[[:^alpha:]]//r))
1660           || uc($a) cmp uc($b)
1661           || $a cmp $b;
1662}
1663
1664sub output {
1665    my ($podname, $header, $dochash, $missings_ref, $footer) = @_;
1666    #
1667    # strip leading '|' from each line which had been used to hide
1668    # pod from pod checkers.
1669    s/^\|//gm for $header, $footer;
1670
1671    my $fh = open_new("pod/$podname.pod", undef,
1672                      {by => "$0 extracting documentation",
1673                       from => 'the C source files'}, 1);
1674
1675    print $fh $header, "\n";
1676
1677    for my $section_name (sort dictionary_order keys %valid_sections) {
1678        my $section_info = $dochash->{$section_name};
1679
1680        # We allow empty sections in perlintern.
1681        if (! $section_info && $podname eq 'perlapi') {
1682            warn "Empty section '$section_name'; skipped";
1683            next;
1684        }
1685
1686        print $fh "\n=head1 $section_name\n";
1687
1688        if ($podname eq 'perlapi') {
1689            print $fh "\n", $valid_sections{$section_name}{header}, "\n"
1690                                if defined $valid_sections{$section_name}{header};
1691
1692            # Output any heading-level documentation and delete so won't get in
1693            # the way later
1694            if (exists $section_info->{""}) {
1695                print $fh "\n", $section_info->{""}, "\n";
1696                delete $section_info->{""};
1697            }
1698        }
1699
1700        if ($section_info && keys $section_info->%*) {
1701            for my $function_name (sort dictionary_order keys %$section_info) {
1702                docout($fh, $function_name, $section_info->{$function_name});
1703            }
1704        }
1705        else {
1706            print $fh "\nThere are only public API items currently in $section_name\n";
1707        }
1708
1709        print $fh "\n", $valid_sections{$section_name}{footer}, "\n"
1710                            if $podname eq 'perlapi'
1711                            && defined $valid_sections{$section_name}{footer};
1712    }
1713
1714    print $fh construct_missings_section($podname, $missings_ref);
1715
1716    print $fh "\n$footer\n=cut\n";
1717
1718    read_only_bottom_close_and_rename($fh);
1719}
1720
1721foreach (@{(setup_embed())[0]}) {
1722    next if @$_ < 2;
1723    my ($flags, $ret_type, $func, @args) = @$_;
1724    s/\b(?:NN|NULLOK)\b\s+//g for @args;
1725
1726    $funcflags{$func} = {
1727                         flags => $flags,
1728                         ret_type => $ret_type,
1729                         args => \@args,
1730                        };
1731}
1732
1733# glob() picks up docs from extra .c or .h files that may be in unclean
1734# development trees.
1735open my $fh, '<', 'MANIFEST'
1736    or die "Can't open MANIFEST: $!";
1737while (my $line = <$fh>) {
1738    next unless my ($file) = $line =~ /^(\S+\.(?:[ch]|pod))\t/;
1739
1740    # Don't pick up pods from these.
1741    next if $file =~ m! ^ ( cpan | dist | ext ) / !x
1742         && ! defined $extra_input_pods{$file};
1743
1744    open F, '<', $file or die "Cannot open $file for docs: $!\n";
1745    autodoc(\*F,$file);
1746    close F or die "Error closing $file: $!\n";
1747}
1748close $fh or die "Error whilst reading MANIFEST: $!";
1749
1750parse_config_h();
1751
1752for (sort keys %funcflags) {
1753    next unless $funcflags{$_}{flags} =~ /d/;
1754    next if $funcflags{$_}{flags} =~ /h/;
1755    warn "no docs for $_\n";
1756}
1757
1758foreach (sort keys %missing) {
1759    warn "Function '$_', documented in $missing{$_}, not listed in embed.fnc";
1760}
1761
1762# List of funcs in the public API that aren't also marked as core-only,
1763# experimental nor deprecated.
1764my @missing_api = grep $funcflags{$_}{flags} =~ /A/
1765                    && $funcflags{$_}{flags} !~ /[xD]/
1766                    && !$docs{api}{$_}, keys %funcflags;
1767push @missing_api, keys %missing_macros;
1768
1769my @other_places = ( qw(perlclib ), keys %described_elsewhere );
1770my $places_other_than_intern = join ", ",
1771            map { "L<$_>" } sort dictionary_order 'perlapi', @other_places;
1772my $places_other_than_api = join ", ",
1773            map { "L<$_>" } sort dictionary_order 'perlintern', @other_places;
1774
1775# The S< > makes things less densely packed, hence more readable
1776my $has_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_defs;
1777my $has_r_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @has_r_defs;
1778$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_LIST__/$has_defs_text/;
1779$valid_sections{$genconfig_scn}{footer} =~ s/__HAS_R_LIST__/$has_r_defs_text/;
1780
1781my $include_defs_text .= join ",S< > ", map { "C<$_>" } sort dictionary_order @include_defs;
1782$valid_sections{$genconfig_scn}{footer} =~ s/__INCLUDE_LIST__/$include_defs_text/;
1783
1784my $section_list = join "\n\n", map { "=item L</$_>" }
1785                                sort(dictionary_order keys %valid_sections),
1786                                $undocumented_scn;  # Keep last
1787
1788output('perlapi', <<"_EOB_", $docs{api}, \@missing_api, <<"_EOE_");
1789|=encoding UTF-8
1790|
1791|=head1 NAME
1792|
1793|perlapi - autogenerated documentation for the perl public API
1794|
1795|=head1 DESCRIPTION
1796|X<Perl API> X<API> X<api>
1797|
1798|This file contains most of the documentation of the perl public API, as
1799|generated by F<embed.pl>.  Specifically, it is a listing of functions,
1800|macros, flags, and variables that may be used by extension writers.  Besides
1801|L<perlintern> and F<config.h>, some items are listed here as being actually
1802|documented in another pod.
1803|
1804|L<At the end|/$undocumented_scn> is a list of functions which have yet
1805|to be documented.  Patches welcome!  The interfaces of these are subject to
1806|change without notice.
1807|
1808|Some of the functions documented here are consolidated so that a single entry
1809|serves for multiple functions which all do basically the same thing, but have
1810|some slight differences.  For example, one form might process magic, while
1811|another doesn't.  The name of each variation is listed at the top of the
1812|single entry.  But if all have the same signature (arguments and return type)
1813|except for their names, only the usage for the base form is shown.  If any
1814|one of the forms has a different signature (such as returning C<const> or
1815|not) every function's signature is explicitly displayed.
1816|
1817|Anything not listed here or in the other mentioned pods is not part of the
1818|public API, and should not be used by extension writers at all.  For these
1819|reasons, blindly using functions listed in F<proto.h> is to be avoided when
1820|writing extensions.
1821|
1822|In Perl, unlike C, a string of characters may generally contain embedded
1823|C<NUL> characters.  Sometimes in the documentation a Perl string is referred
1824|to as a "buffer" to distinguish it from a C string, but sometimes they are
1825|both just referred to as strings.
1826|
1827|Note that all Perl API global variables must be referenced with the C<PL_>
1828|prefix.  Again, those not listed here are not to be used by extension writers,
1829|and can be changed or removed without notice; same with macros.
1830|Some macros are provided for compatibility with the older,
1831|unadorned names, but this support may be disabled in a future release.
1832|
1833|Perl was originally written to handle US-ASCII only (that is characters
1834|whose ordinal numbers are in the range 0 - 127).
1835|And documentation and comments may still use the term ASCII, when
1836|sometimes in fact the entire range from 0 - 255 is meant.
1837|
1838|The non-ASCII characters below 256 can have various meanings, depending on
1839|various things.  (See, most notably, L<perllocale>.)  But usually the whole
1840|range can be referred to as ISO-8859-1.  Often, the term "Latin-1" (or
1841|"Latin1") is used as an equivalent for ISO-8859-1.  But some people treat
1842|"Latin1" as referring just to the characters in the range 128 through 255, or
1843|sometimes from 160 through 255.
1844|This documentation uses "Latin1" and "Latin-1" to refer to all 256 characters.
1845|
1846|Note that Perl can be compiled and run under either ASCII or EBCDIC (See
1847|L<perlebcdic>).  Most of the documentation (and even comments in the code)
1848|ignore the EBCDIC possibility.
1849|For almost all purposes the differences are transparent.
1850|As an example, under EBCDIC,
1851|instead of UTF-8, UTF-EBCDIC is used to encode Unicode strings, and so
1852|whenever this documentation refers to C<utf8>
1853|(and variants of that name, including in function names),
1854|it also (essentially transparently) means C<UTF-EBCDIC>.
1855|But the ordinals of characters differ between ASCII, EBCDIC, and
1856|the UTF- encodings, and a string encoded in UTF-EBCDIC may occupy a different
1857|number of bytes than in UTF-8.
1858|
1859|The organization of this document is tentative and subject to change.
1860|Suggestions and patches welcome
1861|L<perl5-porters\@perl.org|mailto:perl5-porters\@perl.org>.
1862|
1863|The sections in this document currently are
1864|
1865|=over $standard_indent
1866
1867|$section_list
1868|
1869|=back
1870|
1871|The listing below is alphabetical, case insensitive.
1872_EOB_
1873|=head1 AUTHORS
1874|
1875|Until May 1997, this document was maintained by Jeff Okamoto
1876|<okamoto\@corp.hp.com>.  It is now maintained as part of Perl itself.
1877|
1878|With lots of help and suggestions from Dean Roehrich, Malcolm Beattie,
1879|Andreas Koenig, Paul Hudson, Ilya Zakharevich, Paul Marquess, Neil
1880|Bowers, Matthew Green, Tim Bunce, Spider Boardman, Ulrich Pfeifer,
1881|Stephen McCamant, and Gurusamy Sarathy.
1882|
1883|API Listing originally by Dean Roehrich <roehrich\@cray.com>.
1884|
1885|Updated to be autogenerated from comments in the source by Benjamin Stuhl.
1886|
1887|=head1 SEE ALSO
1888|
1889|F<config.h>, $places_other_than_api
1890_EOE_
1891
1892# List of non-static internal functions
1893my @missing_guts =
1894 grep $funcflags{$_}{flags} !~ /[AS]/ && !$docs{guts}{$_}, keys %funcflags;
1895
1896output('perlintern', <<'_EOB_', $docs{guts}, \@missing_guts, <<"_EOE_");
1897|=head1 NAME
1898|
1899|perlintern - autogenerated documentation of purely B<internal>
1900|Perl functions
1901|
1902|=head1 DESCRIPTION
1903|X<internal Perl functions> X<interpreter functions>
1904|
1905|This file is the autogenerated documentation of functions in the
1906|Perl interpreter that are documented using Perl's internal documentation
1907|format but are not marked as part of the Perl API.  In other words,
1908|B<they are not for use in extensions>!
1909
1910|It has the same sections as L<perlapi>, though some may be empty.
1911|
1912_EOB_
1913|
1914|=head1 AUTHORS
1915|
1916|The autodocumentation system was originally added to the Perl core by
1917|Benjamin Stuhl.  Documentation is by whoever was kind enough to
1918|document their functions.
1919|
1920|=head1 SEE ALSO
1921|
1922|F<config.h>, $places_other_than_intern
1923_EOE_
1924