xref: /openbsd-src/gnu/usr.bin/perl/ext/Pod-Html/lib/Pod/Html.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1package Pod::Html;
2use strict;
3use Exporter 'import';
4
5our $VERSION = 1.35;
6$VERSION = eval $VERSION;
7our @EXPORT = qw(pod2html);
8
9use Config;
10use Cwd;
11use File::Basename;
12use File::Spec;
13use Pod::Simple::Search;
14use Pod::Simple::SimpleTree ();
15use Pod::Html::Util qw(
16    html_escape
17    process_command_line
18    trim_leading_whitespace
19    unixify
20    usage
21    htmlify
22    anchorify
23    relativize_url
24);
25use locale; # make \w work right in non-ASCII lands
26
27=head1 NAME
28
29Pod::Html - module to convert pod files to HTML
30
31=head1 SYNOPSIS
32
33    use Pod::Html;
34    pod2html([options]);
35
36=head1 DESCRIPTION
37
38Converts files from pod format (see L<perlpod>) to HTML format.  It
39can automatically generate indexes and cross-references, and it keeps
40a cache of things it knows how to cross-reference.
41
42=head1 FUNCTIONS
43
44=head2 pod2html
45
46    pod2html("pod2html",
47             "--podpath=lib:ext:pod:vms",
48             "--podroot=/usr/src/perl",
49             "--htmlroot=/perl/nmanual",
50             "--recurse",
51             "--infile=foo.pod",
52             "--outfile=/perl/nmanual/foo.html");
53
54pod2html takes the following arguments:
55
56=over 4
57
58=item backlink
59
60    --backlink
61
62Turns every C<head1> heading into a link back to the top of the page.
63By default, no backlinks are generated.
64
65=item cachedir
66
67    --cachedir=name
68
69Creates the directory cache in the given directory.
70
71=item css
72
73    --css=stylesheet
74
75Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
76C<style> attributes that are output by default (to avoid conflicts).
77
78=item flush
79
80    --flush
81
82Flushes the directory cache.
83
84=item header
85
86    --header
87    --noheader
88
89Creates header and footer blocks containing the text of the C<NAME>
90section.  By default, no headers are generated.
91
92=item help
93
94    --help
95
96Displays the usage message.
97
98=item htmldir
99
100    --htmldir=name
101
102Sets the directory to which all cross references in the resulting
103html file will be relative. Not passing this causes all links to be
104absolute since this is the value that tells Pod::Html the root of the
105documentation tree.
106
107Do not use this and --htmlroot in the same call to pod2html; they are
108mutually exclusive.
109
110=item htmlroot
111
112    --htmlroot=name
113
114Sets the base URL for the HTML files.  When cross-references are made,
115the HTML root is prepended to the URL.
116
117Do not use this if relative links are desired: use --htmldir instead.
118
119Do not pass both this and --htmldir to pod2html; they are mutually
120exclusive.
121
122=item index
123
124    --index
125    --noindex
126
127Generate an index at the top of the HTML file.  This is the default
128behaviour.
129
130=item infile
131
132    --infile=name
133
134Specify the pod file to convert.  Input is taken from STDIN if no
135infile is specified.
136
137=item outfile
138
139    --outfile=name
140
141Specify the HTML file to create.  Output goes to STDOUT if no outfile
142is specified.
143
144=item poderrors
145
146    --poderrors
147    --nopoderrors
148
149Include a "POD ERRORS" section in the outfile if there were any POD
150errors in the infile. This section is included by default.
151
152=item podpath
153
154    --podpath=name:...:name
155
156Specify which subdirectories of the podroot contain pod files whose
157HTML converted forms can be linked to in cross references.
158
159=item podroot
160
161    --podroot=name
162
163Specify the base directory for finding library pods. Default is the
164current working directory.
165
166=item quiet
167
168    --quiet
169    --noquiet
170
171Don't display I<mostly harmless> warning messages.  These messages
172will be displayed by default.  But this is not the same as C<verbose>
173mode.
174
175=item recurse
176
177    --recurse
178    --norecurse
179
180Recurse into subdirectories specified in podpath (default behaviour).
181
182=item title
183
184    --title=title
185
186Specify the title of the resulting HTML file.
187
188=item verbose
189
190    --verbose
191    --noverbose
192
193Display progress messages.  By default, they won't be displayed.
194
195=back
196
197=head2 Formerly Exported Auxiliary Functions
198
199Prior to perl-5.36, the following three functions were exported by
200F<Pod::Html>, either by default or on request:
201
202=over 4
203
204=item * C<htmlify()> (by default)
205
206=item * C<anchorify()> (upon request)
207
208=item * C<relativize_url()> (upon request)
209
210=back
211
212The definition and documentation of these functions have been moved to
213F<Pod::Html::Util>, viewable via C<perldoc Pod::Html::Util>.
214
215Beginning with perl-5.38 these functions must be explicitly imported from
216F<Pod::Html::Util>.  Please modify your code as needed.
217
218=head1 ENVIRONMENT
219
220Uses C<$Config{pod2html}> to setup default options.
221
222=head1 AUTHOR
223
224Marc Green, E<lt>marcgreen@cpan.orgE<gt>.
225
226Original version by Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
227
228=head1 SEE ALSO
229
230L<perlpod>
231
232=head1 COPYRIGHT
233
234This program is distributed under the Artistic License.
235
236=cut
237
238sub new {
239    my $class = shift;
240    return bless {}, $class;
241}
242
243sub pod2html {
244    local(@ARGV) = @_;
245    local $_;
246
247    my $self = Pod::Html->new();
248    $self->init_globals();
249
250    my $opts = process_command_line;
251    $self->process_options($opts);
252
253    $self->refine_globals();
254
255    # load or generate/cache %Pages
256    unless ($self->get_cache()) {
257        # generate %Pages
258        #%Pages = $self->generate_cache(\%Pages);
259        $self->generate_cache($self->{Pages});
260    }
261    my $input   = $self->identify_input();
262    my $podtree = $self->parse_input_for_podtree($input);
263    $self->set_Title_from_podtree($podtree);
264
265    # set options for the HTML generator
266    my $parser = Pod::Simple::XHTML::LocalPodLinks->new();
267    $parser->codes_in_verbatim(0);
268    $parser->anchor_items(1); # the old Pod::Html always did
269    $parser->backlink($self->{Backlink}); # linkify =head1 directives
270    $parser->force_title($self->{Title});
271    $parser->htmldir($self->{Htmldir});
272    $parser->htmlfileurl($self->{Htmlfileurl});
273    $parser->htmlroot($self->{Htmlroot});
274    $parser->index($self->{Doindex});
275    $parser->output_string(\$self->{output}); # written to file later
276    #$parser->pages(\%Pages);
277    $parser->pages($self->{Pages});
278    $parser->quiet($self->{Quiet});
279    $parser->verbose($self->{Verbose});
280
281    $parser = $self->refine_parser($parser);
282    $self->feed_tree_to_parser($parser, $podtree);
283    $self->write_file();
284}
285
286sub init_globals {
287    my $self = shift;
288    $self->{Cachedir} = ".";            # The directory to which directory caches
289                                        #   will be written.
290
291    $self->{Dircache} = "pod2htmd.tmp";
292
293    $self->{Htmlroot} = "/";            # http-server base directory from which all
294                                        #   relative paths in $podpath stem.
295    $self->{Htmldir} = "";              # The directory to which the html pages
296                                        #   will (eventually) be written.
297    $self->{Htmlfile} = "";             # write to stdout by default
298    $self->{Htmlfileurl} = "";          # The url that other files would use to
299                                        # refer to this file.  This is only used
300                                        # to make relative urls that point to
301                                        # other files.
302
303    $self->{Poderrors} = 1;
304    $self->{Podfile} = "";              # read from stdin by default
305    $self->{Podpath} = [];              # list of directories containing library pods.
306    $self->{Podroot} = $self->{Curdir} = File::Spec->curdir;
307                                        # filesystem base directory from which all
308                                        #   relative paths in $podpath stem.
309    $self->{Css} = '';                  # Cascading style sheet
310    $self->{Recurse} = 1;               # recurse on subdirectories in $podpath.
311    $self->{Quiet} = 0;                 # not quiet by default
312    $self->{Verbose} = 0;               # not verbose by default
313    $self->{Doindex} = 1;               # non-zero if we should generate an index
314    $self->{Backlink} = 0;              # no backlinks added by default
315    $self->{Header} = 0;                # produce block header/footer
316    $self->{Title} = undef;             # title to give the pod(s)
317    $self->{Saved_Cache_Key} = '';
318    $self->{Pages} = {};
319    return $self;
320}
321
322sub process_options {
323    my ($self, $opts) = @_;
324
325    $self->{Podpath}   = (defined $opts->{podpath})
326                            ? [ split(":", $opts->{podpath}) ]
327                            : [];
328
329    $self->{Backlink}  =          $opts->{backlink}   if defined $opts->{backlink};
330    $self->{Cachedir}  =  unixify($opts->{cachedir})  if defined $opts->{cachedir};
331    $self->{Css}       =          $opts->{css}        if defined $opts->{css};
332    $self->{Header}    =          $opts->{header}     if defined $opts->{header};
333    $self->{Htmldir}   =  unixify($opts->{htmldir})   if defined $opts->{htmldir};
334    $self->{Htmlroot}  =  unixify($opts->{htmlroot})  if defined $opts->{htmlroot};
335    $self->{Doindex}   =          $opts->{index}      if defined $opts->{index};
336    $self->{Podfile}   =  unixify($opts->{infile})    if defined $opts->{infile};
337    $self->{Htmlfile}  =  unixify($opts->{outfile})   if defined $opts->{outfile};
338    $self->{Poderrors} =          $opts->{poderrors}  if defined $opts->{poderrors};
339    $self->{Podroot}   =  unixify($opts->{podroot})   if defined $opts->{podroot};
340    $self->{Quiet}     =          $opts->{quiet}      if defined $opts->{quiet};
341    $self->{Recurse}   =          $opts->{recurse}    if defined $opts->{recurse};
342    $self->{Title}     =          $opts->{title}      if defined $opts->{title};
343    $self->{Verbose}   =          $opts->{verbose}    if defined $opts->{verbose};
344
345    warn "Flushing directory caches\n"
346        if $opts->{verbose} && defined $opts->{flush};
347    $self->{Dircache} = "$self->{Cachedir}/pod2htmd.tmp";
348    if (defined $opts->{flush}) {
349        1 while unlink($self->{Dircache});
350    }
351    return $self;
352}
353
354sub refine_globals {
355    my $self = shift;
356
357    # prevent '//' in urls
358    $self->{Htmlroot} = "" if $self->{Htmlroot} eq "/";
359    $self->{Htmldir} =~ s#/\z##;
360
361    if (  $self->{Htmlroot} eq ''
362       && defined( $self->{Htmldir} )
363       && $self->{Htmldir} ne ''
364       && substr( $self->{Htmlfile}, 0, length( $self->{Htmldir} ) ) eq $self->{Htmldir}
365       ) {
366        # Set the 'base' url for this file, so that we can use it
367        # as the location from which to calculate relative links
368        # to other files. If this is '', then absolute links will
369        # be used throughout.
370        #$self->{Htmlfileurl} = "$self->{Htmldir}/" . substr( $self->{Htmlfile}, length( $self->{Htmldir} ) + 1);
371        # Is the above not just "$self->{Htmlfileurl} = $self->{Htmlfile}"?
372        $self->{Htmlfileurl} = unixify($self->{Htmlfile});
373    }
374    return $self;
375}
376
377sub generate_cache {
378    my $self = shift;
379    my $pwd = getcwd();
380    chdir($self->{Podroot}) ||
381        die "$0: error changing to directory $self->{Podroot}: $!\n";
382
383    # find all pod modules/pages in podpath, store in %Pages
384    # - inc(0): do not prepend directories in @INC to search list;
385    #     limit search to those in @{$self->{Podpath}}
386    # - verbose: report (via 'warn') what search is doing
387    # - laborious: to allow '.' in dirnames (e.g., /usr/share/perl/5.14.1)
388    # - recurse: go into subdirectories
389    # - survey: search for POD files in PodPath
390    my ($name2path, $path2name) =
391        Pod::Simple::Search->new->inc(0)->verbose($self->{Verbose})->laborious(1)
392        ->recurse($self->{Recurse})->survey(@{$self->{Podpath}});
393    # remove Podroot and extension from each file
394    for my $k (keys %{$name2path}) {
395        $self->{Pages}{$k} = _transform($self, $name2path->{$k});
396    }
397
398    chdir($pwd) || die "$0: error changing to directory $pwd: $!\n";
399
400    # cache the directory list for later use
401    warn "caching directories for later use\n" if $self->{Verbose};
402    open my $cache, '>', $self->{Dircache}
403        or die "$0: error open $self->{Dircache} for writing: $!\n";
404
405    print $cache join(":", @{$self->{Podpath}}) . "\n$self->{Podroot}\n";
406    my $_updirs_only = ($self->{Podroot} =~ /\.\./) && !($self->{Podroot} =~ /[^\.\\\/]/);
407    foreach my $key (keys %{$self->{Pages}}) {
408        if($_updirs_only) {
409          my $_dirlevel = $self->{Podroot};
410          while($_dirlevel =~ /\.\./) {
411            $_dirlevel =~ s/\.\.//;
412            # Assume $Pagesref->{$key} has '/' separators (html dir separators).
413            $self->{Pages}->{$key} =~ s/^[\w\s\-\.]+\///;
414          }
415        }
416        print $cache "$key $self->{Pages}->{$key}\n";
417    }
418    close $cache or die "error closing $self->{Dircache}: $!";
419}
420
421sub _transform {
422    my ($self, $v) = @_;
423    $v = $self->{Podroot} eq File::Spec->curdir
424               ? File::Spec->abs2rel($v)
425               : File::Spec->abs2rel($v,
426                                     File::Spec->canonpath($self->{Podroot}));
427
428    # Convert path to unix style path
429    $v = unixify($v);
430
431    my ($file, $dir) = fileparse($v, qr/\.[^.]*/); # strip .ext
432    return $dir.$file;
433}
434
435sub get_cache {
436    my $self = shift;
437
438    # A first-level cache:
439    # Don't bother reading the cache files if they still apply
440    # and haven't changed since we last read them.
441
442    my $this_cache_key = $self->cache_key();
443    return 1 if $self->{Saved_Cache_Key} and $this_cache_key eq $self->{Saved_Cache_Key};
444    $self->{Saved_Cache_Key} = $this_cache_key;
445
446    # load the cache of %Pages if possible.  $tests will be
447    # non-zero if successful.
448    my $tests = 0;
449    if (-f $self->{Dircache}) {
450        warn "scanning for directory cache\n" if $self->{Verbose};
451        $tests = $self->load_cache();
452    }
453
454    return $tests;
455}
456
457sub cache_key {
458    my $self = shift;
459    return join('!',
460        $self->{Dircache},
461        $self->{Recurse},
462        @{$self->{Podpath}},
463        $self->{Podroot},
464        stat($self->{Dircache}),
465    );
466}
467
468#
469# load_cache - tries to find if the cache stored in $dircache is a valid
470#  cache of %Pages.  if so, it loads them and returns a non-zero value.
471#
472sub load_cache {
473    my $self = shift;
474    my $tests = 0;
475    local $_;
476
477    warn "scanning for directory cache\n" if $self->{Verbose};
478    open(my $cachefh, '<', $self->{Dircache}) ||
479        die "$0: error opening $self->{Dircache} for reading: $!\n";
480    $/ = "\n";
481
482    # is it the same podpath?
483    $_ = <$cachefh>;
484    chomp($_);
485    $tests++ if (join(":", @{$self->{Podpath}}) eq $_);
486
487    # is it the same podroot?
488    $_ = <$cachefh>;
489    chomp($_);
490    $tests++ if ($self->{Podroot} eq $_);
491
492    # load the cache if its good
493    if ($tests != 2) {
494        close($cachefh);
495        return 0;
496    }
497
498    warn "loading directory cache\n" if $self->{Verbose};
499    while (<$cachefh>) {
500        /(.*?) (.*)$/;
501        $self->{Pages}->{$1} = $2;
502    }
503
504    close($cachefh);
505    return 1;
506}
507
508sub identify_input {
509    my $self = shift;
510    my $input;
511    unless (@ARGV && $ARGV[0]) {
512        if ($self->{Podfile} and $self->{Podfile} ne '-') {
513            $input = $self->{Podfile};
514        } else {
515            $input = '-'; # XXX: make a test case for this
516        }
517    } else {
518        $self->{Podfile} = $ARGV[0];
519        $input = *ARGV;
520    }
521    return $input;
522}
523
524sub parse_input_for_podtree {
525    my ($self, $input) = @_;
526    # set options for input parser
527    my $input_parser = Pod::Simple::SimpleTree->new;
528    # Normalize whitespace indenting
529    $input_parser->strip_verbatim_indent(\&trim_leading_whitespace);
530
531    $input_parser->codes_in_verbatim(0);
532    $input_parser->accept_targets(qw(html HTML));
533    $input_parser->no_errata_section(!$self->{Poderrors}); # note the inverse
534
535    warn "Converting input file $self->{Podfile}\n" if $self->{Verbose};
536    my $podtree = $input_parser->parse_file($input)->root;
537    return $podtree;
538}
539
540sub set_Title_from_podtree {
541    my ($self, $podtree) = @_;
542    unless(defined $self->{Title}) {
543        if($podtree->[0] eq "Document" && ref($podtree->[2]) eq "ARRAY" &&
544            $podtree->[2]->[0] eq "head1" && @{$podtree->[2]} == 3 &&
545            ref($podtree->[2]->[2]) eq "" && $podtree->[2]->[2] eq "NAME" &&
546            ref($podtree->[3]) eq "ARRAY" && $podtree->[3]->[0] eq "Para" &&
547            @{$podtree->[3]} >= 3 &&
548            !(grep { ref($_) ne "" }
549                @{$podtree->[3]}[2..$#{$podtree->[3]}]) &&
550            (@$podtree == 4 ||
551                (ref($podtree->[4]) eq "ARRAY" &&
552                $podtree->[4]->[0] eq "head1"))) {
553            $self->{Title} = join("", @{$podtree->[3]}[2..$#{$podtree->[3]}]);
554        }
555    }
556
557    $self->{Title} //= "";
558    $self->{Title} = html_escape($self->{Title});
559    return $self;
560}
561
562sub refine_parser {
563    my ($self, $parser) = @_;
564    # We need to add this ourselves because we use our own header, not
565    # ::XHTML's header. We need to set $parser->backlink to linkify
566    # the =head1 directives
567    my $bodyid = $self->{Backlink} ? ' id="_podtop_"' : '';
568
569    my $csslink = '';
570    my $tdstyle = ' style="background-color: #cccccc; color: #000"';
571
572    if ($self->{Css}) {
573        $csslink = qq(\n<link rel="stylesheet" href="$self->{Css}" type="text/css" />);
574        $csslink =~ s,\\,/,g;
575        $csslink =~ s,(/.):,$1|,;
576        $tdstyle= '';
577    }
578
579    # header/footer block
580    my $block = $self->{Header} ? <<END_OF_BLOCK : '';
581<table border="0" width="100%" cellspacing="0" cellpadding="3">
582<tr><td class="_podblock_"$tdstyle valign="middle">
583<big><strong><span class="_podblock_">&nbsp;$self->{Title}</span></strong></big>
584</td></tr>
585</table>
586END_OF_BLOCK
587
588    # create own header/footer because of --header
589    $parser->html_header(<<"HTMLHEAD");
590<?xml version="1.0" ?>
591<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
592<html xmlns="http://www.w3.org/1999/xhtml">
593<head>
594<title>$self->{Title}</title>$csslink
595<meta http-equiv="content-type" content="text/html; charset=utf-8" />
596<link rev="made" href="mailto:$Config{perladmin}" />
597</head>
598
599<body$bodyid>
600$block
601HTMLHEAD
602
603    $parser->html_footer(<<"HTMLFOOT");
604$block
605</body>
606
607</html>
608HTMLFOOT
609    return $parser;
610}
611
612# This sub duplicates the guts of Pod::Simple::FromTree.  We could have
613# used that module, except that it would have been a non-core dependency.
614sub feed_tree_to_parser {
615    my($self, $parser, $tree) = @_;
616    if(ref($tree) eq "") {
617        $parser->_handle_text($tree);
618    } elsif(!($tree->[0] eq "X" && $parser->nix_X_codes)) {
619        $parser->_handle_element_start($tree->[0], $tree->[1]);
620        $self->feed_tree_to_parser($parser, $_) foreach @{$tree}[2..$#$tree];
621        $parser->_handle_element_end($tree->[0]);
622    }
623}
624
625sub write_file {
626    my $self = shift;
627    $self->{Htmlfile} = "-" unless $self->{Htmlfile}; # stdout
628    my $fhout;
629    if($self->{Htmlfile} and $self->{Htmlfile} ne '-') {
630        open $fhout, ">", $self->{Htmlfile}
631            or die "$0: cannot open $self->{Htmlfile} file for output: $!\n";
632    } else {
633        open $fhout, ">-";
634    }
635    binmode $fhout, ":utf8";
636    print $fhout $self->{output};
637    close $fhout or die "Failed to close $self->{Htmlfile}: $!";
638    chmod 0644, $self->{Htmlfile} unless $self->{Htmlfile} eq '-';
639}
640
641package Pod::Simple::XHTML::LocalPodLinks;
642use strict;
643use warnings;
644use parent 'Pod::Simple::XHTML';
645
646use File::Spec;
647use File::Spec::Unix;
648
649__PACKAGE__->_accessorize(
650 'htmldir',
651 'htmlfileurl',
652 'htmlroot',
653 'pages', # Page name => relative/path/to/page from root POD dir
654 'quiet',
655 'verbose',
656);
657
658sub resolve_pod_page_link {
659    my ($self, $to, $section) = @_;
660
661    return undef unless defined $to || defined $section;
662    if (defined $section) {
663        $section = '#' . $self->idify($section, 1);
664        return $section unless defined $to;
665    } else {
666        $section = '';
667    }
668
669    my $path; # path to $to according to %Pages
670    unless (exists $self->pages->{$to}) {
671        # Try to find a POD that ends with $to and use that.
672        # e.g., given L<XHTML>, if there is no $Podpath/XHTML in %Pages,
673        # look for $Podpath/*/XHTML in %Pages, with * being any path,
674        # as a substitute (e.g., $Podpath/Pod/Simple/XHTML)
675        my @matches;
676        foreach my $modname (keys %{$self->pages}) {
677            push @matches, $modname if $modname =~ /::\Q$to\E\z/;
678        }
679
680        # make it look like a path instead of a namespace
681        my $modloc = File::Spec->catfile(split(/::/, $to));
682
683        if ($#matches == -1) {
684            warn "Cannot find file \"$modloc.*\" directly under podpath, " .
685                 "cannot find suitable replacement: link remains unresolved.\n"
686                 if $self->verbose;
687            return '';
688        } elsif ($#matches == 0) {
689            $path = $self->pages->{$matches[0]};
690            my $matchloc = File::Spec->catfile(split(/::/, $path));
691            warn "Cannot find file \"$modloc.*\" directly under podpath, but ".
692                 "I did find \"$matchloc.*\", so I'll assume that is what you ".
693                 "meant to link to.\n"
694                 if $self->verbose;
695        } else {
696            # Use [-1] so newer (higher numbered) perl PODs are used
697            # XXX currently, @matches isn't sorted so this is not true
698            $path = $self->pages->{$matches[-1]};
699            my $matchloc = File::Spec->catfile(split(/::/, $path));
700            warn "Cannot find file \"$modloc.*\" directly under podpath, but ".
701                 "I did find \"$matchloc.*\" (among others), so I'll use that " .
702                 "to resolve the link.\n" if $self->verbose;
703        }
704    } else {
705        $path = $self->pages->{$to};
706    }
707
708    my $url = File::Spec::Unix->catfile(Pod::Html::Util::unixify($self->htmlroot),
709                                        $path);
710
711    if ($self->htmlfileurl ne '') {
712        # then $self->htmlroot eq '' (by definition of htmlfileurl) so
713        # $self->htmldir needs to be prepended to link to get the absolute path
714        # that will be relativized
715        $url = Pod::Html::Util::relativize_url(
716            File::Spec::Unix->catdir(Pod::Html::Util::unixify($self->htmldir), $url),
717            $self->htmlfileurl # already unixified
718        );
719    }
720
721    return $url . ".html$section";
722}
723
7241;
725