xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1=pod
2
3=head1 NAME
4
5Pod::Simple::XHTML -- format Pod as validating XHTML
6
7=head1 SYNOPSIS
8
9  use Pod::Simple::XHTML;
10
11  my $parser = Pod::Simple::XHTML->new();
12
13  ...
14
15  $parser->parse_file('path/to/file.pod');
16
17=head1 DESCRIPTION
18
19This class is a formatter that takes Pod and renders it as XHTML
20validating HTML.
21
22This is a subclass of L<Pod::Simple::Methody> and inherits all its
23methods. The implementation is entirely different than
24L<Pod::Simple::HTML>, but it largely preserves the same interface.
25
26=head2 Minimal code
27
28  use Pod::Simple::XHTML;
29  my $psx = Pod::Simple::XHTML->new;
30  $psx->output_string(\my $html);
31  $psx->parse_file('path/to/Module/Name.pm');
32  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
33  print $out $html;
34
35You can also control the character encoding and entities. For example, if
36you're sure that the POD is properly encoded (using the C<=encoding> command),
37you can prevent high-bit characters from being encoded as HTML entities and
38declare the output character set as UTF-8 before parsing, like so:
39
40  $psx->html_charset('UTF-8');
41use warnings;
42  $psx->html_encode_chars(q{&<>'"});
43
44=cut
45
46package Pod::Simple::XHTML;
47use strict;
48our $VERSION = '3.45';
49use Pod::Simple::Methody ();
50our @ISA = ('Pod::Simple::Methody');
51
52our $HAS_HTML_ENTITIES;
53BEGIN {
54  $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
55}
56
57my %entities = (
58  q{>} => 'gt',
59  q{<} => 'lt',
60  q{'} => '#39',
61  q{"} => 'quot',
62  q{&} => 'amp',
63);
64
65sub encode_entities {
66  my $self = shift;
67  my $ents = $self->html_encode_chars;
68  return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
69  if (defined $ents) {
70      $ents =~ s,(?<!\\)([]/]),\\$1,g;
71      $ents =~ s,(?<!\\)\\\z,\\\\,;
72  } else {
73      $ents = join '', keys %entities;
74  }
75  my $str = $_[0];
76  $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
77  return $str;
78}
79
80my %entity_to_char = reverse %entities;
81my ($entity_re) = map qr{$_}, join '|', map quotemeta, sort keys %entity_to_char;
82
83sub decode_entities {
84  my ($self, $string) = @_;
85  return HTML::Entities::decode_entities( $string ) if $HAS_HTML_ENTITIES;
86
87  $string =~ s{&(?:($entity_re)|#x([0123456789abcdefABCDEF]+)|#([0123456789]+));}{
88      defined $1 ? $entity_to_char{$1}
89    : defined $2 ? chr(hex($2))
90    : defined $3 ? chr($3)
91    : die;
92  }ge;
93
94  return $string;
95}
96
97sub encode_url {
98  my ($self, $string) = @_;
99
100  $string =~ s{([^-_.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZZ0123456789])}{
101    sprintf('%%%02X', ord($1))
102  }eg;
103
104  return $string;
105}
106
107#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
108
109=head1 METHODS
110
111Pod::Simple::XHTML offers a number of methods that modify the format of
112the HTML output. Call these after creating the parser object, but before
113the call to C<parse_file>:
114
115  my $parser = Pod::PseudoPod::HTML->new();
116  $parser->set_optional_param("value");
117  $parser->parse_file($file);
118
119=head2 perldoc_url_prefix
120
121In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
122to put before the "Foo%3a%3aBar". The default value is
123"https://metacpan.org/pod/".
124
125=head2 perldoc_url_postfix
126
127What to put after "Foo%3a%3aBar" in the URL. This option is not set by
128default.
129
130=head2 man_url_prefix
131
132In turning C<< L<crontab(5)> >> into http://whatever/man/1/crontab, what
133to put before the "1/crontab". The default value is
134"http://man.he.net/man".
135
136=head2 man_url_postfix
137
138What to put after "1/crontab" in the URL. This option is not set by default.
139
140=head2 title_prefix, title_postfix
141
142What to put before and after the title in the head. The values should
143already be &-escaped.
144
145=head2 html_css
146
147  $parser->html_css('path/to/style.css');
148
149The URL or relative path of a CSS file to include. This option is not
150set by default.
151
152=head2 html_javascript
153
154The URL or relative path of a JavaScript file to pull in. This option is
155not set by default.
156
157=head2 html_doctype
158
159A document type tag for the file. This option is not set by default.
160
161=head2 html_charset
162
163The character set to declare in the Content-Type meta tag created by default
164for C<html_header_tags>. Note that this option will be ignored if the value of
165C<html_header_tags> is changed. Defaults to "ISO-8859-1".
166
167=head2 html_header_tags
168
169Additional arbitrary HTML tags for the header of the document. The
170default value is just a content type header tag:
171
172  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
173
174Add additional meta tags here, or blocks of inline CSS or JavaScript
175(wrapped in the appropriate tags).
176
177=head3 html_encode_chars
178
179A string containing all characters that should be encoded as HTML entities,
180specified using the regular expression character class syntax (what you find
181within brackets in regular expressions). This value will be passed as the
182second argument to the C<encode_entities> function of L<HTML::Entities>. If
183L<HTML::Entities> is not installed, then any characters other than C<&<>"'>
184will be encoded numerically.
185
186=head2 html_h_level
187
188This is the level of HTML "Hn" element to which a Pod "head1" corresponds.  For
189example, if C<html_h_level> is set to 2, a head1 will produce an H2, a head2
190will produce an H3, and so on.
191
192=head2 default_title
193
194Set a default title for the page if no title can be determined from the
195content. The value of this string should already be &-escaped.
196
197=head2 force_title
198
199Force a title for the page (don't try to determine it from the content).
200The value of this string should already be &-escaped.
201
202=head2 html_header, html_footer
203
204Set the HTML output at the beginning and end of each file. The default
205header includes a title, a doctype tag (if C<html_doctype> is set), a
206content tag (customized by C<html_header_tags>), a tag for a CSS file
207(if C<html_css> is set), and a tag for a Javascript file (if
208C<html_javascript> is set). The default footer simply closes the C<html>
209and C<body> tags.
210
211The options listed above customize parts of the default header, but
212setting C<html_header> or C<html_footer> completely overrides the
213built-in header or footer. These may be useful if you want to use
214template tags instead of literal HTML headers and footers or are
215integrating converted POD pages in a larger website.
216
217If you want no headers or footers output in the HTML, set these options
218to the empty string.
219
220=head2 index
221
222Whether to add a table-of-contents at the top of each page (called an
223index for the sake of tradition).
224
225=head2 anchor_items
226
227Whether to anchor every definition C<=item> directive. This needs to be
228enabled if you want to be able to link to specific C<=item> directives, which
229are output as C<< <dt> >> elements. Disabled by default.
230
231=head2 backlink
232
233Whether to turn every =head1 directive into a link pointing to the top
234of the page (specifically, the opening body tag).
235
236=cut
237
238__PACKAGE__->_accessorize(
239 'perldoc_url_prefix',
240 'perldoc_url_postfix',
241 'man_url_prefix',
242 'man_url_postfix',
243 'title_prefix',  'title_postfix',
244 'html_css',
245 'html_javascript',
246 'html_doctype',
247 'html_charset',
248 'html_encode_chars',
249 'html_h_level',
250 'title', # Used internally for the title extracted from the content
251 'default_title',
252 'force_title',
253 'html_header',
254 'html_footer',
255 'index',
256 'anchor_items',
257 'backlink',
258 'batch_mode', # whether we're in batch mode
259 'batch_mode_current_level',
260    # When in batch mode, how deep the current module is: 1 for "LWP",
261    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
262);
263
264#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
265
266=head1 SUBCLASSING
267
268If the standard options aren't enough, you may want to subclass
269Pod::Simple::XHMTL. These are the most likely candidates for methods
270you'll want to override when subclassing.
271
272=cut
273
274sub new {
275  my $self = shift;
276  my $new = $self->SUPER::new(@_);
277  $new->{'output_fh'} ||= *STDOUT{IO};
278  $new->perldoc_url_prefix('https://metacpan.org/pod/');
279  $new->man_url_prefix('http://man.he.net/man');
280  $new->html_charset('ISO-8859-1');
281  $new->nix_X_codes(1);
282  $new->{'scratch'} = '';
283  $new->{'to_index'} = [];
284  $new->{'output'} = [];
285  $new->{'saved'} = [];
286  $new->{'ids'} = { '_podtop_' => 1 }; # used in <body>
287  $new->{'in_li'} = [];
288
289  $new->{'__region_targets'}  = [];
290  $new->{'__literal_targets'} = {};
291  $new->accept_targets_as_html( 'html', 'HTML' );
292
293  return $new;
294}
295
296sub html_header_tags {
297    my $self = shift;
298    return $self->{html_header_tags} = shift if @_;
299    return $self->{html_header_tags}
300        ||= '<meta http-equiv="Content-Type" content="text/html; charset='
301            . $self->html_charset . '" />';
302}
303
304#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
305
306=head2 handle_text
307
308This method handles the body of text within any element: it's the body
309of a paragraph, or everything between a "=begin" tag and the
310corresponding "=end" tag, or the text within an L entity, etc. You would
311want to override this if you are adding a custom element type that does
312more than just display formatted text. Perhaps adding a way to generate
313HTML tables from an extended version of POD.
314
315So, let's say you want to add a custom element called 'foo'. In your
316subclass's C<new> method, after calling C<SUPER::new> you'd call:
317
318  $new->accept_targets_as_text( 'foo' );
319
320Then override the C<start_for> method in the subclass to check for when
321"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
322you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
323C<handle_text> method to check for the flag, and pass $text to your
324custom subroutine to construct the HTML output for 'foo' elements,
325something like:
326
327  sub handle_text {
328      my ($self, $text) = @_;
329      if ($self->{'in_foo'}) {
330          $self->{'scratch'} .= build_foo_html($text);
331          return;
332      }
333      $self->SUPER::handle_text($text);
334  }
335
336=head2 handle_code
337
338This method handles the body of text that is marked up to be code.
339You might for instance override this to plug in a syntax highlighter.
340The base implementation just escapes the text.
341
342The callback methods C<start_code> and C<end_code> emits the C<code> tags
343before and after C<handle_code> is invoked, so you might want to override these
344together with C<handle_code> if this wrapping isn't suitable.
345
346Note that the code might be broken into multiple segments if there are
347nested formatting codes inside a C<< CE<lt>...> >> sequence.  In between the
348calls to C<handle_code> other markup tags might have been emitted in that
349case.  The same is true for verbatim sections if the C<codes_in_verbatim>
350option is turned on.
351
352=head2 accept_targets_as_html
353
354This method behaves like C<accept_targets_as_text>, but also marks the region
355as one whose content should be emitted literally, without HTML entity escaping
356or wrapping in a C<div> element.
357
358=cut
359
360sub __in_literal_xhtml_region {
361    return unless @{ $_[0]{__region_targets} };
362    my $target = $_[0]{__region_targets}[-1];
363    return $_[0]{__literal_targets}{ $target };
364}
365
366sub accept_targets_as_html {
367    my ($self, @targets) = @_;
368    $self->accept_targets(@targets);
369    $self->{__literal_targets}{$_} = 1 for @targets;
370}
371
372sub handle_text {
373    # escape special characters in HTML (<, >, &, etc)
374    my $text = $_[1];
375    my $html;
376    if ($_[0]->__in_literal_xhtml_region) {
377        $html = $text;
378        $text =~ s{<[^>]+?>}{}g;
379        $text = $_[0]->decode_entities($text);
380    }
381    else {
382        $html = $_[0]->encode_entities($text);
383    }
384
385    if ($_[0]{'in_code'} && @{$_[0]{'in_code'}}) {
386        # Intentionally use the raw text in $_[1], even if we're not in a
387        # literal xhtml region, since handle_code calls encode_entities.
388        $_[0]->handle_code( $_[1], $_[0]{'in_code'}[-1] );
389    } else {
390        if ($_[0]->{in_for}) {
391            my $newlines = $_[0]->__in_literal_xhtml_region ? "\n\n" : '';
392            if ($_[0]->{started_for}) {
393                if ($html =~ /\S/) {
394                    delete $_[0]->{started_for};
395                    $_[0]{'scratch'} .= $html . $newlines;
396                }
397                # Otherwise, append nothing until we have something to append.
398            } else {
399                # The parser sometimes preserves newlines and sometimes doesn't!
400                $html =~ s/\n\z//;
401                $_[0]{'scratch'} .= $html . $newlines;
402            }
403        } else {
404            # Just plain text.
405            $_[0]{'scratch'} .= $html;
406        }
407    }
408
409    $_[0]{hhtml} .= $html if $_[0]{'in_head'};
410    $_[0]{htext} .= $text if $_[0]{'in_head'};
411    $_[0]{itext} .= $text if $_[0]{'in_item_text'};
412}
413
414sub start_code {
415    $_[0]{'scratch'} .= '<code>';
416}
417
418sub end_code {
419    $_[0]{'scratch'} .= '</code>';
420}
421
422sub handle_code {
423    $_[0]{'scratch'} .= $_[0]->encode_entities( $_[1] );
424}
425
426sub start_Para {
427    $_[0]{'scratch'} .= '<p>';
428}
429
430sub start_Verbatim {
431    $_[0]{'scratch'} = '<pre>';
432    push(@{$_[0]{'in_code'}}, 'Verbatim');
433    $_[0]->start_code($_[0]{'in_code'}[-1]);
434}
435
436sub start_head1 {  $_[0]{'in_head'} = 1; $_[0]{htext} = $_[0]{hhtml} = ''; }
437sub start_head2 {  $_[0]{'in_head'} = 2; $_[0]{htext} = $_[0]{hhtml} = ''; }
438sub start_head3 {  $_[0]{'in_head'} = 3; $_[0]{htext} = $_[0]{hhtml} = ''; }
439sub start_head4 {  $_[0]{'in_head'} = 4; $_[0]{htext} = $_[0]{hhtml} = ''; }
440sub start_head5 {  $_[0]{'in_head'} = 5; $_[0]{htext} = $_[0]{hhtml} = ''; }
441sub start_head6 {  $_[0]{'in_head'} = 6; $_[0]{htext} = $_[0]{hhtml} = ''; }
442
443sub start_item_number {
444    $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
445    $_[0]{'scratch'} .= '<li><p>';
446    push @{$_[0]{'in_li'}}, 1;
447}
448
449sub start_item_bullet {
450    $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
451    $_[0]{'scratch'} .= '<li><p>';
452    push @{$_[0]{'in_li'}}, 1;
453}
454
455sub start_item_text   {
456    $_[0]{'in_item_text'} = 1; $_[0]{itext} = '';
457    # see end_item_text
458}
459
460sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
461sub start_over_block  { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
462sub start_over_number { $_[0]{'scratch'} = '<ol>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
463sub start_over_text   {
464    $_[0]{'scratch'} = '<dl>';
465    $_[0]{'dl_level'}++;
466    $_[0]{'in_dd'} ||= [];
467    $_[0]->emit
468}
469
470sub end_over_block  { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
471
472sub end_over_number   {
473    $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
474    $_[0]{'scratch'} .= '</ol>';
475    pop @{$_[0]{'in_li'}};
476    $_[0]->emit;
477}
478
479sub end_over_bullet   {
480    $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
481    $_[0]{'scratch'} .= '</ul>';
482    pop @{$_[0]{'in_li'}};
483    $_[0]->emit;
484}
485
486sub end_over_text   {
487    if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
488        $_[0]{'scratch'} = "</dd>\n";
489        $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
490    }
491    $_[0]{'scratch'} .= '</dl>';
492    $_[0]{'dl_level'}--;
493    $_[0]->emit;
494}
495
496# . . . . . Now the actual formatters:
497
498sub end_Para     { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
499sub end_Verbatim {
500    $_[0]->end_code(pop(@{$_[0]->{'in_code'}}));
501    $_[0]{'scratch'} .= '</pre>';
502    $_[0]->emit;
503}
504
505sub _end_head {
506    my $h = delete $_[0]{in_head};
507
508    my $add = $_[0]->html_h_level;
509    $add = 1 unless defined $add;
510    $h += $add - 1;
511
512    my $id = $_[0]->idify(delete $_[0]{htext});
513    my $text = $_[0]{scratch};
514    my $head = qq{<h$h id="} . $_[0]->encode_entities($id) . qq{">$text</h$h>};
515    $_[0]{'scratch'} = $_[0]->backlink && ($h - $add == 0)
516                         # backlinks enabled && =head1
517                         ? qq{<a href="#_podtop_">$head</a>}
518                         : $head;
519    $_[0]->emit;
520    push @{ $_[0]{'to_index'} }, [$h, $id, delete $_[0]{'hhtml'}];
521}
522
523sub end_head1       { shift->_end_head(@_); }
524sub end_head2       { shift->_end_head(@_); }
525sub end_head3       { shift->_end_head(@_); }
526sub end_head4       { shift->_end_head(@_); }
527sub end_head5       { shift->_end_head(@_); }
528sub end_head6       { shift->_end_head(@_); }
529
530sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
531sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
532
533sub end_item_text   {
534    # idify and anchor =item content if wanted
535    my $dt_id = $_[0]{'anchor_items'}
536                 ? ' id="'. $_[0]->encode_entities($_[0]->idify($_[0]{'itext'})) .'"'
537                 : '';
538
539    # reset scratch
540    my $text = $_[0]{scratch};
541    $_[0]{'scratch'} = '';
542
543    if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
544        $_[0]{'scratch'} = "</dd>\n";
545        $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
546    }
547
548    $_[0]{'scratch'} .= qq{<dt$dt_id>$text</dt>\n<dd>};
549    $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 1;
550    $_[0]->emit;
551}
552
553# This handles =begin and =for blocks of all kinds.
554sub start_for {
555  my ($self, $flags) = @_;
556
557  push @{ $self->{__region_targets} }, $flags->{target_matching};
558  $self->{started_for} = 1;
559  $self->{in_for} = 1;
560
561  unless ($self->__in_literal_xhtml_region) {
562    $self->{scratch} .= '<div';
563    $self->{scratch} .= qq( class="$flags->{target}") if $flags->{target};
564    $self->{scratch} .= ">\n\n";
565  }
566}
567
568sub end_for {
569  my ($self) = @_;
570  delete $self->{started_for};
571  delete $self->{in_for};
572
573  if ($self->__in_literal_xhtml_region) {
574    # Remove trailine newlines.
575    $self->{'scratch'} =~ s/\s+\z//s;
576  } else {
577    $self->{'scratch'} .= '</div>';
578  }
579
580  pop @{ $self->{__region_targets} };
581  $self->emit;
582}
583
584sub start_Document {
585  my ($self) = @_;
586  if (defined $self->html_header) {
587    $self->{'scratch'} .= $self->html_header;
588    $self->emit unless $self->html_header eq "";
589  } else {
590    my ($doctype, $title, $metatags, $bodyid);
591    $doctype = $self->html_doctype || '';
592    $title = $self->force_title || $self->title || $self->default_title || '';
593    $metatags = $self->html_header_tags || '';
594    if (my $css = $self->html_css) {
595        if ($css !~ /<link/) {
596            # this is required to be compatible with Pod::Simple::BatchHTML
597            $metatags .= '<link rel="stylesheet" href="'
598                . $self->encode_entities($css) . '" type="text/css" />';
599        } else {
600            $metatags .= $css;
601        }
602    }
603    if ($self->html_javascript) {
604      $metatags .= qq{\n<script type="text/javascript" src="} .
605                    $self->html_javascript . '"></script>';
606    }
607    $bodyid = $self->backlink ? ' id="_podtop_"' : '';
608    $self->{'scratch'} .= <<"HTML";
609$doctype
610<html>
611<head>
612<title>$title</title>
613$metatags
614</head>
615<body$bodyid>
616HTML
617    $self->emit;
618  }
619}
620
621sub build_index {
622    my ($self, $to_index) = @_;
623
624    my @out;
625    my $level  = 0;
626    my $indent = -1;
627    my $space  = '';
628    my $id     = ' id="index"';
629
630    for my $h (@{ $to_index }, [0]) {
631        my $target_level = $h->[0];
632        # Get to target_level by opening or closing ULs
633        if ($level == $target_level) {
634            $out[-1] .= '</li>';
635        } elsif ($level > $target_level) {
636            $out[-1] .= '</li>' if $out[-1] =~ /^\s+<li>/;
637            while ($level > $target_level) {
638                --$level;
639                push @out, ('  ' x --$indent) . '</li>' if @out && $out[-1] =~ m{^\s+<\/ul};
640                push @out, ('  ' x --$indent) . '</ul>';
641            }
642            push @out, ('  ' x --$indent) . '</li>' if $level;
643        } else {
644            while ($level < $target_level) {
645                ++$level;
646                push @out, ('  ' x ++$indent) . '<li>' if @out && $out[-1]=~ /^\s*<ul/;
647                push @out, ('  ' x ++$indent) . "<ul$id>";
648                $id = '';
649            }
650            ++$indent;
651        }
652
653        next unless $level;
654        $space = '  '  x $indent;
655        my $fragment = $self->encode_entities($self->encode_url($h->[1]));
656        push @out, sprintf '%s<li><a href="#%s">%s</a>',
657            $space, $fragment, $h->[2];
658    }
659
660    return join "\n", @out;
661}
662
663sub end_Document   {
664  my ($self) = @_;
665  my $to_index = $self->{'to_index'};
666  if ($self->index && @{ $to_index } ) {
667      my $index = $self->build_index($to_index);
668
669      # Splice the index in between the HTML headers and the first element.
670      my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1;
671      splice @{ $self->{'output'} }, $offset, 0, $index;
672  }
673
674  if (defined $self->html_footer) {
675    $self->{'scratch'} .= $self->html_footer;
676    $self->emit unless $self->html_footer eq "";
677  } else {
678    $self->{'scratch'} .= "</body>\n</html>";
679    $self->emit;
680  }
681
682  if ($self->index) {
683      print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n";
684      @{$self->{'output'}} = ();
685  }
686
687}
688
689# Handling code tags
690sub start_B { $_[0]{'scratch'} .= '<b>' }
691sub end_B   { $_[0]{'scratch'} .= '</b>' }
692
693sub start_C { push(@{$_[0]{'in_code'}}, 'C'); $_[0]->start_code($_[0]{'in_code'}[-1]); }
694sub end_C   { $_[0]->end_code(pop(@{$_[0]{'in_code'}})); }
695
696sub start_F { $_[0]{'scratch'} .= '<i>' }
697sub end_F   { $_[0]{'scratch'} .= '</i>' }
698
699sub start_I { $_[0]{'scratch'} .= '<i>' }
700sub end_I   { $_[0]{'scratch'} .= '</i>' }
701
702sub start_L {
703  my ($self, $flags) = @_;
704    my ($type, $to, $section) = @{$flags}{'type', 'to', 'section'};
705    my $url = $self->encode_entities(
706        $type eq 'url' ? $to
707            : $type eq 'pod' ? $self->resolve_pod_page_link($to, $section)
708            : $type eq 'man' ? $self->resolve_man_page_link($to, $section)
709            :                  undef
710    );
711
712    # If it's an unknown type, use an attribute-less <a> like HTML.pm.
713    $self->{'scratch'} .= '<a' . ($url ? ' href="'. $url . '">' : '>');
714}
715
716sub end_L   { $_[0]{'scratch'} .= '</a>' }
717
718sub start_S { $_[0]{'scratch'} .= '<span style="white-space: nowrap;">' }
719sub end_S   { $_[0]{'scratch'} .= '</span>' }
720
721sub emit {
722  my($self) = @_;
723  if ($self->index) {
724      push @{ $self->{'output'} }, $self->{'scratch'};
725  } else {
726      print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n";
727  }
728  $self->{'scratch'} = '';
729  return;
730}
731
732=head2 resolve_pod_page_link
733
734  my $url = $pod->resolve_pod_page_link('Net::Ping', 'INSTALL');
735  my $url = $pod->resolve_pod_page_link('perlpodspec');
736  my $url = $pod->resolve_pod_page_link(undef, 'SYNOPSIS');
737
738Resolves a POD link target (typically a module or POD file name) and section
739name to a URL. The resulting link will be returned for the above examples as:
740
741  https://metacpan.org/pod/Net::Ping#INSTALL
742  https://metacpan.org/pod/perlpodspec
743  #SYNOPSIS
744
745Note that when there is only a section argument the URL will simply be a link
746to a section in the current document.
747
748=cut
749
750sub resolve_pod_page_link {
751    my ($self, $to, $section) = @_;
752    return undef unless defined $to || defined $section;
753    if (defined $section) {
754        my $id = $self->idify($section, 1);
755        $section = '#' . $self->encode_url($id);
756        return $section unless defined $to;
757    } else {
758        $section = ''
759    }
760
761    return ($self->perldoc_url_prefix || '')
762        . $to . $section
763        . ($self->perldoc_url_postfix || '');
764}
765
766=head2 resolve_man_page_link
767
768  my $url = $pod->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE');
769  my $url = $pod->resolve_man_page_link('crontab');
770
771Resolves a man page link target and numeric section to a URL. The resulting
772link will be returned for the above examples as:
773
774    http://man.he.net/man5/crontab
775    http://man.he.net/man1/crontab
776
777Note that the first argument is required. The section number will be parsed
778from it, and if it's missing will default to 1. The second argument is
779currently ignored, as L<man.he.net|http://man.he.net> does not currently
780include linkable IDs or anchor names in its pages. Subclass to link to a
781different man page HTTP server.
782
783=cut
784
785sub resolve_man_page_link {
786    my ($self, $to, $section) = @_;
787    return undef unless defined $to;
788    my ($page, $part) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
789    return undef unless $page;
790    return ($self->man_url_prefix || '')
791        . ($part || 1) . "/" . $self->encode_entities($page)
792        . ($self->man_url_postfix || '');
793
794}
795
796=head2 idify
797
798  my $id   = $pod->idify($text);
799  my $hash = $pod->idify($text, 1);
800
801This method turns an arbitrary string into a valid XHTML ID attribute value.
802The rules enforced, following
803L<http://webdesign.about.com/od/htmltags/a/aa031707.htm>, are:
804
805=over
806
807=item *
808
809The id must start with a letter (a-z or A-Z)
810
811=item *
812
813All subsequent characters can be letters, numbers (0-9), hyphens (-),
814underscores (_), colons (:), and periods (.).
815
816=item *
817
818The final character can't be a hyphen, colon, or period. URLs ending with these
819characters, while allowed by XHTML, can be awkward to extract from plain text.
820
821=item *
822
823Each id must be unique within the document.
824
825=back
826
827In addition, the returned value will be unique within the context of the
828Pod::Simple::XHTML object unless a second argument is passed a true value. ID
829attributes should always be unique within a single XHTML document, but pass
830the true value if you are creating not an ID but a URL hash to point to
831an ID (i.e., if you need to put the "#foo" in C<< <a href="#foo">foo</a> >>.
832
833=cut
834
835sub idify {
836    my ($self, $t, $not_unique) = @_;
837    for ($t) {
838        s/[<>&'"]//g;            # Strip HTML special characters
839        s/^\s+//; s/\s+$//;      # Strip white space.
840        s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
841        s/^[^a-zA-Z]+//;         # First char must be a letter.
842        s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
843        s/[-:.]+$//;             # Strip trailing punctuation.
844    }
845    return $t if $not_unique;
846    my $i = '';
847    $i++ while $self->{ids}{"$t$i"}++;
848    return "$t$i";
849}
850
851=head2 batch_mode_page_object_init
852
853  $pod->batch_mode_page_object_init($batchconvobj, $module, $infile, $outfile, $depth);
854
855Called by L<Pod::Simple::HTMLBatch> so that the class has a chance to
856initialize the converter. Internally it sets the C<batch_mode> property to
857true and sets C<batch_mode_current_level()>, but Pod::Simple::XHTML does not
858currently use those features. Subclasses might, though.
859
860=cut
861
862sub batch_mode_page_object_init {
863  my ($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
864  $self->batch_mode(1);
865  $self->batch_mode_current_level($depth);
866  return $self;
867}
868
869sub html_header_after_title {
870}
871
872
8731;
874
875__END__
876
877=head1 SEE ALSO
878
879L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell>
880
881=head1 SUPPORT
882
883Questions or discussion about POD and Pod::Simple should be sent to the
884pod-people@perl.org mail list. Send an empty email to
885pod-people-subscribe@perl.org to subscribe.
886
887This module is managed in an open GitHub repository,
888L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
889to clone L<https://github.com/perl-pod/pod-simple.git> and send patches!
890
891Patches against Pod::Simple are welcome. Please send bug reports to
892<bug-pod-simple@rt.cpan.org>.
893
894=head1 COPYRIGHT AND DISCLAIMERS
895
896Copyright (c) 2003-2005 Allison Randal.
897
898This library is free software; you can redistribute it and/or modify it
899under the same terms as Perl itself.
900
901This program is distributed in the hope that it will be useful, but
902without any warranty; without even the implied warranty of
903merchantability or fitness for a particular purpose.
904
905=head1 ACKNOWLEDGEMENTS
906
907Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
908L<Linux man pages online|http://man.he.net/> site for man page links.
909
910Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
911site for Perl module links.
912
913=head1 AUTHOR
914
915Pod::Simpele::XHTML was created by Allison Randal <allison@perl.org>.
916
917Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
918But don't bother him, he's retired.
919
920Pod::Simple is maintained by:
921
922=over
923
924=item * Allison Randal C<allison@perl.org>
925
926=item * Hans Dieter Pearcey C<hdp@cpan.org>
927
928=item * David E. Wheeler C<dwheeler@cpan.org>
929
930=back
931
932=cut
933