xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/XHTML.pm (revision 2777ee89d0e541ec819d05abee114837837abbec)
1=pod
2
3=head1 NAME
4
5Pod::Simple::XHTML -- format Pod as validating XHTML
6
7=head1 SYNOPSIS
8
9  use Pod::Simple::XHTML;
10
11  my $parser = Pod::Simple::XHTML->new();
12
13  ...
14
15  $parser->parse_file('path/to/file.pod');
16
17=head1 DESCRIPTION
18
19This class is a formatter that takes Pod and renders it as XHTML
20validating HTML.
21
22This is a subclass of L<Pod::Simple::Methody> and inherits all its
23methods. The implementation is entirely different than
24L<Pod::Simple::HTML>, but it largely preserves the same interface.
25
26=head2 Minimal code
27
28  use Pod::Simple::XHTML;
29  my $psx = Pod::Simple::XHTML->new;
30  $psx->output_string(\my $html);
31  $psx->parse_file('path/to/Module/Name.pm');
32  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
33  print $out $html;
34
35You can also control the character encoding and entities. For example, if
36you're sure that the POD is properly encoded (using the C<=encoding> command),
37you can prevent high-bit characters from being encoded as HTML entities and
38declare the output character set as UTF-8 before parsing, like so:
39
40  $psx->html_charset('UTF-8');
41  $psx->html_encode_chars('&<>">');
42
43=cut
44
45package Pod::Simple::XHTML;
46use strict;
47use vars qw( $VERSION @ISA $HAS_HTML_ENTITIES );
48$VERSION = '3.28';
49use Pod::Simple::Methody ();
50@ISA = ('Pod::Simple::Methody');
51
52BEGIN {
53  $HAS_HTML_ENTITIES = eval "require HTML::Entities; 1";
54}
55
56my %entities = (
57  q{>} => 'gt',
58  q{<} => 'lt',
59  q{'} => '#39',
60  q{"} => 'quot',
61  q{&} => 'amp',
62);
63
64sub encode_entities {
65  my $self = shift;
66  my $ents = $self->html_encode_chars;
67  return HTML::Entities::encode_entities( $_[0], $ents ) if $HAS_HTML_ENTITIES;
68  if (defined $ents) {
69      $ents =~ s,(?<!\\)([]/]),\\$1,g;
70      $ents =~ s,(?<!\\)\\\z,\\\\,;
71  } else {
72      $ents = join '', keys %entities;
73  }
74  my $str = $_[0];
75  $str =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
76  return $str;
77}
78
79#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
80
81=head1 METHODS
82
83Pod::Simple::XHTML offers a number of methods that modify the format of
84the HTML output. Call these after creating the parser object, but before
85the call to C<parse_file>:
86
87  my $parser = Pod::PseudoPod::HTML->new();
88  $parser->set_optional_param("value");
89  $parser->parse_file($file);
90
91=head2 perldoc_url_prefix
92
93In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
94to put before the "Foo%3a%3aBar". The default value is
95"http://search.cpan.org/perldoc?".
96
97=head2 perldoc_url_postfix
98
99What to put after "Foo%3a%3aBar" in the URL. This option is not set by
100default.
101
102=head2 man_url_prefix
103
104In turning C<< L<crontab(5)> >> into http://whatever/man/1/crontab, what
105to put before the "1/crontab". The default value is
106"http://man.he.net/man".
107
108=head2 man_url_postfix
109
110What to put after "1/crontab" in the URL. This option is not set by default.
111
112=head2 title_prefix, title_postfix
113
114What to put before and after the title in the head. The values should
115already be &-escaped.
116
117=head2 html_css
118
119  $parser->html_css('path/to/style.css');
120
121The URL or relative path of a CSS file to include. This option is not
122set by default.
123
124=head2 html_javascript
125
126The URL or relative path of a JavaScript file to pull in. This option is
127not set by default.
128
129=head2 html_doctype
130
131A document type tag for the file. This option is not set by default.
132
133=head2 html_charset
134
135The charater set to declare in the Content-Type meta tag created by default
136for C<html_header_tags>. Note that this option will be ignored if the value of
137C<html_header_tags> is changed. Defaults to "ISO-8859-1".
138
139=head2 html_header_tags
140
141Additional arbitrary HTML tags for the header of the document. The
142default value is just a content type header tag:
143
144  <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
145
146Add additional meta tags here, or blocks of inline CSS or JavaScript
147(wrapped in the appropriate tags).
148
149=head3 html_encode_chars
150
151A string containing all characters that should be encoded as HTML entities,
152specified using the regular expression character class syntax (what you find
153within brackets in regular expressions). This value will be passed as the
154second argument to the C<encode_entities> function of L<HTML::Entities>. If
155L<HTML::Entities> is not installed, then any characters other than C<&<>"'>
156will be encoded numerically.
157
158=head2 html_h_level
159
160This is the level of HTML "Hn" element to which a Pod "head1" corresponds.  For
161example, if C<html_h_level> is set to 2, a head1 will produce an H2, a head2
162will produce an H3, and so on.
163
164=head2 default_title
165
166Set a default title for the page if no title can be determined from the
167content. The value of this string should already be &-escaped.
168
169=head2 force_title
170
171Force a title for the page (don't try to determine it from the content).
172The value of this string should already be &-escaped.
173
174=head2 html_header, html_footer
175
176Set the HTML output at the beginning and end of each file. The default
177header includes a title, a doctype tag (if C<html_doctype> is set), a
178content tag (customized by C<html_header_tags>), a tag for a CSS file
179(if C<html_css> is set), and a tag for a Javascript file (if
180C<html_javascript> is set). The default footer simply closes the C<html>
181and C<body> tags.
182
183The options listed above customize parts of the default header, but
184setting C<html_header> or C<html_footer> completely overrides the
185built-in header or footer. These may be useful if you want to use
186template tags instead of literal HTML headers and footers or are
187integrating converted POD pages in a larger website.
188
189If you want no headers or footers output in the HTML, set these options
190to the empty string.
191
192=head2 index
193
194Whether to add a table-of-contents at the top of each page (called an
195index for the sake of tradition).
196
197=head2 anchor_items
198
199Whether to anchor every definition C<=item> directive. This needs to be
200enabled if you want to be able to link to specific C<=item> directives, which
201are output as C<< <dt> >> elements. Disabled by default.
202
203=head2 backlink
204
205Whether to turn every =head1 directive into a link pointing to the top
206of the page (specifically, the opening body tag).
207
208=cut
209
210__PACKAGE__->_accessorize(
211 'perldoc_url_prefix',
212 'perldoc_url_postfix',
213 'man_url_prefix',
214 'man_url_postfix',
215 'title_prefix',  'title_postfix',
216 'html_css',
217 'html_javascript',
218 'html_doctype',
219 'html_charset',
220 'html_encode_chars',
221 'html_h_level',
222 'title', # Used internally for the title extracted from the content
223 'default_title',
224 'force_title',
225 'html_header',
226 'html_footer',
227 'index',
228 'anchor_items',
229 'backlink',
230 'batch_mode', # whether we're in batch mode
231 'batch_mode_current_level',
232    # When in batch mode, how deep the current module is: 1 for "LWP",
233    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
234);
235
236#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
237
238=head1 SUBCLASSING
239
240If the standard options aren't enough, you may want to subclass
241Pod::Simple::XHMTL. These are the most likely candidates for methods
242you'll want to override when subclassing.
243
244=cut
245
246sub new {
247  my $self = shift;
248  my $new = $self->SUPER::new(@_);
249  $new->{'output_fh'} ||= *STDOUT{IO};
250  $new->perldoc_url_prefix('http://search.cpan.org/perldoc?');
251  $new->man_url_prefix('http://man.he.net/man');
252  $new->html_charset('ISO-8859-1');
253  $new->nix_X_codes(1);
254  $new->{'scratch'} = '';
255  $new->{'to_index'} = [];
256  $new->{'output'} = [];
257  $new->{'saved'} = [];
258  $new->{'ids'} = { '_podtop_' => 1 }; # used in <body>
259  $new->{'in_li'} = [];
260
261  $new->{'__region_targets'}  = [];
262  $new->{'__literal_targets'} = {};
263  $new->accept_targets_as_html( 'html', 'HTML' );
264
265  return $new;
266}
267
268sub html_header_tags {
269    my $self = shift;
270    return $self->{html_header_tags} = shift if @_;
271    return $self->{html_header_tags}
272        ||= '<meta http-equiv="Content-Type" content="text/html; charset='
273            . $self->html_charset . '" />';
274}
275
276#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
277
278=head2 handle_text
279
280This method handles the body of text within any element: it's the body
281of a paragraph, or everything between a "=begin" tag and the
282corresponding "=end" tag, or the text within an L entity, etc. You would
283want to override this if you are adding a custom element type that does
284more than just display formatted text. Perhaps adding a way to generate
285HTML tables from an extended version of POD.
286
287So, let's say you want to add a custom element called 'foo'. In your
288subclass's C<new> method, after calling C<SUPER::new> you'd call:
289
290  $new->accept_targets_as_text( 'foo' );
291
292Then override the C<start_for> method in the subclass to check for when
293"$flags->{'target'}" is equal to 'foo' and set a flag that marks that
294you're in a foo block (maybe "$self->{'in_foo'} = 1"). Then override the
295C<handle_text> method to check for the flag, and pass $text to your
296custom subroutine to construct the HTML output for 'foo' elements,
297something like:
298
299  sub handle_text {
300      my ($self, $text) = @_;
301      if ($self->{'in_foo'}) {
302          $self->{'scratch'} .= build_foo_html($text);
303          return;
304      }
305      $self->SUPER::handle_text($text);
306  }
307
308=head2 handle_code
309
310This method handles the body of text that is marked up to be code.
311You might for instance override this to plug in a syntax highlighter.
312The base implementation just escapes the text.
313
314The callback methods C<start_code> and C<end_code> emits the C<code> tags
315before and after C<handle_code> is invoked, so you might want to override these
316together with C<handle_code> if this wrapping isn't suiteable.
317
318Note that the code might be broken into mulitple segments if there are
319nested formatting codes inside a C<< CE<lt>...> >> sequence.  In between the
320calls to C<handle_code> other markup tags might have been emitted in that
321case.  The same is true for verbatim sections if the C<codes_in_verbatim>
322option is turned on.
323
324=head2 accept_targets_as_html
325
326This method behaves like C<accept_targets_as_text>, but also marks the region
327as one whose content should be emitted literally, without HTML entity escaping
328or wrapping in a C<div> element.
329
330=cut
331
332sub __in_literal_xhtml_region {
333    return unless @{ $_[0]{__region_targets} };
334    my $target = $_[0]{__region_targets}[-1];
335    return $_[0]{__literal_targets}{ $target };
336}
337
338sub accept_targets_as_html {
339    my ($self, @targets) = @_;
340    $self->accept_targets(@targets);
341    $self->{__literal_targets}{$_} = 1 for @targets;
342}
343
344sub handle_text {
345    # escape special characters in HTML (<, >, &, etc)
346    my $text = $_[0]->__in_literal_xhtml_region
347        ? $_[1]
348        : $_[0]->encode_entities( $_[1] );
349
350    if ($_[0]{'in_code'} && @{$_[0]{'in_code'}}) {
351        # Intentionally use the raw text in $_[1], even if we're not in a
352        # literal xhtml region, since handle_code calls encode_entities.
353        $_[0]->handle_code( $_[1], $_[0]{'in_code'}[-1] );
354    } else {
355        $_[0]{'scratch'} .= $text;
356    }
357
358    $_[0]{htext} .= $text if $_[0]{'in_head'};
359}
360
361sub start_code {
362    $_[0]{'scratch'} .= '<code>';
363}
364
365sub end_code {
366    $_[0]{'scratch'} .= '</code>';
367}
368
369sub handle_code {
370    $_[0]{'scratch'} .= $_[0]->encode_entities( $_[1] );
371}
372
373sub start_Para {
374    $_[0]{'scratch'} = '<p>';
375}
376
377sub start_Verbatim {
378    $_[0]{'scratch'} = '<pre>';
379    push(@{$_[0]{'in_code'}}, 'Verbatim');
380    $_[0]->start_code($_[0]{'in_code'}[-1]);
381}
382
383sub start_head1 {  $_[0]{'in_head'} = 1; $_[0]{htext} = ''; }
384sub start_head2 {  $_[0]{'in_head'} = 2; $_[0]{htext} = ''; }
385sub start_head3 {  $_[0]{'in_head'} = 3; $_[0]{htext} = ''; }
386sub start_head4 {  $_[0]{'in_head'} = 4; $_[0]{htext} = ''; }
387
388sub start_item_number {
389    $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
390    $_[0]{'scratch'} .= '<li><p>';
391    push @{$_[0]{'in_li'}}, 1;
392}
393
394sub start_item_bullet {
395    $_[0]{'scratch'} = "</li>\n" if ($_[0]{'in_li'}->[-1] && pop @{$_[0]{'in_li'}});
396    $_[0]{'scratch'} .= '<li><p>';
397    push @{$_[0]{'in_li'}}, 1;
398}
399
400sub start_item_text   {
401    # see end_item_text
402}
403
404sub start_over_bullet { $_[0]{'scratch'} = '<ul>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
405sub start_over_block  { $_[0]{'scratch'} = '<ul>'; $_[0]->emit }
406sub start_over_number { $_[0]{'scratch'} = '<ol>'; push @{$_[0]{'in_li'}}, 0; $_[0]->emit }
407sub start_over_text   {
408    $_[0]{'scratch'} = '<dl>';
409    $_[0]{'dl_level'}++;
410    $_[0]{'in_dd'} ||= [];
411    $_[0]->emit
412}
413
414sub end_over_block  { $_[0]{'scratch'} .= '</ul>'; $_[0]->emit }
415
416sub end_over_number   {
417    $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
418    $_[0]{'scratch'} .= '</ol>';
419    pop @{$_[0]{'in_li'}};
420    $_[0]->emit;
421}
422
423sub end_over_bullet   {
424    $_[0]{'scratch'} = "</li>\n" if ( pop @{$_[0]{'in_li'}} );
425    $_[0]{'scratch'} .= '</ul>';
426    pop @{$_[0]{'in_li'}};
427    $_[0]->emit;
428}
429
430sub end_over_text   {
431    if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
432        $_[0]{'scratch'} = "</dd>\n";
433        $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
434    }
435    $_[0]{'scratch'} .= '</dl>';
436    $_[0]{'dl_level'}--;
437    $_[0]->emit;
438}
439
440# . . . . . Now the actual formatters:
441
442sub end_Para     { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
443sub end_Verbatim {
444    $_[0]->end_code(pop(@{$_[0]->{'in_code'}}));
445    $_[0]{'scratch'} .= '</pre>';
446    $_[0]->emit;
447}
448
449sub _end_head {
450    my $h = delete $_[0]{in_head};
451
452    my $add = $_[0]->html_h_level;
453    $add = 1 unless defined $add;
454    $h += $add - 1;
455
456    my $id = $_[0]->idify($_[0]{htext});
457    my $text = $_[0]{scratch};
458    $_[0]{'scratch'} = $_[0]->backlink && ($h - $add == 0)
459                         # backlinks enabled && =head1
460                         ? qq{<a href="#_podtop_"><h$h id="$id">$text</h$h></a>}
461                         : qq{<h$h id="$id">$text</h$h>};
462    $_[0]->emit;
463    push @{ $_[0]{'to_index'} }, [$h, $id, delete $_[0]{'htext'}];
464}
465
466sub end_head1       { shift->_end_head(@_); }
467sub end_head2       { shift->_end_head(@_); }
468sub end_head3       { shift->_end_head(@_); }
469sub end_head4       { shift->_end_head(@_); }
470
471sub end_item_bullet { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
472sub end_item_number { $_[0]{'scratch'} .= '</p>'; $_[0]->emit }
473
474sub end_item_text   {
475    # idify and anchor =item content if wanted
476    my $dt_id = $_[0]{'anchor_items'}
477                 ? ' id="'. $_[0]->idify($_[0]{'scratch'}) .'"'
478                 : '';
479
480    # reset scratch
481    my $text = $_[0]{scratch};
482    $_[0]{'scratch'} = '';
483
484    if ($_[0]{'in_dd'}[ $_[0]{'dl_level'} ]) {
485        $_[0]{'scratch'} = "</dd>\n";
486        $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 0;
487    }
488
489    $_[0]{'scratch'} .= qq{<dt$dt_id>$text</dt>\n<dd>};
490    $_[0]{'in_dd'}[ $_[0]{'dl_level'} ] = 1;
491    $_[0]->emit;
492}
493
494# This handles =begin and =for blocks of all kinds.
495sub start_for {
496  my ($self, $flags) = @_;
497
498  push @{ $self->{__region_targets} }, $flags->{target_matching};
499
500  unless ($self->__in_literal_xhtml_region) {
501    $self->{scratch} .= '<div';
502    $self->{scratch} .= qq( class="$flags->{target}") if $flags->{target};
503    $self->{scratch} .= '>';
504  }
505
506  $self->emit;
507
508}
509sub end_for {
510  my ($self) = @_;
511
512  $self->{'scratch'} .= '</div>' unless $self->__in_literal_xhtml_region;
513
514  pop @{ $self->{__region_targets} };
515  $self->emit;
516}
517
518sub start_Document {
519  my ($self) = @_;
520  if (defined $self->html_header) {
521    $self->{'scratch'} .= $self->html_header;
522    $self->emit unless $self->html_header eq "";
523  } else {
524    my ($doctype, $title, $metatags, $bodyid);
525    $doctype = $self->html_doctype || '';
526    $title = $self->force_title || $self->title || $self->default_title || '';
527    $metatags = $self->html_header_tags || '';
528    if (my $css = $self->html_css) {
529        $metatags .= $css;
530        if ($css !~ /<link/) {
531            # this is required to be compatible with Pod::Simple::BatchHTML
532            $metatags .= '<link rel="stylesheet" href="'
533                . $self->encode_entities($css) . '" type="text/css" />';
534        }
535    }
536    if ($self->html_javascript) {
537      $metatags .= qq{\n<script type="text/javascript" src="} .
538                    $self->html_javascript . "'></script>";
539    }
540    $bodyid = $self->backlink ? ' id="_podtop_"' : '';
541    $self->{'scratch'} .= <<"HTML";
542$doctype
543<html>
544<head>
545<title>$title</title>
546$metatags
547</head>
548<body$bodyid>
549HTML
550    $self->emit;
551  }
552}
553
554sub end_Document   {
555  my ($self) = @_;
556  my $to_index = $self->{'to_index'};
557  if ($self->index && @{ $to_index } ) {
558      my @out;
559      my $level  = 0;
560      my $indent = -1;
561      my $space  = '';
562      my $id     = ' id="index"';
563
564      for my $h (@{ $to_index }, [0]) {
565          my $target_level = $h->[0];
566          # Get to target_level by opening or closing ULs
567          if ($level == $target_level) {
568              $out[-1] .= '</li>';
569          } elsif ($level > $target_level) {
570              $out[-1] .= '</li>' if $out[-1] =~ /^\s+<li>/;
571              while ($level > $target_level) {
572                  --$level;
573                  push @out, ('  ' x --$indent) . '</li>' if @out && $out[-1] =~ m{^\s+<\/ul};
574                  push @out, ('  ' x --$indent) . '</ul>';
575              }
576              push @out, ('  ' x --$indent) . '</li>' if $level;
577          } else {
578              while ($level < $target_level) {
579                  ++$level;
580                  push @out, ('  ' x ++$indent) . '<li>' if @out && $out[-1]=~ /^\s*<ul/;
581                  push @out, ('  ' x ++$indent) . "<ul$id>";
582                  $id = '';
583              }
584              ++$indent;
585          }
586
587          next unless $level;
588          $space = '  '  x $indent;
589          push @out, sprintf '%s<li><a href="#%s">%s</a>',
590              $space, $h->[1], $h->[2];
591      }
592      # Splice the index in between the HTML headers and the first element.
593      my $offset = defined $self->html_header ? $self->html_header eq '' ? 0 : 1 : 1;
594      splice @{ $self->{'output'} }, $offset, 0, join "\n", @out;
595  }
596
597  if (defined $self->html_footer) {
598    $self->{'scratch'} .= $self->html_footer;
599    $self->emit unless $self->html_footer eq "";
600  } else {
601    $self->{'scratch'} .= "</body>\n</html>";
602    $self->emit;
603  }
604
605  if ($self->index) {
606      print {$self->{'output_fh'}} join ("\n\n", @{ $self->{'output'} }), "\n\n";
607      @{$self->{'output'}} = ();
608  }
609
610}
611
612# Handling code tags
613sub start_B { $_[0]{'scratch'} .= '<b>' }
614sub end_B   { $_[0]{'scratch'} .= '</b>' }
615
616sub start_C { push(@{$_[0]{'in_code'}}, 'C'); $_[0]->start_code($_[0]{'in_code'}[-1]); }
617sub end_C   { $_[0]->end_code(pop(@{$_[0]{'in_code'}})); }
618
619sub start_F { $_[0]{'scratch'} .= '<i>' }
620sub end_F   { $_[0]{'scratch'} .= '</i>' }
621
622sub start_I { $_[0]{'scratch'} .= '<i>' }
623sub end_I   { $_[0]{'scratch'} .= '</i>' }
624
625sub start_L {
626  my ($self, $flags) = @_;
627    my ($type, $to, $section) = @{$flags}{'type', 'to', 'section'};
628    my $url = $self->encode_entities(
629        $type eq 'url' ? $to
630            : $type eq 'pod' ? $self->resolve_pod_page_link($to, $section)
631            : $type eq 'man' ? $self->resolve_man_page_link($to, $section)
632            :                  undef
633    );
634
635    # If it's an unknown type, use an attribute-less <a> like HTML.pm.
636    $self->{'scratch'} .= '<a' . ($url ? ' href="'. $url . '">' : '>');
637}
638
639sub end_L   { $_[0]{'scratch'} .= '</a>' }
640
641sub start_S { $_[0]{'scratch'} .= '<span style="white-space: nowrap;">' }
642sub end_S   { $_[0]{'scratch'} .= '</span>' }
643
644sub emit {
645  my($self) = @_;
646  if ($self->index) {
647      push @{ $self->{'output'} }, $self->{'scratch'};
648  } else {
649      print {$self->{'output_fh'}} $self->{'scratch'}, "\n\n";
650  }
651  $self->{'scratch'} = '';
652  return;
653}
654
655=head2 resolve_pod_page_link
656
657  my $url = $pod->resolve_pod_page_link('Net::Ping', 'INSTALL');
658  my $url = $pod->resolve_pod_page_link('perlpodspec');
659  my $url = $pod->resolve_pod_page_link(undef, 'SYNOPSIS');
660
661Resolves a POD link target (typically a module or POD file name) and section
662name to a URL. The resulting link will be returned for the above examples as:
663
664  http://search.cpan.org/perldoc?Net::Ping#INSTALL
665  http://search.cpan.org/perldoc?perlpodspec
666  #SYNOPSIS
667
668Note that when there is only a section argument the URL will simply be a link
669to a section in the current document.
670
671=cut
672
673sub resolve_pod_page_link {
674    my ($self, $to, $section) = @_;
675    return undef unless defined $to || defined $section;
676    if (defined $section) {
677        $section = '#' . $self->idify($self->encode_entities($section), 1);
678        return $section unless defined $to;
679    } else {
680        $section = ''
681    }
682
683    return ($self->perldoc_url_prefix || '')
684        . $self->encode_entities($to) . $section
685        . ($self->perldoc_url_postfix || '');
686}
687
688=head2 resolve_man_page_link
689
690  my $url = $pod->resolve_man_page_link('crontab(5)', 'EXAMPLE CRON FILE');
691  my $url = $pod->resolve_man_page_link('crontab');
692
693Resolves a man page link target and numeric section to a URL. The resulting
694link will be returned for the above examples as:
695
696    http://man.he.net/man5/crontab
697    http://man.he.net/man1/crontab
698
699Note that the first argument is required. The section number will be parsed
700from it, and if it's missing will default to 1. The second argument is
701currently ignored, as L<man.he.net|http://man.he.net> does not currently
702include linkable IDs or anchor names in its pages. Subclass to link to a
703different man page HTTP server.
704
705=cut
706
707sub resolve_man_page_link {
708    my ($self, $to, $section) = @_;
709    return undef unless defined $to;
710    my ($page, $part) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
711    return undef unless $page;
712    return ($self->man_url_prefix || '')
713        . ($part || 1) . "/" . $self->encode_entities($page)
714        . ($self->man_url_postfix || '');
715
716}
717
718=head2 idify
719
720  my $id   = $pod->idify($text);
721  my $hash = $pod->idify($text, 1);
722
723This method turns an arbitrary string into a valid XHTML ID attribute value.
724The rules enforced, following
725L<http://webdesign.about.com/od/htmltags/a/aa031707.htm>, are:
726
727=over
728
729=item *
730
731The id must start with a letter (a-z or A-Z)
732
733=item *
734
735All subsequent characters can be letters, numbers (0-9), hyphens (-),
736underscores (_), colons (:), and periods (.).
737
738=item *
739
740The final character can't be a hyphen, colon, or period. URLs ending with these
741characters, while allowed by XHTML, can be awkward to extract from plain text.
742
743=item *
744
745Each id must be unique within the document.
746
747=back
748
749In addition, the returned value will be unique within the context of the
750Pod::Simple::XHTML object unless a second argument is passed a true value. ID
751attributes should always be unique within a single XHTML document, but pass
752the true value if you are creating not an ID but a URL hash to point to
753an ID (i.e., if you need to put the "#foo" in C<< <a href="#foo">foo</a> >>.
754
755=cut
756
757sub idify {
758    my ($self, $t, $not_unique) = @_;
759    for ($t) {
760        s/<[^>]+>//g;            # Strip HTML.
761        s/&[^;]+;//g;            # Strip entities.
762        s/^\s+//; s/\s+$//;      # Strip white space.
763        s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
764        s/^[^a-zA-Z]+//;         # First char must be a letter.
765        s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
766        s/[-:.]+$//;             # Strip trailing punctuation.
767    }
768    return $t if $not_unique;
769    my $i = '';
770    $i++ while $self->{ids}{"$t$i"}++;
771    return "$t$i";
772}
773
774=head2 batch_mode_page_object_init
775
776  $pod->batch_mode_page_object_init($batchconvobj, $module, $infile, $outfile, $depth);
777
778Called by L<Pod::Simple::HTMLBatch> so that the class has a chance to
779initialize the converter. Internally it sets the C<batch_mode> property to
780true and sets C<batch_mode_current_level()>, but Pod::Simple::XHTML does not
781currently use those features. Subclasses might, though.
782
783=cut
784
785sub batch_mode_page_object_init {
786  my ($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
787  $self->batch_mode(1);
788  $self->batch_mode_current_level($depth);
789  return $self;
790}
791
792sub html_header_after_title {
793}
794
795
7961;
797
798__END__
799
800=head1 SEE ALSO
801
802L<Pod::Simple>, L<Pod::Simple::Text>, L<Pod::Spell>
803
804=head1 SUPPORT
805
806Questions or discussion about POD and Pod::Simple should be sent to the
807pod-people@perl.org mail list. Send an empty email to
808pod-people-subscribe@perl.org to subscribe.
809
810This module is managed in an open GitHub repository,
811L<https://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
812to clone L<git://github.com/theory/pod-simple.git> and send patches!
813
814Patches against Pod::Simple are welcome. Please send bug reports to
815<bug-pod-simple@rt.cpan.org>.
816
817=head1 COPYRIGHT AND DISCLAIMERS
818
819Copyright (c) 2003-2005 Allison Randal.
820
821This library is free software; you can redistribute it and/or modify it
822under the same terms as Perl itself.
823
824This program is distributed in the hope that it will be useful, but
825without any warranty; without even the implied warranty of
826merchantability or fitness for a particular purpose.
827
828=head1 ACKNOWLEDGEMENTS
829
830Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
831L<Linux man pages online|http://man.he.net/> site for man page links.
832
833Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
834site for Perl module links.
835
836=head1 AUTHOR
837
838Pod::Simpele::XHTML was created by Allison Randal <allison@perl.org>.
839
840Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
841But don't bother him, he's retired.
842
843Pod::Simple is maintained by:
844
845=over
846
847=item * Allison Randal C<allison@perl.org>
848
849=item * Hans Dieter Pearcey C<hdp@cpan.org>
850
851=item * David E. Wheeler C<dwheeler@cpan.org>
852
853=back
854
855=cut
856