xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm (revision ff0e7be1ebbcc809ea8ad2b6dafe215824da9e46)
1require 5;
2package Pod::Simple::HTML;
3use strict;
4use Pod::Simple::PullParser ();
5use vars qw(
6  @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
7  $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
8  $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
9  $Doctype_decl  $Content_decl
10);
11@ISA = ('Pod::Simple::PullParser');
12$VERSION = '3.43';
13BEGIN {
14  if(defined &DEBUG) { } # no-op
15  elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
16  else { *DEBUG = sub () {0}; }
17}
18
19$Doctype_decl ||= '';  # No.  Just No.  Don't even ask me for it.
20 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
21 #    "http://www.w3.org/TR/html4/loose.dtd">\n};
22
23$Content_decl ||=
24 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
25
26$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
27$Computerese =  "" unless defined $Computerese;
28$LamePad = '' unless defined $LamePad;
29
30$Linearization_Limit = 120 unless defined $Linearization_Limit;
31 # headings/items longer than that won't get an <a name="...">
32$Perldoc_URL_Prefix  = 'https://metacpan.org/pod/'
33 unless defined $Perldoc_URL_Prefix;
34$Perldoc_URL_Postfix = ''
35 unless defined $Perldoc_URL_Postfix;
36
37
38$Man_URL_Prefix  = 'http://man.he.net/man';
39$Man_URL_Postfix = '';
40
41$Title_Prefix  = '' unless defined $Title_Prefix;
42$Title_Postfix = '' unless defined $Title_Postfix;
43%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
44  # 'item-text' stuff in the index doesn't quite work, and may
45  # not be a good idea anyhow.
46
47
48__PACKAGE__->_accessorize(
49 'perldoc_url_prefix',
50   # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
51   #  to put before the "Foo%3a%3aBar".
52   # (for singleton mode only?)
53 'perldoc_url_postfix',
54   # what to put after "Foo%3a%3aBar" in the URL.  Normally "".
55
56 'man_url_prefix',
57   # In turning L<crontab(5)> into http://whatever/man/1/crontab, what
58   #  to put before the "1/crontab".
59 'man_url_postfix',
60   #  what to put after the "1/crontab" in the URL. Normally "".
61
62 'batch_mode', # whether we're in batch mode
63 'batch_mode_current_level',
64    # When in batch mode, how deep the current module is: 1 for "LWP",
65    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
66
67 'title_prefix',  'title_postfix',
68  # What to put before and after the title in the head.
69  # Should already be &-escaped
70
71 'html_h_level',
72
73 'html_header_before_title',
74 'html_header_after_title',
75 'html_footer',
76 'top_anchor',
77
78 'index', # whether to add an index at the top of each page
79    # (actually it's a table-of-contents, but we'll call it an index,
80    #  out of apparently longstanding habit)
81
82 'html_css', # URL of CSS file to point to
83 'html_javascript', # URL of Javascript file to point to
84
85 'force_title',   # should already be &-escaped
86 'default_title', # should already be &-escaped
87);
88
89#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
90my @_to_accept;
91
92%Tagmap = (
93  'Verbatim'  => "\n<pre$Computerese>",
94  '/Verbatim' => "</pre>\n",
95  'VerbatimFormatted'  => "\n<pre$Computerese>",
96  '/VerbatimFormatted' => "</pre>\n",
97  'VerbatimB'  => "<b>",
98  '/VerbatimB' => "</b>",
99  'VerbatimI'  => "<i>",
100  '/VerbatimI' => "</i>",
101  'VerbatimBI'  => "<b><i>",
102  '/VerbatimBI' => "</i></b>",
103
104
105  'Data'  => "\n",
106  '/Data' => "\n",
107
108  'head1' => "\n<h1>",  # And also stick in an <a name="...">
109  'head2' => "\n<h2>",  #  ''
110  'head3' => "\n<h3>",  #  ''
111  'head4' => "\n<h4>",  #  ''
112  'head5' => "\n<h5>",  #  ''
113  'head6' => "\n<h6>",  #  ''
114  '/head1' => "</a></h1>\n",
115  '/head2' => "</a></h2>\n",
116  '/head3' => "</a></h3>\n",
117  '/head4' => "</a></h4>\n",
118  '/head5' => "</a></h5>\n",
119  '/head6' => "</a></h6>\n",
120
121  'X'  => "<!--\n\tINDEX: ",
122  '/X' => "\n-->",
123
124  changes(qw(
125    Para=p
126    B=b I=i
127    over-bullet=ul
128    over-number=ol
129    over-text=dl
130    over-block=blockquote
131    item-bullet=li
132    item-number=li
133    item-text=dt
134  )),
135  changes2(
136    map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
137    qw[
138      sample=samp
139      definition=dfn
140      keyboard=kbd
141      variable=var
142      citation=cite
143      abbreviation=abbr
144      acronym=acronym
145      subscript=sub
146      superscript=sup
147      big=big
148      small=small
149      underline=u
150      strikethrough=s
151      preformat=pre
152      teletype=tt
153    ]  # no point in providing a way to get <q>...</q>, I think
154  ),
155
156  '/item-bullet' => "</li>$LamePad\n",
157  '/item-number' => "</li>$LamePad\n",
158  '/item-text'   => "</a></dt>$LamePad\n",
159  'item-body'    => "\n<dd>",
160  '/item-body'   => "</dd>\n",
161
162
163  'B'      =>  "<b>",                  '/B'     =>  "</b>",
164  'I'      =>  "<i>",                  '/I'     =>  "</i>",
165  'F'      =>  "<em$Computerese>",     '/F'     =>  "</em>",
166  'C'      =>  "<code$Computerese>",   '/C'     =>  "</code>",
167  'L'  =>  "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
168  '/L' =>  "</a>",
169);
170
171sub changes {
172  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
173     ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
174  } @_;
175}
176sub changes2 {
177  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
178     ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
179  } @_;
180}
181
182#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
183sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
184 # Just so we can run from the command line.  No options.
185 #  For that, use perldoc!
186#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187
188sub new {
189  my $new = shift->SUPER::new(@_);
190  #$new->nix_X_codes(1);
191  $new->nbsp_for_S(1);
192  $new->accept_targets( 'html', 'HTML' );
193  $new->accept_codes('VerbatimFormatted');
194  $new->accept_codes(@_to_accept);
195  DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n";
196
197  $new->perldoc_url_prefix(  $Perldoc_URL_Prefix  );
198  $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
199  $new->man_url_prefix(  $Man_URL_Prefix  );
200  $new->man_url_postfix( $Man_URL_Postfix );
201  $new->title_prefix(  $Title_Prefix  );
202  $new->title_postfix( $Title_Postfix );
203
204  $new->html_header_before_title(
205   qq[$Doctype_decl<html><head><title>]
206  );
207  $new->html_header_after_title( join "\n" =>
208    "</title>",
209    $Content_decl,
210    "</head>\n<body class='pod'>",
211    $new->version_tag_comment,
212    "<!-- start doc -->\n",
213  );
214  $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
215  $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" );
216
217  $new->{'Tagmap'} = {%Tagmap};
218
219  return $new;
220}
221
222sub __adjust_html_h_levels {
223  my ($self) = @_;
224  my $Tagmap = $self->{'Tagmap'};
225
226  my $add = $self->html_h_level;
227  return unless defined $add;
228  return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
229
230  $add -= 1;
231  for (1 .. 6) {
232    $Tagmap->{"head$_"}  =~ s/$_/$_ + $add/e;
233    $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
234  }
235}
236
237sub batch_mode_page_object_init {
238  my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
239  DEBUG and print STDERR "Initting $self\n  for $module\n",
240    "  in $infile\n  out $outfile\n  depth $depth\n";
241  $self->batch_mode(1);
242  $self->batch_mode_current_level($depth);
243  return $self;
244}
245
246sub run {
247  my $self = $_[0];
248  return $self->do_middle if $self->bare_output;
249  return
250   $self->do_beginning && $self->do_middle && $self->do_end;
251}
252
253#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
254
255sub do_beginning {
256  my $self = $_[0];
257
258  my $title;
259
260  if(defined $self->force_title) {
261    $title = $self->force_title;
262    DEBUG and print STDERR "Forcing title to be $title\n";
263  } else {
264    # Actually try looking for the title in the document:
265    $title = $self->get_short_title();
266    unless($self->content_seen) {
267      DEBUG and print STDERR "No content seen in search for title.\n";
268      return;
269    }
270    $self->{'Title'} = $title;
271
272    if(defined $title and $title =~ m/\S/) {
273      $title = $self->title_prefix . esc($title) . $self->title_postfix;
274    } else {
275      $title = $self->default_title;
276      $title = '' unless defined $title;
277      DEBUG and print STDERR "Title defaults to $title\n";
278    }
279  }
280
281
282  my $after = $self->html_header_after_title  || '';
283  if($self->html_css) {
284    my $link =
285    $self->html_css =~ m/</
286     ? $self->html_css # It's a big blob of markup, let's drop it in
287     : sprintf(        # It's just a URL, so let's wrap it up
288      qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
289      $self->html_css,
290    );
291    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
292  }
293  $self->_add_top_anchor(\$after);
294
295  if($self->html_javascript) {
296    my $link =
297    $self->html_javascript =~ m/</
298     ? $self->html_javascript # It's a big blob of markup, let's drop it in
299     : sprintf(        # It's just a URL, so let's wrap it up
300      qq[<script type="text/javascript" src="%s"></script>\n],
301      $self->html_javascript,
302    );
303    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
304  }
305
306  print {$self->{'output_fh'}}
307    $self->html_header_before_title || '',
308    $title, # already escaped
309    $after,
310  ;
311
312  DEBUG and print STDERR "Returning from do_beginning...\n";
313  return 1;
314}
315
316sub _add_top_anchor {
317  my($self, $text_r) = @_;
318  unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
319    $$text_r .= $self->top_anchor || '';
320  }
321  return;
322}
323
324sub version_tag_comment {
325  my $self = shift;
326  return sprintf
327   "<!--\n  generated by %s v%s,\n  using %s v%s,\n  under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
328   esc(
329    ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
330    $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)),
331   ), $self->_modnote(),
332  ;
333}
334
335sub _modnote {
336  my $class = ref($_[0]) || $_[0];
337  return join "\n   " => grep m/\S/, split "\n",
338
339qq{
340If you want to change this HTML document, you probably shouldn't do that
341by changing it directly.  Instead, see about changing the calling options
342to $class, and/or subclassing $class,
343then reconverting this document from the Pod source.
344When in doubt, email the author of $class for advice.
345See 'perldoc $class' for more info.
346};
347
348}
349
350sub do_end {
351  my $self = $_[0];
352  print {$self->{'output_fh'}}  $self->html_footer || '';
353  return 1;
354}
355
356# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
357# Normally this would just be a call to _do_middle_main_loop -- but we
358#  have to do some elaborate things to emit all the content and then
359#  summarize it and output it /before/ the content that it's a summary of.
360
361sub do_middle {
362  my $self = $_[0];
363  return $self->_do_middle_main_loop unless $self->index;
364
365  if( $self->output_string ) {
366    # An efficiency hack
367    my $out = $self->output_string; #it's a reference to it
368    my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
369    $$out .= $sneakytag;
370    $self->_do_middle_main_loop;
371    $sneakytag = quotemeta($sneakytag);
372    my $index = $self->index_as_html();
373    if( $$out =~ s/$sneakytag/$index/s ) {
374      # Expected case
375      DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n";
376    } else {
377      DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n";
378      # I don't think this should ever happen.
379    }
380    return 1;
381  }
382
383  unless( $self->output_fh ) {
384    require Carp;
385    Carp::confess("Parser object \$p doesn't seem to have any output object!  I don't know how to deal with that.");
386  }
387
388  # If we get here, we're outputting to a FH.  So we need to do some magic.
389  # Namely, divert all content to a string, which we output after the index.
390  my $fh = $self->output_fh;
391  my $content = '';
392  {
393    # Our horrible bait and switch:
394    $self->output_string( \$content );
395    $self->_do_middle_main_loop;
396    $self->abandon_output_string();
397    $self->output_fh($fh);
398  }
399  print $fh $self->index_as_html();
400  print $fh $content;
401
402  return 1;
403}
404
405###########################################################################
406
407sub index_as_html {
408  my $self = $_[0];
409  # This is meant to be called AFTER the input document has been parsed!
410
411  my $points = $self->{'PSHTML_index_points'} || [];
412
413  @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
414   # There's no point in having a 0-item or 1-item index, I dare say.
415
416  my(@out) = qq{\n<div class='indexgroup'>};
417  my $level = 0;
418
419  my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
420  foreach my $p (@$points, ['head0', '(end)']) {
421    ($tagname, $text) = @$p;
422    $anchorname = $self->section_escape($text);
423    if( $tagname =~ m{^head(\d+)$} ) {
424      $target_level = 0 + $1;
425    } else {  # must be some kinda list item
426      if($previous_tagname =~ m{^head\d+$} ) {
427        $target_level = $level + 1;
428      } else {
429        $target_level = $level;  # no change needed
430      }
431    }
432
433    # Get to target_level by opening or closing ULs
434    while($level > $target_level)
435     { --$level; push @out, ("  " x $level) . "</ul>"; }
436    while($level < $target_level)
437     { ++$level; push @out, ("  " x ($level-1))
438       . "<ul   class='indexList indexList$level'>"; }
439
440    $previous_tagname = $tagname;
441    next unless $level;
442
443    $indent = '  '  x $level;
444    push @out, sprintf
445      "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
446      $indent, $level, esc($anchorname), esc($text)
447    ;
448  }
449  push @out, "</div>\n";
450  return join "\n", @out;
451}
452
453###########################################################################
454
455sub _do_middle_main_loop {
456  my $self = $_[0];
457  my $fh = $self->{'output_fh'};
458  my $tagmap = $self->{'Tagmap'};
459
460  $self->__adjust_html_h_levels;
461
462  my($token, $type, $tagname, $linkto, $linktype);
463  my @stack;
464  my $dont_wrap = 0;
465
466  while($token = $self->get_token) {
467
468    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
469    if( ($type = $token->type) eq 'start' ) {
470      if(($tagname = $token->tagname) eq 'L') {
471        $linktype = $token->attr('type') || 'insane';
472
473        $linkto = $self->do_link($token);
474
475        if(defined $linkto and length $linkto) {
476          esc($linkto);
477            #   (Yes, SGML-escaping applies on top of %-escaping!
478            #   But it's rarely noticeable in practice.)
479          print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
480        } else {
481          print $fh "<a>"; # Yes, an 'a' element with no attributes!
482        }
483
484      } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
485        print $fh $tagmap->{$tagname} || next;
486
487        my @to_unget;
488        while(1) {
489          push @to_unget, $self->get_token;
490          last if $to_unget[-1]->is_end
491              and $to_unget[-1]->tagname eq $tagname;
492
493          # TODO: support for X<...>'s found in here?  (maybe hack into linearize_tokens)
494        }
495
496        my $name = $self->linearize_tokens(@to_unget);
497        $name = $self->do_section($name, $token) if defined $name;
498
499        print $fh "<a ";
500        if ($tagname =~ m/^head\d$/s) {
501            print $fh "class='u'", $self->index
502                ? " href='#___top' title='click to go to top of document'\n"
503                : "\n";
504        }
505
506        if(defined $name) {
507          my $esc = esc(  $self->section_name_tidy( $name ) );
508          print $fh qq[name="$esc"];
509          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
510           " tokens as \"$name\".\n";
511          push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
512           if $ToIndex{ $tagname };
513            # Obviously, this discards all formatting codes (saving
514            #  just their content), but ahwell.
515
516        } else {  # ludicrously long, so nevermind
517          DEBUG and print STDERR "Linearized ", scalar(@to_unget),
518           " tokens, but it was too long, so nevermind.\n";
519        }
520        print $fh "\n>";
521        $self->unget_token(@to_unget);
522
523      } elsif ($tagname eq 'Data') {
524        my $next = $self->get_token;
525        next unless defined $next;
526        unless( $next->type eq 'text' ) {
527          $self->unget_token($next);
528          next;
529        }
530        DEBUG and print STDERR "    raw text ", $next->text, "\n";
531        # The parser sometimes preserves newlines and sometimes doesn't!
532        (my $text = $next->text) =~ s/\n\z//;
533        print $fh $text, "\n";
534        next;
535
536      } else {
537        if( $tagname =~ m/^over-/s ) {
538          push @stack, '';
539        } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
540          print $fh $stack[-1];
541          $stack[-1] = '';
542        }
543        print $fh $tagmap->{$tagname} || next;
544        ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
545          or $tagname eq 'X';
546      }
547
548    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
549    } elsif( $type eq 'end' ) {
550      if( ($tagname = $token->tagname) =~ m/^over-/s ) {
551        if( my $end = pop @stack ) {
552          print $fh $end;
553        }
554      } elsif( $tagname =~ m/^item-/s and @stack) {
555        $stack[-1] = $tagmap->{"/$tagname"};
556        if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
557          $self->unget_token($next);
558          if( $next->type eq 'start' ) {
559            print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
560            $stack[-1] = $tagmap->{"/item-body"};
561          }
562        }
563        next;
564      }
565      print $fh $tagmap->{"/$tagname"} || next;
566      --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
567
568    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
569    } elsif( $type eq 'text' ) {
570      esc($type = $token->text);  # reuse $type, why not
571      $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
572      print $fh $type;
573    }
574
575  }
576  return 1;
577}
578
579###########################################################################
580#
581
582sub do_section {
583  my($self, $name, $token) = @_;
584  return $name;
585}
586
587sub do_link {
588  my($self, $token) = @_;
589  my $type = $token->attr('type');
590  if(!defined $type) {
591    $self->whine("Typeless L!?", $token->attr('start_line'));
592  } elsif( $type eq 'pod') { return $self->do_pod_link($token);
593  } elsif( $type eq 'url') { return $self->do_url_link($token);
594  } elsif( $type eq 'man') { return $self->do_man_link($token);
595  } else {
596    $self->whine("L of unknown type $type!?", $token->attr('start_line'));
597  }
598  return 'FNORG'; # should never get called
599}
600
601# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
602
603sub do_url_link { return $_[1]->attr('to') }
604
605sub do_man_link {
606  my ($self, $link) = @_;
607  my $to = $link->attr('to');
608  my $frag = $link->attr('section');
609
610  return undef unless defined $to and length $to; # should never happen
611
612  $frag = $self->section_escape($frag)
613   if defined $frag and length($frag .= ''); # (stringify)
614
615  DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n";
616
617  return $self->resolve_man_page_link($to, $frag);
618}
619
620
621sub do_pod_link {
622  # And now things get really messy...
623  my($self, $link) = @_;
624  my $to = $link->attr('to');
625  my $section = $link->attr('section');
626  return undef unless(  # should never happen
627    (defined $to and length $to) or
628    (defined $section and length $section)
629  );
630
631  $section = $self->section_escape($section)
632   if defined $section and length($section .= ''); # (stringify)
633
634  DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n",
635   $to || "(nil)",  $section || "(nil)";
636
637  {
638    # An early hack:
639    my $complete_url = $self->resolve_pod_link_by_table($to, $section);
640    if( $complete_url ) {
641      DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ",
642        $complete_url, "\n  (Returning that.)\n";
643      return $complete_url;
644    } else {
645      DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)",
646       " didn't return anything interesting.\n";
647    }
648  }
649
650  if(defined $to and length $to) {
651    # Give this routine first hack again
652    my $there = $self->resolve_pod_link_by_table($to);
653    if(defined $there and length $there) {
654      DEBUG > 1
655       and print STDERR "resolve_pod_link_by_table(T) gives $there\n";
656    } else {
657      $there =
658        $self->resolve_pod_page_link($to, $section);
659         # (I pass it the section value, but I don't see a
660         #  particular reason it'd use it.)
661      DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n";
662      unless( defined $there and length $there ) {
663        DEBUG and print STDERR "Can't resolve $to\n";
664        return undef;
665      }
666      # resolve_pod_page_link returning undef is how it
667      #  can signal that it gives up on making a link
668    }
669    $to = $there;
670  }
671
672  #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n";
673
674  my $out = (defined $to and length $to) ? $to : '';
675  $out .= "#" . $section if defined $section and length $section;
676
677  unless(length $out) { # sanity check
678    DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
679     $to || "(nil)",  $section || "(nil)";
680    return undef;
681  }
682
683  DEBUG and print STDERR "Resolved to $out\n";
684  return $out;
685}
686
687
688# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
689
690sub section_escape {
691  my($self, $section) = @_;
692  return $self->section_url_escape(
693    $self->section_name_tidy($section)
694  );
695}
696
697sub section_name_tidy {
698  my($self, $section) = @_;
699  $section =~ s/^\s+//;
700  $section =~ s/\s+$//;
701  $section =~ tr/ /_/;
702  if ($] ge 5.006) {
703    $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters
704  } elsif ('A' eq chr(65)) { # But not on early EBCDIC
705    $section =~ tr/\x00-\x1F\x80-\x9F//d;
706  }
707  $section = $self->unicode_escape_url($section);
708  $section = '_' unless length $section;
709  return $section;
710}
711
712sub section_url_escape  { shift->general_url_escape(@_) }
713sub pagepath_url_escape { shift->general_url_escape(@_) }
714sub manpage_url_escape  { shift->general_url_escape(@_) }
715
716sub general_url_escape {
717  my($self, $string) = @_;
718
719  $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
720     # express Unicode things as urlencode(utf(orig)).
721
722  # A pretty conservative escaping, behoovey even for query components
723  #  of a URL (see RFC 2396)
724
725  if ($] ge 5.007_003) {
726    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg;
727  } else { # Is broken for non-ASCII platforms on early perls
728    $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
729  }
730   # Yes, stipulate the list without a range, so that this can work right on
731   #  all charsets that this module happens to run under.
732
733  return $string;
734}
735
736#--------------------------------------------------------------------------
737#
738# Oh look, a yawning portal to Hell!  Let's play touch football right by it!
739#
740
741sub resolve_pod_page_link {
742  # resolve_pod_page_link must return a properly escaped URL
743  my $self = shift;
744  return $self->batch_mode()
745   ? $self->resolve_pod_page_link_batch_mode(@_)
746   : $self->resolve_pod_page_link_singleton_mode(@_)
747  ;
748}
749
750sub resolve_pod_page_link_singleton_mode {
751  my($self, $it) = @_;
752  return undef unless defined $it and length $it;
753  my $url = $self->pagepath_url_escape($it);
754
755  $url =~ s{::$}{}s; # probably never comes up anyway
756  $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
757
758  return undef unless length $url;
759  return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
760}
761
762sub resolve_pod_page_link_batch_mode {
763  my($self, $to) = @_;
764  DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n";
765  my @path = grep length($_), split m/::/s, $to, -1;
766  unless( @path ) { # sanity
767    DEBUG and print STDERR "Very odd!  Splitting $to gives (nil)!\n";
768    return undef;
769  }
770  $self->batch_mode_rectify_path(\@path);
771  my $out = join('/', map $self->pagepath_url_escape($_), @path)
772    . $HTML_EXTENSION;
773  DEBUG > 1 and print STDERR " => $out\n";
774  return $out;
775}
776
777sub batch_mode_rectify_path {
778  my($self, $pathbits) = @_;
779  my $level = $self->batch_mode_current_level;
780  $level--; # how many levels up to go to get to the root
781  if($level < 1) {
782    unshift @$pathbits, '.'; # just to be pretty
783  } else {
784    unshift @$pathbits, ('..') x $level;
785  }
786  return;
787}
788
789sub resolve_man_page_link {
790  my ($self, $to, $frag) = @_;
791  my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
792
793  return undef unless defined $page and length $page;
794  $section ||= 1;
795
796  return $self->man_url_prefix . "$section/"
797      . $self->manpage_url_escape($page)
798      . $self->man_url_postfix;
799}
800
801#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
802
803sub resolve_pod_link_by_table {
804  # A crazy hack to allow specifying custom L<foo> => URL mappings
805
806  return unless $_[0]->{'podhtml_LOT'};  # An optimizy shortcut
807
808  my($self, $to, $section) = @_;
809
810  # TODO: add a method that actually populates podhtml_LOT from a file?
811
812  if(defined $section) {
813    $to = '' unless defined $to and length $to;
814    return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
815  } else {
816    return $self->{'podhtml_LOT'}{$to};            # quite possibly undef!
817  }
818  return;
819}
820
821###########################################################################
822
823sub linearize_tokens {  # self, tokens
824  my $self = shift;
825  my $out = '';
826
827  my $t;
828  while($t = shift @_) {
829    if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
830      $out .= $t; # a string, or some insane thing
831    } elsif($t->is_text) {
832      $out .= $t->text;
833    } elsif($t->is_start and $t->tag eq 'X') {
834      # Ignore until the end of this X<...> sequence:
835      my $x_open = 1;
836      while($x_open) {
837        next if( ($t = shift @_)->is_text );
838        if(   $t->is_start and $t->tag eq 'X') { ++$x_open }
839        elsif($t->is_end   and $t->tag eq 'X') { --$x_open }
840      }
841    }
842  }
843  return undef if length $out > $Linearization_Limit;
844  return $out;
845}
846
847#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
848
849sub unicode_escape_url {
850  my($self, $string) = @_;
851  $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
852    #  Turn char 1234 into "(1234)"
853  return $string;
854}
855
856#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
857sub esc { # a function.
858  if(defined wantarray) {
859    if(wantarray) {
860      @_ = splice @_; # break aliasing
861    } else {
862      my $x = shift;
863      if ($] ge 5.007_003) {
864        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg;
865      } else { # Is broken for non-ASCII platforms on early perls
866        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
867      }
868      return $x;
869    }
870  }
871  foreach my $x (@_) {
872    # Escape things very cautiously:
873    if (defined $x) {
874      if ($] ge 5.007_003) {
875        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg
876      } else { # Is broken for non-ASCII platforms on early perls
877        $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
878      }
879    }
880    # Leave out "- so that "--" won't make it thru in X-generated comments
881    #  with text in them.
882
883    # Yes, stipulate the list without a range, so that this can work right on
884    #  all charsets that this module happens to run under.
885  }
886  return @_;
887}
888
889#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
890
8911;
892__END__
893
894=head1 NAME
895
896Pod::Simple::HTML - convert Pod to HTML
897
898=head1 SYNOPSIS
899
900  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
901
902
903=head1 DESCRIPTION
904
905This class is for making an HTML rendering of a Pod document.
906
907This is a subclass of L<Pod::Simple::PullParser> and inherits all its
908methods (and options).
909
910Note that if you want to do a batch conversion of a lot of Pod
911documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
912
913
914
915=head1 CALLING FROM THE COMMAND LINE
916
917TODO
918
919  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
920
921
922
923=head1 CALLING FROM PERL
924
925=head2 Minimal code
926
927  use Pod::Simple::HTML;
928  my $p = Pod::Simple::HTML->new;
929  $p->output_string(\my $html);
930  $p->parse_file('path/to/Module/Name.pm');
931  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
932  print $out $html;
933
934=head2 More detailed example
935
936  use Pod::Simple::HTML;
937
938Set the content type:
939
940  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
941
942  my $p = Pod::Simple::HTML->new;
943
944Include a single javascript source:
945
946  $p->html_javascript('http://abc.com/a.js');
947
948Or insert multiple javascript source in the header
949(or for that matter include anything, thought this is not recommended)
950
951  $p->html_javascript('
952      <script type="text/javascript" src="http://abc.com/b.js"></script>
953      <script type="text/javascript" src="http://abc.com/c.js"></script>');
954
955Include a single css source in the header:
956
957  $p->html_css('/style.css');
958
959or insert multiple css sources:
960
961  $p->html_css('
962      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css">
963      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">');
964
965Tell the parser where should the output go. In this case it will be placed in the $html variable:
966
967  my $html;
968  $p->output_string(\$html);
969
970Parse and process a file with pod in it:
971
972  $p->parse_file('path/to/Module/Name.pm');
973
974=head1 METHODS
975
976TODO
977all (most?) accessorized methods
978
979The following variables need to be set B<before> the call to the ->new constructor.
980
981Set the string that is included before the opening <html> tag:
982
983  $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
984	 "http://www.w3.org/TR/html4/loose.dtd">\n};
985
986Set the content-type in the HTML head: (defaults to ISO-8859-1)
987
988  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
989
990Set the value that will be embedded in the opening tags of F, C tags and verbatim text.
991F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "")
992
993  $Pod::Simple::HTML::Computerese =  ' class="some_class_name';
994
995=head2 html_css
996
997=head2 html_javascript
998
999=head2 title_prefix
1000
1001=head2 title_postfix
1002
1003=head2 html_header_before_title
1004
1005This includes everything before the <title> opening tag including the Document type
1006and including the opening <title> tag. The following call will set it to be a simple HTML
1007file:
1008
1009  $p->html_header_before_title('<html><head><title>');
1010
1011=head2 top_anchor
1012
1013By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML.
1014You can change it by calling
1015
1016  $p->top_anchor('<a name="zz" >');
1017
1018=head2 html_h_level
1019
1020Normally =head1 will become <h1>, =head2 will become <h2> etc.
1021Using the html_h_level method will change these levels setting the h level
1022of =head1 tags:
1023
1024  $p->html_h_level(3);
1025
1026Will make sure that =head1 will become <h3> and =head2 will become <h4> etc...
1027
1028
1029=head2 index
1030
1031Set it to some true value if you want to have an index (in reality a table of contents)
1032to be added at the top of the generated HTML.
1033
1034  $p->index(1);
1035
1036=head2 html_header_after_title
1037
1038Includes the closing tag of </title> and through the rest of the head
1039till the opening of the body
1040
1041  $p->html_header_after_title('</title>...</head><body id="my_id">');
1042
1043=head2 html_footer
1044
1045The very end of the document:
1046
1047  $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
1048
1049=head1 SUBCLASSING
1050
1051Can use any of the methods described above but for further customization
1052one needs to override some of the methods:
1053
1054  package My::Pod;
1055  use strict;
1056  use warnings;
1057
1058  use base 'Pod::Simple::HTML';
1059
1060  # needs to return a URL string such
1061  # http://some.other.com/page.html
1062  # #anchor_in_the_same_file
1063  # /internal/ref.html
1064  sub do_pod_link {
1065    # My::Pod object and Pod::Simple::PullParserStartToken object
1066    my ($self, $link) = @_;
1067
1068    say $link->tagname;          # will be L for links
1069    say $link->attr('to');       #
1070    say $link->attr('type');     # will be 'pod' always
1071    say $link->attr('section');
1072
1073    # Links local to our web site
1074    if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') {
1075      my $to = $link->attr('to');
1076      if ($to =~ /^Padre::/) {
1077          $to =~ s{::}{/}g;
1078          return "/docs/Padre/$to.html";
1079      }
1080    }
1081
1082    # all other links are generated by the parent class
1083    my $ret = $self->SUPER::do_pod_link($link);
1084    return $ret;
1085  }
1086
1087  1;
1088
1089Meanwhile in script.pl:
1090
1091  use My::Pod;
1092
1093  my $p = My::Pod->new;
1094
1095  my $html;
1096  $p->output_string(\$html);
1097  $p->parse_file('path/to/Module/Name.pm');
1098  open my $out, '>', 'out.html' or die;
1099  print $out $html;
1100
1101TODO
1102
1103maybe override do_beginning do_end
1104
1105=head1 SEE ALSO
1106
1107L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
1108
1109TODO: a corpus of sample Pod input and HTML output?  Or common
1110idioms?
1111
1112=head1 SUPPORT
1113
1114Questions or discussion about POD and Pod::Simple should be sent to the
1115pod-people@perl.org mail list. Send an empty email to
1116pod-people-subscribe@perl.org to subscribe.
1117
1118This module is managed in an open GitHub repository,
1119L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or
1120to clone L<git://github.com/perl-pod/pod-simple.git> and send patches!
1121
1122Patches against Pod::Simple are welcome. Please send bug reports to
1123<bug-pod-simple@rt.cpan.org>.
1124
1125=head1 COPYRIGHT AND DISCLAIMERS
1126
1127Copyright (c) 2002-2004 Sean M. Burke.
1128
1129This library is free software; you can redistribute it and/or modify it
1130under the same terms as Perl itself.
1131
1132This program is distributed in the hope that it will be useful, but
1133without any warranty; without even the implied warranty of
1134merchantability or fitness for a particular purpose.
1135
1136=head1 ACKNOWLEDGEMENTS
1137
1138Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
1139L<Linux man pages online|http://man.he.net/> site for man page links.
1140
1141Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
1142site for Perl module links.
1143
1144=head1 AUTHOR
1145
1146Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
1147But don't bother him, he's retired.
1148
1149Pod::Simple is maintained by:
1150
1151=over
1152
1153=item * Allison Randal C<allison@perl.org>
1154
1155=item * Hans Dieter Pearcey C<hdp@cpan.org>
1156
1157=item * David E. Wheeler C<dwheeler@cpan.org>
1158
1159=back
1160
1161=cut
1162