xref: /openbsd-src/gnu/usr.bin/perl/cpan/Pod-Simple/lib/Pod/Simple/HTML.pm (revision 898184e3e61f9129feb5978fad5a8c6865f00b92)
1
2require 5;
3package Pod::Simple::HTML;
4use strict;
5use Pod::Simple::PullParser ();
6use vars qw(
7  @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION
8  $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix
9  $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex
10  $Doctype_decl  $Content_decl
11);
12@ISA = ('Pod::Simple::PullParser');
13$VERSION = '3.20';
14
15BEGIN {
16  if(defined &DEBUG) { } # no-op
17  elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG }
18  else { *DEBUG = sub () {0}; }
19}
20
21$Doctype_decl ||= '';  # No.  Just No.  Don't even ask me for it.
22 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
23 #    "http://www.w3.org/TR/html4/loose.dtd">\n};
24
25$Content_decl ||=
26 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >};
27
28$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION;
29$Computerese =  "" unless defined $Computerese;
30$LamePad = '' unless defined $LamePad;
31
32$Linearization_Limit = 120 unless defined $Linearization_Limit;
33 # headings/items longer than that won't get an <a name="...">
34$Perldoc_URL_Prefix  = 'http://search.cpan.org/perldoc?'
35 unless defined $Perldoc_URL_Prefix;
36$Perldoc_URL_Postfix = ''
37 unless defined $Perldoc_URL_Postfix;
38
39
40$Man_URL_Prefix  = 'http://man.he.net/man';
41$Man_URL_Postfix = '';
42
43$Title_Prefix  = '' unless defined $Title_Prefix;
44$Title_Postfix = '' unless defined $Title_Postfix;
45%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text
46  # 'item-text' stuff in the index doesn't quite work, and may
47  # not be a good idea anyhow.
48
49
50__PACKAGE__->_accessorize(
51 'perldoc_url_prefix',
52   # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what
53   #  to put before the "Foo%3a%3aBar".
54   # (for singleton mode only?)
55 'perldoc_url_postfix',
56   # what to put after "Foo%3a%3aBar" in the URL.  Normally "".
57
58 'man_url_prefix',
59   # In turning L<crontab(5)> into http://whatever/man/1/crontab, what
60   #  to put before the "1/crontab".
61 'man_url_postfix',
62   #  what to put after the "1/crontab" in the URL. Normally "".
63
64 'batch_mode', # whether we're in batch mode
65 'batch_mode_current_level',
66    # When in batch mode, how deep the current module is: 1 for "LWP",
67    #  2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc
68
69 'title_prefix',  'title_postfix',
70  # What to put before and after the title in the head.
71  # Should already be &-escaped
72
73 'html_h_level',
74
75 'html_header_before_title',
76 'html_header_after_title',
77 'html_footer',
78
79 'index', # whether to add an index at the top of each page
80    # (actually it's a table-of-contents, but we'll call it an index,
81    #  out of apparently longstanding habit)
82
83 'html_css', # URL of CSS file to point to
84 'html_javascript', # URL of Javascript file to point to
85
86 'force_title',   # should already be &-escaped
87 'default_title', # should already be &-escaped
88);
89
90#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
91my @_to_accept;
92
93%Tagmap = (
94  'Verbatim'  => "\n<pre$Computerese>",
95  '/Verbatim' => "</pre>\n",
96  'VerbatimFormatted'  => "\n<pre$Computerese>",
97  '/VerbatimFormatted' => "</pre>\n",
98  'VerbatimB'  => "<b>",
99  '/VerbatimB' => "</b>",
100  'VerbatimI'  => "<i>",
101  '/VerbatimI' => "</i>",
102  'VerbatimBI'  => "<b><i>",
103  '/VerbatimBI' => "</i></b>",
104
105
106  'Data'  => "\n",
107  '/Data' => "\n",
108
109  'head1' => "\n<h1>",  # And also stick in an <a name="...">
110  'head2' => "\n<h2>",  #  ''
111  'head3' => "\n<h3>",  #  ''
112  'head4' => "\n<h4>",  #  ''
113  '/head1' => "</a></h1>\n",
114  '/head2' => "</a></h2>\n",
115  '/head3' => "</a></h3>\n",
116  '/head4' => "</a></h4>\n",
117
118  'X'  => "<!--\n\tINDEX: ",
119  '/X' => "\n-->",
120
121  changes(qw(
122    Para=p
123    B=b I=i
124    over-bullet=ul
125    over-number=ol
126    over-text=dl
127    over-block=blockquote
128    item-bullet=li
129    item-number=li
130    item-text=dt
131  )),
132  changes2(
133    map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ }
134    qw[
135      sample=samp
136      definition=dfn
137      kbd=keyboard
138      variable=var
139      citation=cite
140      abbreviation=abbr
141      acronym=acronym
142      subscript=sub
143      superscript=sup
144      big=big
145      small=small
146      underline=u
147      strikethrough=s
148    ]  # no point in providing a way to get <q>...</q>, I think
149  ),
150
151  '/item-bullet' => "</li>$LamePad\n",
152  '/item-number' => "</li>$LamePad\n",
153  '/item-text'   => "</a></dt>$LamePad\n",
154  'item-body'    => "\n<dd>",
155  '/item-body'   => "</dd>\n",
156
157
158  'B'      =>  "<b>",                  '/B'     =>  "</b>",
159  'I'      =>  "<i>",                  '/I'     =>  "</i>",
160  'F'      =>  "<em$Computerese>",     '/F'     =>  "</em>",
161  'C'      =>  "<code$Computerese>",   '/C'     =>  "</code>",
162  'L'  =>  "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used!
163  '/L' =>  "</a>",
164);
165
166sub changes {
167  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
168     ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_"
169  } @_;
170}
171sub changes2 {
172  return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s
173     ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_"
174  } @_;
175}
176
177#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
178sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 }
179 # Just so we can run from the command line.  No options.
180 #  For that, use perldoc!
181#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
182
183sub new {
184  my $new = shift->SUPER::new(@_);
185  #$new->nix_X_codes(1);
186  $new->nbsp_for_S(1);
187  $new->accept_targets( 'html', 'HTML' );
188  $new->accept_codes('VerbatimFormatted');
189  $new->accept_codes(@_to_accept);
190  DEBUG > 2 and print "To accept: ", join(' ',@_to_accept), "\n";
191
192  $new->perldoc_url_prefix(  $Perldoc_URL_Prefix  );
193  $new->perldoc_url_postfix( $Perldoc_URL_Postfix );
194  $new->man_url_prefix(  $Man_URL_Prefix  );
195  $new->man_url_postfix( $Man_URL_Postfix );
196  $new->title_prefix(  $Title_Prefix  );
197  $new->title_postfix( $Title_Postfix );
198
199  $new->html_header_before_title(
200   qq[$Doctype_decl<html><head><title>]
201  );
202  $new->html_header_after_title( join "\n" =>
203    "</title>",
204    $Content_decl,
205    "</head>\n<body class='pod'>",
206    $new->version_tag_comment,
207    "<!-- start doc -->\n",
208  );
209  $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
210
211  $new->{'Tagmap'} = {%Tagmap};
212
213  return $new;
214}
215
216sub __adjust_html_h_levels {
217  my ($self) = @_;
218  my $Tagmap = $self->{'Tagmap'};
219
220  my $add = $self->html_h_level;
221  return unless defined $add;
222  return if ($self->{'Adjusted_html_h_levels'}||0) == $add;
223
224  $add -= 1;
225  for (1 .. 4) {
226    $Tagmap->{"head$_"}  =~ s/$_/$_ + $add/e;
227    $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e;
228  }
229}
230
231sub batch_mode_page_object_init {
232  my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_;
233  DEBUG and print "Initting $self\n  for $module\n",
234    "  in $infile\n  out $outfile\n  depth $depth\n";
235  $self->batch_mode(1);
236  $self->batch_mode_current_level($depth);
237  return $self;
238}
239
240sub run {
241  my $self = $_[0];
242  return $self->do_middle if $self->bare_output;
243  return
244   $self->do_beginning && $self->do_middle && $self->do_end;
245}
246
247#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
248
249sub do_beginning {
250  my $self = $_[0];
251
252  my $title;
253
254  if(defined $self->force_title) {
255    $title = $self->force_title;
256    DEBUG and print "Forcing title to be $title\n";
257  } else {
258    # Actually try looking for the title in the document:
259    $title = $self->get_short_title();
260    unless($self->content_seen) {
261      DEBUG and print "No content seen in search for title.\n";
262      return;
263    }
264    $self->{'Title'} = $title;
265
266    if(defined $title and $title =~ m/\S/) {
267      $title = $self->title_prefix . esc($title) . $self->title_postfix;
268    } else {
269      $title = $self->default_title;
270      $title = '' unless defined $title;
271      DEBUG and print "Title defaults to $title\n";
272    }
273  }
274
275
276  my $after = $self->html_header_after_title  || '';
277  if($self->html_css) {
278    my $link =
279    $self->html_css =~ m/</
280     ? $self->html_css # It's a big blob of markup, let's drop it in
281     : sprintf(        # It's just a URL, so let's wrap it up
282      qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n],
283      $self->html_css,
284    );
285    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
286  }
287  $self->_add_top_anchor(\$after);
288
289  if($self->html_javascript) {
290    my $link =
291    $self->html_javascript =~ m/</
292     ? $self->html_javascript # It's a big blob of markup, let's drop it in
293     : sprintf(        # It's just a URL, so let's wrap it up
294      qq[<script type="text/javascript" src="%s"></script>\n],
295      $self->html_javascript,
296    );
297    $after =~ s{(</head>)}{$link\n$1}i;  # otherwise nevermind
298  }
299
300  print {$self->{'output_fh'}}
301    $self->html_header_before_title || '',
302    $title, # already escaped
303    $after,
304  ;
305
306  DEBUG and print "Returning from do_beginning...\n";
307  return 1;
308}
309
310sub _add_top_anchor {
311  my($self, $text_r) = @_;
312  unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack
313    $$text_r .= "<a name='___top' class='dummyTopAnchor' ></a>\n";
314  }
315  return;
316}
317
318sub version_tag_comment {
319  my $self = shift;
320  return sprintf
321   "<!--\n  generated by %s v%s,\n  using %s v%s,\n  under Perl v%s at %s GMT.\n\n %s\n\n-->\n",
322   esc(
323    ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(),
324    $], scalar(gmtime),
325   ), $self->_modnote(),
326  ;
327}
328
329sub _modnote {
330  my $class = ref($_[0]) || $_[0];
331  return join "\n   " => grep m/\S/, split "\n",
332
333qq{
334If you want to change this HTML document, you probably shouldn't do that
335by changing it directly.  Instead, see about changing the calling options
336to $class, and/or subclassing $class,
337then reconverting this document from the Pod source.
338When in doubt, email the author of $class for advice.
339See 'perldoc $class' for more info.
340};
341
342}
343
344sub do_end {
345  my $self = $_[0];
346  print {$self->{'output_fh'}}  $self->html_footer || '';
347  return 1;
348}
349
350# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
351# Normally this would just be a call to _do_middle_main_loop -- but we
352#  have to do some elaborate things to emit all the content and then
353#  summarize it and output it /before/ the content that it's a summary of.
354
355sub do_middle {
356  my $self = $_[0];
357  return $self->_do_middle_main_loop unless $self->index;
358
359  if( $self->output_string ) {
360    # An efficiency hack
361    my $out = $self->output_string; #it's a reference to it
362    my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n";
363    $$out .= $sneakytag;
364    $self->_do_middle_main_loop;
365    $sneakytag = quotemeta($sneakytag);
366    my $index = $self->index_as_html();
367    if( $$out =~ s/$sneakytag/$index/s ) {
368      # Expected case
369      DEBUG and print "Inserted ", length($index), " bytes of index HTML into $out.\n";
370    } else {
371      DEBUG and print "Odd, couldn't find where to insert the index in the output!\n";
372      # I don't think this should ever happen.
373    }
374    return 1;
375  }
376
377  unless( $self->output_fh ) {
378    require Carp;
379    Carp::confess("Parser object \$p doesn't seem to have any output object!  I don't know how to deal with that.");
380  }
381
382  # If we get here, we're outputting to a FH.  So we need to do some magic.
383  # Namely, divert all content to a string, which we output after the index.
384  my $fh = $self->output_fh;
385  my $content = '';
386  {
387    # Our horrible bait and switch:
388    $self->output_string( \$content );
389    $self->_do_middle_main_loop;
390    $self->abandon_output_string();
391    $self->output_fh($fh);
392  }
393  print $fh $self->index_as_html();
394  print $fh $content;
395
396  return 1;
397}
398
399###########################################################################
400
401sub index_as_html {
402  my $self = $_[0];
403  # This is meant to be called AFTER the input document has been parsed!
404
405  my $points = $self->{'PSHTML_index_points'} || [];
406
407  @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n];
408   # There's no point in having a 0-item or 1-item index, I dare say.
409
410  my(@out) = qq{\n<div class='indexgroup'>};
411  my $level = 0;
412
413  my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent);
414  foreach my $p (@$points, ['head0', '(end)']) {
415    ($tagname, $text) = @$p;
416    $anchorname = $self->section_escape($text);
417    if( $tagname =~ m{^head(\d+)$} ) {
418      $target_level = 0 + $1;
419    } else {  # must be some kinda list item
420      if($previous_tagname =~ m{^head\d+$} ) {
421        $target_level = $level + 1;
422      } else {
423        $target_level = $level;  # no change needed
424      }
425    }
426
427    # Get to target_level by opening or closing ULs
428    while($level > $target_level)
429     { --$level; push @out, ("  " x $level) . "</ul>"; }
430    while($level < $target_level)
431     { ++$level; push @out, ("  " x ($level-1))
432       . "<ul   class='indexList indexList$level'>"; }
433
434    $previous_tagname = $tagname;
435    next unless $level;
436
437    $indent = '  '  x $level;
438    push @out, sprintf
439      "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>",
440      $indent, $level, esc($anchorname), esc($text)
441    ;
442  }
443  push @out, "</div>\n";
444  return join "\n", @out;
445}
446
447###########################################################################
448
449sub _do_middle_main_loop {
450  my $self = $_[0];
451  my $fh = $self->{'output_fh'};
452  my $tagmap = $self->{'Tagmap'};
453
454  $self->__adjust_html_h_levels;
455
456  my($token, $type, $tagname, $linkto, $linktype);
457  my @stack;
458  my $dont_wrap = 0;
459
460  while($token = $self->get_token) {
461
462    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
463    if( ($type = $token->type) eq 'start' ) {
464      if(($tagname = $token->tagname) eq 'L') {
465        $linktype = $token->attr('type') || 'insane';
466
467        $linkto = $self->do_link($token);
468
469        if(defined $linkto and length $linkto) {
470          esc($linkto);
471            #   (Yes, SGML-escaping applies on top of %-escaping!
472            #   But it's rarely noticeable in practice.)
473          print $fh qq{<a href="$linkto" class="podlink$linktype"\n>};
474        } else {
475          print $fh "<a>"; # Yes, an 'a' element with no attributes!
476        }
477
478      } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) {
479        print $fh $tagmap->{$tagname} || next;
480
481        my @to_unget;
482        while(1) {
483          push @to_unget, $self->get_token;
484          last if $to_unget[-1]->is_end
485              and $to_unget[-1]->tagname eq $tagname;
486
487          # TODO: support for X<...>'s found in here?  (maybe hack into linearize_tokens)
488        }
489
490        my $name = $self->linearize_tokens(@to_unget);
491        $name = $self->do_section($name, $token) if defined $name;
492
493        print $fh "<a ";
494        if ($tagname =~ m/^head\d$/s) {
495            print $fh "class='u'", $self->index
496                ? " href='#___top' title='click to go to top of document'\n"
497                : "\n";
498        }
499
500        if(defined $name) {
501          my $esc = esc(  $self->section_name_tidy( $name ) );
502          print $fh qq[name="$esc"];
503          DEBUG and print "Linearized ", scalar(@to_unget),
504           " tokens as \"$name\".\n";
505          push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name]
506           if $ToIndex{ $tagname };
507            # Obviously, this discards all formatting codes (saving
508            #  just their content), but ahwell.
509
510        } else {  # ludicrously long, so nevermind
511          DEBUG and print "Linearized ", scalar(@to_unget),
512           " tokens, but it was too long, so nevermind.\n";
513        }
514        print $fh "\n>";
515        $self->unget_token(@to_unget);
516
517      } elsif ($tagname eq 'Data') {
518        my $next = $self->get_token;
519        next unless defined $next;
520        unless( $next->type eq 'text' ) {
521          $self->unget_token($next);
522          next;
523        }
524        DEBUG and print "    raw text ", $next->text, "\n";
525        print $fh "\n" . $next->text . "\n";
526        next;
527
528      } else {
529        if( $tagname =~ m/^over-/s ) {
530          push @stack, '';
531        } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) {
532          print $fh $stack[-1];
533          $stack[-1] = '';
534        }
535        print $fh $tagmap->{$tagname} || next;
536        ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted"
537          or $tagname eq 'X';
538      }
539
540    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
541    } elsif( $type eq 'end' ) {
542      if( ($tagname = $token->tagname) =~ m/^over-/s ) {
543        if( my $end = pop @stack ) {
544          print $fh $end;
545        }
546      } elsif( $tagname =~ m/^item-/s and @stack) {
547        $stack[-1] = $tagmap->{"/$tagname"};
548        if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) {
549          $self->unget_token($next);
550          if( $next->type eq 'start' ) {
551            print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"};
552            $stack[-1] = $tagmap->{"/item-body"};
553          }
554        }
555        next;
556      }
557      print $fh $tagmap->{"/$tagname"} || next;
558      --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X';
559
560    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
561    } elsif( $type eq 'text' ) {
562      esc($type = $token->text);  # reuse $type, why not
563      $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap;
564      print $fh $type;
565    }
566
567  }
568  return 1;
569}
570
571###########################################################################
572#
573
574sub do_section {
575  my($self, $name, $token) = @_;
576  return $name;
577}
578
579sub do_link {
580  my($self, $token) = @_;
581  my $type = $token->attr('type');
582  if(!defined $type) {
583    $self->whine("Typeless L!?", $token->attr('start_line'));
584  } elsif( $type eq 'pod') { return $self->do_pod_link($token);
585  } elsif( $type eq 'url') { return $self->do_url_link($token);
586  } elsif( $type eq 'man') { return $self->do_man_link($token);
587  } else {
588    $self->whine("L of unknown type $type!?", $token->attr('start_line'));
589  }
590  return 'FNORG'; # should never get called
591}
592
593# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
594
595sub do_url_link { return $_[1]->attr('to') }
596
597sub do_man_link {
598  my ($self, $link) = @_;
599  my $to = $link->attr('to');
600  my $frag = $link->attr('section');
601
602  return undef unless defined $to and length $to; # should never happen
603
604  $frag = $self->section_escape($frag)
605   if defined $frag and length($frag .= ''); # (stringify)
606
607  DEBUG and print "Resolving \"$to/$frag\"\n\n";
608
609  return $self->resolve_man_page_link($to, $frag);
610}
611
612
613sub do_pod_link {
614  # And now things get really messy...
615  my($self, $link) = @_;
616  my $to = $link->attr('to');
617  my $section = $link->attr('section');
618  return undef unless(  # should never happen
619    (defined $to and length $to) or
620    (defined $section and length $section)
621  );
622
623  $section = $self->section_escape($section)
624   if defined $section and length($section .= ''); # (stringify)
625
626  DEBUG and printf "Resolving \"%s\" \"%s\"...\n",
627   $to || "(nil)",  $section || "(nil)";
628
629  {
630    # An early hack:
631    my $complete_url = $self->resolve_pod_link_by_table($to, $section);
632    if( $complete_url ) {
633      DEBUG > 1 and print "resolve_pod_link_by_table(T,S) gives ",
634        $complete_url, "\n  (Returning that.)\n";
635      return $complete_url;
636    } else {
637      DEBUG > 4 and print " resolve_pod_link_by_table(T,S)",
638       " didn't return anything interesting.\n";
639    }
640  }
641
642  if(defined $to and length $to) {
643    # Give this routine first hack again
644    my $there = $self->resolve_pod_link_by_table($to);
645    if(defined $there and length $there) {
646      DEBUG > 1
647       and print "resolve_pod_link_by_table(T) gives $there\n";
648    } else {
649      $there =
650        $self->resolve_pod_page_link($to, $section);
651         # (I pass it the section value, but I don't see a
652         #  particular reason it'd use it.)
653      DEBUG > 1 and print "resolve_pod_page_link gives ", $to || "(nil)", "\n";
654      unless( defined $there and length $there ) {
655        DEBUG and print "Can't resolve $to\n";
656        return undef;
657      }
658      # resolve_pod_page_link returning undef is how it
659      #  can signal that it gives up on making a link
660    }
661    $to = $there;
662  }
663
664  #DEBUG and print "So far [", $to||'nil', "] [", $section||'nil', "]\n";
665
666  my $out = (defined $to and length $to) ? $to : '';
667  $out .= "#" . $section if defined $section and length $section;
668
669  unless(length $out) { # sanity check
670    DEBUG and printf "Oddly, couldn't resolve \"%s\" \"%s\"...\n",
671     $to || "(nil)",  $section || "(nil)";
672    return undef;
673  }
674
675  DEBUG and print "Resolved to $out\n";
676  return $out;
677}
678
679
680# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
681
682sub section_escape {
683  my($self, $section) = @_;
684  return $self->section_url_escape(
685    $self->section_name_tidy($section)
686  );
687}
688
689sub section_name_tidy {
690  my($self, $section) = @_;
691  $section =~ s/^\s+//;
692  $section =~ s/\s+$//;
693  $section =~ tr/ /_/;
694  $section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
695  $section = $self->unicode_escape_url($section);
696  $section = '_' unless length $section;
697  return $section;
698}
699
700sub section_url_escape  { shift->general_url_escape(@_) }
701sub pagepath_url_escape { shift->general_url_escape(@_) }
702sub manpage_url_escape  { shift->general_url_escape(@_) }
703
704sub general_url_escape {
705  my($self, $string) = @_;
706
707  $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg;
708     # express Unicode things as urlencode(utf(orig)).
709
710  # A pretty conservative escaping, behoovey even for query components
711  #  of a URL (see RFC 2396)
712
713  $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg;
714   # Yes, stipulate the list without a range, so that this can work right on
715   #  all charsets that this module happens to run under.
716   # Altho, hmm, what about that ord?  Presumably that won't work right
717   #  under non-ASCII charsets.  Something should be done
718   #  about that, I guess?
719
720  return $string;
721}
722
723#--------------------------------------------------------------------------
724#
725# Oh look, a yawning portal to Hell!  Let's play touch football right by it!
726#
727
728sub resolve_pod_page_link {
729  # resolve_pod_page_link must return a properly escaped URL
730  my $self = shift;
731  return $self->batch_mode()
732   ? $self->resolve_pod_page_link_batch_mode(@_)
733   : $self->resolve_pod_page_link_singleton_mode(@_)
734  ;
735}
736
737sub resolve_pod_page_link_singleton_mode {
738  my($self, $it) = @_;
739  return undef unless defined $it and length $it;
740  my $url = $self->pagepath_url_escape($it);
741
742  $url =~ s{::$}{}s; # probably never comes up anyway
743  $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM?
744
745  return undef unless length $url;
746  return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix;
747}
748
749sub resolve_pod_page_link_batch_mode {
750  my($self, $to) = @_;
751  DEBUG > 1 and print " During batch mode, resolving $to ...\n";
752  my @path = grep length($_), split m/::/s, $to, -1;
753  unless( @path ) { # sanity
754    DEBUG and print "Very odd!  Splitting $to gives (nil)!\n";
755    return undef;
756  }
757  $self->batch_mode_rectify_path(\@path);
758  my $out = join('/', map $self->pagepath_url_escape($_), @path)
759    . $HTML_EXTENSION;
760  DEBUG > 1 and print " => $out\n";
761  return $out;
762}
763
764sub batch_mode_rectify_path {
765  my($self, $pathbits) = @_;
766  my $level = $self->batch_mode_current_level;
767  $level--; # how many levels up to go to get to the root
768  if($level < 1) {
769    unshift @$pathbits, '.'; # just to be pretty
770  } else {
771    unshift @$pathbits, ('..') x $level;
772  }
773  return;
774}
775
776sub resolve_man_page_link {
777  my ($self, $to, $frag) = @_;
778  my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/;
779
780  return undef unless defined $page and length $page;
781  $section ||= 1;
782
783  return $self->man_url_prefix . "$section/"
784      . $self->manpage_url_escape($page)
785      . $self->man_url_postfix;
786}
787
788#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
789
790sub resolve_pod_link_by_table {
791  # A crazy hack to allow specifying custom L<foo> => URL mappings
792
793  return unless $_[0]->{'podhtml_LOT'};  # An optimizy shortcut
794
795  my($self, $to, $section) = @_;
796
797  # TODO: add a method that actually populates podhtml_LOT from a file?
798
799  if(defined $section) {
800    $to = '' unless defined $to and length $to;
801    return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef!
802  } else {
803    return $self->{'podhtml_LOT'}{$to};            # quite possibly undef!
804  }
805  return;
806}
807
808###########################################################################
809
810sub linearize_tokens {  # self, tokens
811  my $self = shift;
812  my $out = '';
813
814  my $t;
815  while($t = shift @_) {
816    if(!ref $t or !UNIVERSAL::can($t, 'is_text')) {
817      $out .= $t; # a string, or some insane thing
818    } elsif($t->is_text) {
819      $out .= $t->text;
820    } elsif($t->is_start and $t->tag eq 'X') {
821      # Ignore until the end of this X<...> sequence:
822      my $x_open = 1;
823      while($x_open) {
824        next if( ($t = shift @_)->is_text );
825        if(   $t->is_start and $t->tag eq 'X') { ++$x_open }
826        elsif($t->is_end   and $t->tag eq 'X') { --$x_open }
827      }
828    }
829  }
830  return undef if length $out > $Linearization_Limit;
831  return $out;
832}
833
834#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
835
836sub unicode_escape_url {
837  my($self, $string) = @_;
838  $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
839    #  Turn char 1234 into "(1234)"
840  return $string;
841}
842
843#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
844sub esc { # a function.
845  if(defined wantarray) {
846    if(wantarray) {
847      @_ = splice @_; # break aliasing
848    } else {
849      my $x = shift;
850      $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg;
851      return $x;
852    }
853  }
854  foreach my $x (@_) {
855    # Escape things very cautiously:
856    $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg
857     if defined $x;
858    # Leave out "- so that "--" won't make it thru in X-generated comments
859    #  with text in them.
860
861    # Yes, stipulate the list without a range, so that this can work right on
862    #  all charsets that this module happens to run under.
863    # Altho, hmm, what about that ord?  Presumably that won't work right
864    #  under non-ASCII charsets.  Something should be done about that.
865  }
866  return @_;
867}
868
869#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
870
8711;
872__END__
873
874=head1 NAME
875
876Pod::Simple::HTML - convert Pod to HTML
877
878=head1 SYNOPSIS
879
880  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod
881
882
883=head1 DESCRIPTION
884
885This class is for making an HTML rendering of a Pod document.
886
887This is a subclass of L<Pod::Simple::PullParser> and inherits all its
888methods (and options).
889
890Note that if you want to do a batch conversion of a lot of Pod
891documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>.
892
893
894
895=head1 CALLING FROM THE COMMAND LINE
896
897TODO
898
899  perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html
900
901
902
903=head1 CALLING FROM PERL
904
905=head2 Minimal code
906
907  use Pod::Simple::HTML;
908  my $p = Pod::Simple::HTML->new;
909  $p->output_string(\my $html);
910  $p->parse_file('path/to/Module/Name.pm');
911  open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n";
912  print $out $html;
913
914=head2 More detailed example
915
916  use Pod::Simple::HTML;
917
918Set the content type:
919
920  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
921
922  my $p = Pod::Simple::HTML->new;
923
924Include a single javascript source:
925
926  $p->html_javascript('http://abc.com/a.js');
927
928Or insert multiple javascript source in the header
929(or for that matter include anything, thought this is not recommended)
930
931  $p->html_javascript('
932      <script type="text/javascript" src="http://abc.com/b.js"></script>
933      <script type="text/javascript" src="http://abc.com/c.js"></script>');
934
935Include a single css source in the header:
936
937  $p->html_css('/style.css');
938
939or insert multiple css sources:
940
941  $p->html_css('
942      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css">
943      <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">');
944
945Tell the parser where should the output go. In this case it will be placed in the $html variable:
946
947  my $html;
948  $p->output_string(\$html);
949
950Parse and process a file with pod in it:
951
952  $p->parse_file('path/to/Module/Name.pm');
953
954=head1 METHODS
955
956TODO
957all (most?) accessorized methods
958
959The following variables need to be set B<before> the call to the ->new constructor.
960
961Set the string that is included before the opening <html> tag:
962
963  $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
964	 "http://www.w3.org/TR/html4/loose.dtd">\n};
965
966Set the content-type in the HTML head: (defaults to ISO-8859-1)
967
968  $Pod::Simple::HTML::Content_decl =  q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >};
969
970Set the value that will be ebedded in the opening tags of F, C tags and verbatim text.
971F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "")
972
973  $Pod::Simple::HTML::Computerese =  ' class="some_class_name';
974
975=head2 html_css
976
977=head2 html_javascript
978
979=head2 title_prefix
980
981=head2 title_postfix
982
983=head2 html_header_before_title
984
985This includes everything before the <title> opening tag including the Document type
986and including the opening <title> tag. The following call will set it to be a simple HTML
987file:
988
989  $p->html_header_before_title('<html><head><title>');
990
991=head2 html_h_level
992
993Normally =head1 will become <h1>, =head2 will become <h2> etc.
994Using the html_h_level method will change these levels setting the h level
995of =head1 tags:
996
997  $p->html_h_level(3);
998
999Will make sure that =head1 will become <h3> and =head2 will become <h4> etc...
1000
1001
1002=head2 index
1003
1004Set it to some true value if you want to have an index (in reality a table of contents)
1005to be added at the top of the generated HTML.
1006
1007  $p->index(1);
1008
1009=head2 html_header_after_title
1010
1011Includes the closing tag of </title> and through the rest of the head
1012till the opening of the body
1013
1014  $p->html_header_after_title('</title>...</head><body id="my_id">');
1015
1016=head2 html_footer
1017
1018The very end of the document:
1019
1020  $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] );
1021
1022=head1 SUBCLASSING
1023
1024Can use any of the methods described above but for further customization
1025one needs to override some of the methods:
1026
1027  package My::Pod;
1028  use strict;
1029  use warnings;
1030
1031  use base 'Pod::Simple::HTML';
1032
1033  # needs to return a URL string such
1034  # http://some.other.com/page.html
1035  # #anchor_in_the_same_file
1036  # /internal/ref.html
1037  sub do_pod_link {
1038    # My::Pod object and Pod::Simple::PullParserStartToken object
1039    my ($self, $link) = @_;
1040
1041    say $link->tagname;          # will be L for links
1042    say $link->attr('to');       #
1043    say $link->attr('type');     # will be 'pod' always
1044    say $link->attr('section');
1045
1046    # Links local to our web site
1047    if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') {
1048      my $to = $link->attr('to');
1049      if ($to =~ /^Padre::/) {
1050          $to =~ s{::}{/}g;
1051          return "/docs/Padre/$to.html";
1052      }
1053    }
1054
1055    # all other links are generated by the parent class
1056    my $ret = $self->SUPER::do_pod_link($link);
1057    return $ret;
1058  }
1059
1060  1;
1061
1062Meanwhile in script.pl:
1063
1064  use My::Pod;
1065
1066  my $p = My::Pod->new;
1067
1068  my $html;
1069  $p->output_string(\$html);
1070  $p->parse_file('path/to/Module/Name.pm');
1071  open my $out, '>', 'out.html' or die;
1072  print $out $html;
1073
1074TODO
1075
1076maybe override do_beginning do_end
1077
1078=head1 SEE ALSO
1079
1080L<Pod::Simple>, L<Pod::Simple::HTMLBatch>
1081
1082TODO: a corpus of sample Pod input and HTML output?  Or common
1083idioms?
1084
1085=head1 SUPPORT
1086
1087Questions or discussion about POD and Pod::Simple should be sent to the
1088pod-people@perl.org mail list. Send an empty email to
1089pod-people-subscribe@perl.org to subscribe.
1090
1091This module is managed in an open GitHub repository,
1092L<http://github.com/theory/pod-simple/>. Feel free to fork and contribute, or
1093to clone L<git://github.com/theory/pod-simple.git> and send patches!
1094
1095Patches against Pod::Simple are welcome. Please send bug reports to
1096<bug-pod-simple@rt.cpan.org>.
1097
1098=head1 COPYRIGHT AND DISCLAIMERS
1099
1100Copyright (c) 2002-2004 Sean M. Burke.
1101
1102This library is free software; you can redistribute it and/or modify it
1103under the same terms as Perl itself.
1104
1105This program is distributed in the hope that it will be useful, but
1106without any warranty; without even the implied warranty of
1107merchantability or fitness for a particular purpose.
1108
1109=head1 ACKNOWLEDGEMENTS
1110
1111Thanks to L<Hurricane Electric|http://he.net/> for permission to use its
1112L<Linux man pages online|http://man.he.net/> site for man page links.
1113
1114Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the
1115site for Perl module links.
1116
1117=head1 AUTHOR
1118
1119Pod::Simple was created by Sean M. Burke <sburke@cpan.org>.
1120But don't bother him, he's retired.
1121
1122Pod::Simple is maintained by:
1123
1124=over
1125
1126=item * Allison Randal C<allison@perl.org>
1127
1128=item * Hans Dieter Pearcey C<hdp@cpan.org>
1129
1130=item * David E. Wheeler C<dwheeler@cpan.org>
1131
1132=back
1133
1134=cut
1135