1require 5; 2package Pod::Simple::HTML; 3use strict; 4use Pod::Simple::PullParser (); 5use vars qw( 6 @ISA %Tagmap $Computerese $LamePad $Linearization_Limit $VERSION 7 $Perldoc_URL_Prefix $Perldoc_URL_Postfix $Man_URL_Prefix $Man_URL_Postfix 8 $Title_Prefix $Title_Postfix $HTML_EXTENSION %ToIndex 9 $Doctype_decl $Content_decl 10); 11@ISA = ('Pod::Simple::PullParser'); 12$VERSION = '3.43'; 13BEGIN { 14 if(defined &DEBUG) { } # no-op 15 elsif( defined &Pod::Simple::DEBUG ) { *DEBUG = \&Pod::Simple::DEBUG } 16 else { *DEBUG = sub () {0}; } 17} 18 19$Doctype_decl ||= ''; # No. Just No. Don't even ask me for it. 20 # qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 21 # "http://www.w3.org/TR/html4/loose.dtd">\n}; 22 23$Content_decl ||= 24 q{<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" >}; 25 26$HTML_EXTENSION = '.html' unless defined $HTML_EXTENSION; 27$Computerese = "" unless defined $Computerese; 28$LamePad = '' unless defined $LamePad; 29 30$Linearization_Limit = 120 unless defined $Linearization_Limit; 31 # headings/items longer than that won't get an <a name="..."> 32$Perldoc_URL_Prefix = 'https://metacpan.org/pod/' 33 unless defined $Perldoc_URL_Prefix; 34$Perldoc_URL_Postfix = '' 35 unless defined $Perldoc_URL_Postfix; 36 37 38$Man_URL_Prefix = 'http://man.he.net/man'; 39$Man_URL_Postfix = ''; 40 41$Title_Prefix = '' unless defined $Title_Prefix; 42$Title_Postfix = '' unless defined $Title_Postfix; 43%ToIndex = map {; $_ => 1 } qw(head1 head2 head3 head4 ); # item-text 44 # 'item-text' stuff in the index doesn't quite work, and may 45 # not be a good idea anyhow. 46 47 48__PACKAGE__->_accessorize( 49 'perldoc_url_prefix', 50 # In turning L<Foo::Bar> into http://whatever/Foo%3a%3aBar, what 51 # to put before the "Foo%3a%3aBar". 52 # (for singleton mode only?) 53 'perldoc_url_postfix', 54 # what to put after "Foo%3a%3aBar" in the URL. Normally "". 55 56 'man_url_prefix', 57 # In turning L<crontab(5)> into http://whatever/man/1/crontab, what 58 # to put before the "1/crontab". 59 'man_url_postfix', 60 # what to put after the "1/crontab" in the URL. Normally "". 61 62 'batch_mode', # whether we're in batch mode 63 'batch_mode_current_level', 64 # When in batch mode, how deep the current module is: 1 for "LWP", 65 # 2 for "LWP::Procotol", 3 for "LWP::Protocol::GHTTP", etc 66 67 'title_prefix', 'title_postfix', 68 # What to put before and after the title in the head. 69 # Should already be &-escaped 70 71 'html_h_level', 72 73 'html_header_before_title', 74 'html_header_after_title', 75 'html_footer', 76 'top_anchor', 77 78 'index', # whether to add an index at the top of each page 79 # (actually it's a table-of-contents, but we'll call it an index, 80 # out of apparently longstanding habit) 81 82 'html_css', # URL of CSS file to point to 83 'html_javascript', # URL of Javascript file to point to 84 85 'force_title', # should already be &-escaped 86 'default_title', # should already be &-escaped 87); 88 89#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 90my @_to_accept; 91 92%Tagmap = ( 93 'Verbatim' => "\n<pre$Computerese>", 94 '/Verbatim' => "</pre>\n", 95 'VerbatimFormatted' => "\n<pre$Computerese>", 96 '/VerbatimFormatted' => "</pre>\n", 97 'VerbatimB' => "<b>", 98 '/VerbatimB' => "</b>", 99 'VerbatimI' => "<i>", 100 '/VerbatimI' => "</i>", 101 'VerbatimBI' => "<b><i>", 102 '/VerbatimBI' => "</i></b>", 103 104 105 'Data' => "\n", 106 '/Data' => "\n", 107 108 'head1' => "\n<h1>", # And also stick in an <a name="..."> 109 'head2' => "\n<h2>", # '' 110 'head3' => "\n<h3>", # '' 111 'head4' => "\n<h4>", # '' 112 'head5' => "\n<h5>", # '' 113 'head6' => "\n<h6>", # '' 114 '/head1' => "</a></h1>\n", 115 '/head2' => "</a></h2>\n", 116 '/head3' => "</a></h3>\n", 117 '/head4' => "</a></h4>\n", 118 '/head5' => "</a></h5>\n", 119 '/head6' => "</a></h6>\n", 120 121 'X' => "<!--\n\tINDEX: ", 122 '/X' => "\n-->", 123 124 changes(qw( 125 Para=p 126 B=b I=i 127 over-bullet=ul 128 over-number=ol 129 over-text=dl 130 over-block=blockquote 131 item-bullet=li 132 item-number=li 133 item-text=dt 134 )), 135 changes2( 136 map {; m/^([-a-z]+)/s && push @_to_accept, $1; $_ } 137 qw[ 138 sample=samp 139 definition=dfn 140 keyboard=kbd 141 variable=var 142 citation=cite 143 abbreviation=abbr 144 acronym=acronym 145 subscript=sub 146 superscript=sup 147 big=big 148 small=small 149 underline=u 150 strikethrough=s 151 preformat=pre 152 teletype=tt 153 ] # no point in providing a way to get <q>...</q>, I think 154 ), 155 156 '/item-bullet' => "</li>$LamePad\n", 157 '/item-number' => "</li>$LamePad\n", 158 '/item-text' => "</a></dt>$LamePad\n", 159 'item-body' => "\n<dd>", 160 '/item-body' => "</dd>\n", 161 162 163 'B' => "<b>", '/B' => "</b>", 164 'I' => "<i>", '/I' => "</i>", 165 'F' => "<em$Computerese>", '/F' => "</em>", 166 'C' => "<code$Computerese>", '/C' => "</code>", 167 'L' => "<a href='YOU_SHOULD_NEVER_SEE_THIS'>", # ideally never used! 168 '/L' => "</a>", 169); 170 171sub changes { 172 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 173 ? ( $1, => "\n<$2>", "/$1", => "</$2>\n" ) : die "Funky $_" 174 } @_; 175} 176sub changes2 { 177 return map {; m/^([-_:0-9a-zA-Z]+)=([-_:0-9a-zA-Z]+)$/s 178 ? ( $1, => "<$2>", "/$1", => "</$2>" ) : die "Funky $_" 179 } @_; 180} 181 182#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 183sub go { Pod::Simple::HTML->parse_from_file(@ARGV); exit 0 } 184 # Just so we can run from the command line. No options. 185 # For that, use perldoc! 186#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 187 188sub new { 189 my $new = shift->SUPER::new(@_); 190 #$new->nix_X_codes(1); 191 $new->nbsp_for_S(1); 192 $new->accept_targets( 'html', 'HTML' ); 193 $new->accept_codes('VerbatimFormatted'); 194 $new->accept_codes(@_to_accept); 195 DEBUG > 2 and print STDERR "To accept: ", join(' ',@_to_accept), "\n"; 196 197 $new->perldoc_url_prefix( $Perldoc_URL_Prefix ); 198 $new->perldoc_url_postfix( $Perldoc_URL_Postfix ); 199 $new->man_url_prefix( $Man_URL_Prefix ); 200 $new->man_url_postfix( $Man_URL_Postfix ); 201 $new->title_prefix( $Title_Prefix ); 202 $new->title_postfix( $Title_Postfix ); 203 204 $new->html_header_before_title( 205 qq[$Doctype_decl<html><head><title>] 206 ); 207 $new->html_header_after_title( join "\n" => 208 "</title>", 209 $Content_decl, 210 "</head>\n<body class='pod'>", 211 $new->version_tag_comment, 212 "<!-- start doc -->\n", 213 ); 214 $new->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 215 $new->top_anchor( "<a name='___top' class='dummyTopAnchor' ></a>\n" ); 216 217 $new->{'Tagmap'} = {%Tagmap}; 218 219 return $new; 220} 221 222sub __adjust_html_h_levels { 223 my ($self) = @_; 224 my $Tagmap = $self->{'Tagmap'}; 225 226 my $add = $self->html_h_level; 227 return unless defined $add; 228 return if ($self->{'Adjusted_html_h_levels'}||0) == $add; 229 230 $add -= 1; 231 for (1 .. 6) { 232 $Tagmap->{"head$_"} =~ s/$_/$_ + $add/e; 233 $Tagmap->{"/head$_"} =~ s/$_/$_ + $add/e; 234 } 235} 236 237sub batch_mode_page_object_init { 238 my($self, $batchconvobj, $module, $infile, $outfile, $depth) = @_; 239 DEBUG and print STDERR "Initting $self\n for $module\n", 240 " in $infile\n out $outfile\n depth $depth\n"; 241 $self->batch_mode(1); 242 $self->batch_mode_current_level($depth); 243 return $self; 244} 245 246sub run { 247 my $self = $_[0]; 248 return $self->do_middle if $self->bare_output; 249 return 250 $self->do_beginning && $self->do_middle && $self->do_end; 251} 252 253#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 254 255sub do_beginning { 256 my $self = $_[0]; 257 258 my $title; 259 260 if(defined $self->force_title) { 261 $title = $self->force_title; 262 DEBUG and print STDERR "Forcing title to be $title\n"; 263 } else { 264 # Actually try looking for the title in the document: 265 $title = $self->get_short_title(); 266 unless($self->content_seen) { 267 DEBUG and print STDERR "No content seen in search for title.\n"; 268 return; 269 } 270 $self->{'Title'} = $title; 271 272 if(defined $title and $title =~ m/\S/) { 273 $title = $self->title_prefix . esc($title) . $self->title_postfix; 274 } else { 275 $title = $self->default_title; 276 $title = '' unless defined $title; 277 DEBUG and print STDERR "Title defaults to $title\n"; 278 } 279 } 280 281 282 my $after = $self->html_header_after_title || ''; 283 if($self->html_css) { 284 my $link = 285 $self->html_css =~ m/</ 286 ? $self->html_css # It's a big blob of markup, let's drop it in 287 : sprintf( # It's just a URL, so let's wrap it up 288 qq[<link rel="stylesheet" type="text/css" title="pod_stylesheet" href="%s">\n], 289 $self->html_css, 290 ); 291 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 292 } 293 $self->_add_top_anchor(\$after); 294 295 if($self->html_javascript) { 296 my $link = 297 $self->html_javascript =~ m/</ 298 ? $self->html_javascript # It's a big blob of markup, let's drop it in 299 : sprintf( # It's just a URL, so let's wrap it up 300 qq[<script type="text/javascript" src="%s"></script>\n], 301 $self->html_javascript, 302 ); 303 $after =~ s{(</head>)}{$link\n$1}i; # otherwise nevermind 304 } 305 306 print {$self->{'output_fh'}} 307 $self->html_header_before_title || '', 308 $title, # already escaped 309 $after, 310 ; 311 312 DEBUG and print STDERR "Returning from do_beginning...\n"; 313 return 1; 314} 315 316sub _add_top_anchor { 317 my($self, $text_r) = @_; 318 unless($$text_r and $$text_r =~ m/name=['"]___top['"]/) { # a hack 319 $$text_r .= $self->top_anchor || ''; 320 } 321 return; 322} 323 324sub version_tag_comment { 325 my $self = shift; 326 return sprintf 327 "<!--\n generated by %s v%s,\n using %s v%s,\n under Perl v%s at %s GMT.\n\n %s\n\n-->\n", 328 esc( 329 ref($self), $self->VERSION(), $ISA[0], $ISA[0]->VERSION(), 330 $], scalar(gmtime($ENV{SOURCE_DATE_EPOCH} || time)), 331 ), $self->_modnote(), 332 ; 333} 334 335sub _modnote { 336 my $class = ref($_[0]) || $_[0]; 337 return join "\n " => grep m/\S/, split "\n", 338 339qq{ 340If you want to change this HTML document, you probably shouldn't do that 341by changing it directly. Instead, see about changing the calling options 342to $class, and/or subclassing $class, 343then reconverting this document from the Pod source. 344When in doubt, email the author of $class for advice. 345See 'perldoc $class' for more info. 346}; 347 348} 349 350sub do_end { 351 my $self = $_[0]; 352 print {$self->{'output_fh'}} $self->html_footer || ''; 353 return 1; 354} 355 356# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 357# Normally this would just be a call to _do_middle_main_loop -- but we 358# have to do some elaborate things to emit all the content and then 359# summarize it and output it /before/ the content that it's a summary of. 360 361sub do_middle { 362 my $self = $_[0]; 363 return $self->_do_middle_main_loop unless $self->index; 364 365 if( $self->output_string ) { 366 # An efficiency hack 367 my $out = $self->output_string; #it's a reference to it 368 my $sneakytag = "\f\f\e\e\b\bIndex Here\e\e\b\b\f\f\n"; 369 $$out .= $sneakytag; 370 $self->_do_middle_main_loop; 371 $sneakytag = quotemeta($sneakytag); 372 my $index = $self->index_as_html(); 373 if( $$out =~ s/$sneakytag/$index/s ) { 374 # Expected case 375 DEBUG and print STDERR "Inserted ", length($index), " bytes of index HTML into $out.\n"; 376 } else { 377 DEBUG and print STDERR "Odd, couldn't find where to insert the index in the output!\n"; 378 # I don't think this should ever happen. 379 } 380 return 1; 381 } 382 383 unless( $self->output_fh ) { 384 require Carp; 385 Carp::confess("Parser object \$p doesn't seem to have any output object! I don't know how to deal with that."); 386 } 387 388 # If we get here, we're outputting to a FH. So we need to do some magic. 389 # Namely, divert all content to a string, which we output after the index. 390 my $fh = $self->output_fh; 391 my $content = ''; 392 { 393 # Our horrible bait and switch: 394 $self->output_string( \$content ); 395 $self->_do_middle_main_loop; 396 $self->abandon_output_string(); 397 $self->output_fh($fh); 398 } 399 print $fh $self->index_as_html(); 400 print $fh $content; 401 402 return 1; 403} 404 405########################################################################### 406 407sub index_as_html { 408 my $self = $_[0]; 409 # This is meant to be called AFTER the input document has been parsed! 410 411 my $points = $self->{'PSHTML_index_points'} || []; 412 413 @$points > 1 or return qq[<div class='indexgroupEmpty'></div>\n]; 414 # There's no point in having a 0-item or 1-item index, I dare say. 415 416 my(@out) = qq{\n<div class='indexgroup'>}; 417 my $level = 0; 418 419 my( $target_level, $previous_tagname, $tagname, $text, $anchorname, $indent); 420 foreach my $p (@$points, ['head0', '(end)']) { 421 ($tagname, $text) = @$p; 422 $anchorname = $self->section_escape($text); 423 if( $tagname =~ m{^head(\d+)$} ) { 424 $target_level = 0 + $1; 425 } else { # must be some kinda list item 426 if($previous_tagname =~ m{^head\d+$} ) { 427 $target_level = $level + 1; 428 } else { 429 $target_level = $level; # no change needed 430 } 431 } 432 433 # Get to target_level by opening or closing ULs 434 while($level > $target_level) 435 { --$level; push @out, (" " x $level) . "</ul>"; } 436 while($level < $target_level) 437 { ++$level; push @out, (" " x ($level-1)) 438 . "<ul class='indexList indexList$level'>"; } 439 440 $previous_tagname = $tagname; 441 next unless $level; 442 443 $indent = ' ' x $level; 444 push @out, sprintf 445 "%s<li class='indexItem indexItem%s'><a href='#%s'>%s</a>", 446 $indent, $level, esc($anchorname), esc($text) 447 ; 448 } 449 push @out, "</div>\n"; 450 return join "\n", @out; 451} 452 453########################################################################### 454 455sub _do_middle_main_loop { 456 my $self = $_[0]; 457 my $fh = $self->{'output_fh'}; 458 my $tagmap = $self->{'Tagmap'}; 459 460 $self->__adjust_html_h_levels; 461 462 my($token, $type, $tagname, $linkto, $linktype); 463 my @stack; 464 my $dont_wrap = 0; 465 466 while($token = $self->get_token) { 467 468 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 469 if( ($type = $token->type) eq 'start' ) { 470 if(($tagname = $token->tagname) eq 'L') { 471 $linktype = $token->attr('type') || 'insane'; 472 473 $linkto = $self->do_link($token); 474 475 if(defined $linkto and length $linkto) { 476 esc($linkto); 477 # (Yes, SGML-escaping applies on top of %-escaping! 478 # But it's rarely noticeable in practice.) 479 print $fh qq{<a href="$linkto" class="podlink$linktype"\n>}; 480 } else { 481 print $fh "<a>"; # Yes, an 'a' element with no attributes! 482 } 483 484 } elsif ($tagname eq 'item-text' or $tagname =~ m/^head\d$/s) { 485 print $fh $tagmap->{$tagname} || next; 486 487 my @to_unget; 488 while(1) { 489 push @to_unget, $self->get_token; 490 last if $to_unget[-1]->is_end 491 and $to_unget[-1]->tagname eq $tagname; 492 493 # TODO: support for X<...>'s found in here? (maybe hack into linearize_tokens) 494 } 495 496 my $name = $self->linearize_tokens(@to_unget); 497 $name = $self->do_section($name, $token) if defined $name; 498 499 print $fh "<a "; 500 if ($tagname =~ m/^head\d$/s) { 501 print $fh "class='u'", $self->index 502 ? " href='#___top' title='click to go to top of document'\n" 503 : "\n"; 504 } 505 506 if(defined $name) { 507 my $esc = esc( $self->section_name_tidy( $name ) ); 508 print $fh qq[name="$esc"]; 509 DEBUG and print STDERR "Linearized ", scalar(@to_unget), 510 " tokens as \"$name\".\n"; 511 push @{ $self->{'PSHTML_index_points'} }, [$tagname, $name] 512 if $ToIndex{ $tagname }; 513 # Obviously, this discards all formatting codes (saving 514 # just their content), but ahwell. 515 516 } else { # ludicrously long, so nevermind 517 DEBUG and print STDERR "Linearized ", scalar(@to_unget), 518 " tokens, but it was too long, so nevermind.\n"; 519 } 520 print $fh "\n>"; 521 $self->unget_token(@to_unget); 522 523 } elsif ($tagname eq 'Data') { 524 my $next = $self->get_token; 525 next unless defined $next; 526 unless( $next->type eq 'text' ) { 527 $self->unget_token($next); 528 next; 529 } 530 DEBUG and print STDERR " raw text ", $next->text, "\n"; 531 # The parser sometimes preserves newlines and sometimes doesn't! 532 (my $text = $next->text) =~ s/\n\z//; 533 print $fh $text, "\n"; 534 next; 535 536 } else { 537 if( $tagname =~ m/^over-/s ) { 538 push @stack, ''; 539 } elsif( $tagname =~ m/^item-/s and @stack and $stack[-1] ) { 540 print $fh $stack[-1]; 541 $stack[-1] = ''; 542 } 543 print $fh $tagmap->{$tagname} || next; 544 ++$dont_wrap if $tagname eq 'Verbatim' or $tagname eq "VerbatimFormatted" 545 or $tagname eq 'X'; 546 } 547 548 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 549 } elsif( $type eq 'end' ) { 550 if( ($tagname = $token->tagname) =~ m/^over-/s ) { 551 if( my $end = pop @stack ) { 552 print $fh $end; 553 } 554 } elsif( $tagname =~ m/^item-/s and @stack) { 555 $stack[-1] = $tagmap->{"/$tagname"}; 556 if( $tagname eq 'item-text' and defined(my $next = $self->get_token) ) { 557 $self->unget_token($next); 558 if( $next->type eq 'start' ) { 559 print $fh $tagmap->{"/item-text"},$tagmap->{"item-body"}; 560 $stack[-1] = $tagmap->{"/item-body"}; 561 } 562 } 563 next; 564 } 565 print $fh $tagmap->{"/$tagname"} || next; 566 --$dont_wrap if $tagname eq 'Verbatim' or $tagname eq 'X'; 567 568 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 569 } elsif( $type eq 'text' ) { 570 esc($type = $token->text); # reuse $type, why not 571 $type =~ s/([\?\!\"\'\.\,]) /$1\n/g unless $dont_wrap; 572 print $fh $type; 573 } 574 575 } 576 return 1; 577} 578 579########################################################################### 580# 581 582sub do_section { 583 my($self, $name, $token) = @_; 584 return $name; 585} 586 587sub do_link { 588 my($self, $token) = @_; 589 my $type = $token->attr('type'); 590 if(!defined $type) { 591 $self->whine("Typeless L!?", $token->attr('start_line')); 592 } elsif( $type eq 'pod') { return $self->do_pod_link($token); 593 } elsif( $type eq 'url') { return $self->do_url_link($token); 594 } elsif( $type eq 'man') { return $self->do_man_link($token); 595 } else { 596 $self->whine("L of unknown type $type!?", $token->attr('start_line')); 597 } 598 return 'FNORG'; # should never get called 599} 600 601# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 602 603sub do_url_link { return $_[1]->attr('to') } 604 605sub do_man_link { 606 my ($self, $link) = @_; 607 my $to = $link->attr('to'); 608 my $frag = $link->attr('section'); 609 610 return undef unless defined $to and length $to; # should never happen 611 612 $frag = $self->section_escape($frag) 613 if defined $frag and length($frag .= ''); # (stringify) 614 615 DEBUG and print STDERR "Resolving \"$to/$frag\"\n\n"; 616 617 return $self->resolve_man_page_link($to, $frag); 618} 619 620 621sub do_pod_link { 622 # And now things get really messy... 623 my($self, $link) = @_; 624 my $to = $link->attr('to'); 625 my $section = $link->attr('section'); 626 return undef unless( # should never happen 627 (defined $to and length $to) or 628 (defined $section and length $section) 629 ); 630 631 $section = $self->section_escape($section) 632 if defined $section and length($section .= ''); # (stringify) 633 634 DEBUG and printf STDERR "Resolving \"%s\" \"%s\"...\n", 635 $to || "(nil)", $section || "(nil)"; 636 637 { 638 # An early hack: 639 my $complete_url = $self->resolve_pod_link_by_table($to, $section); 640 if( $complete_url ) { 641 DEBUG > 1 and print STDERR "resolve_pod_link_by_table(T,S) gives ", 642 $complete_url, "\n (Returning that.)\n"; 643 return $complete_url; 644 } else { 645 DEBUG > 4 and print STDERR " resolve_pod_link_by_table(T,S)", 646 " didn't return anything interesting.\n"; 647 } 648 } 649 650 if(defined $to and length $to) { 651 # Give this routine first hack again 652 my $there = $self->resolve_pod_link_by_table($to); 653 if(defined $there and length $there) { 654 DEBUG > 1 655 and print STDERR "resolve_pod_link_by_table(T) gives $there\n"; 656 } else { 657 $there = 658 $self->resolve_pod_page_link($to, $section); 659 # (I pass it the section value, but I don't see a 660 # particular reason it'd use it.) 661 DEBUG > 1 and print STDERR "resolve_pod_page_link gives ", $there || "(nil)", "\n"; 662 unless( defined $there and length $there ) { 663 DEBUG and print STDERR "Can't resolve $to\n"; 664 return undef; 665 } 666 # resolve_pod_page_link returning undef is how it 667 # can signal that it gives up on making a link 668 } 669 $to = $there; 670 } 671 672 #DEBUG and print STDERR "So far [", $to||'nil', "] [", $section||'nil', "]\n"; 673 674 my $out = (defined $to and length $to) ? $to : ''; 675 $out .= "#" . $section if defined $section and length $section; 676 677 unless(length $out) { # sanity check 678 DEBUG and printf STDERR "Oddly, couldn't resolve \"%s\" \"%s\"...\n", 679 $to || "(nil)", $section || "(nil)"; 680 return undef; 681 } 682 683 DEBUG and print STDERR "Resolved to $out\n"; 684 return $out; 685} 686 687 688# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 689 690sub section_escape { 691 my($self, $section) = @_; 692 return $self->section_url_escape( 693 $self->section_name_tidy($section) 694 ); 695} 696 697sub section_name_tidy { 698 my($self, $section) = @_; 699 $section =~ s/^\s+//; 700 $section =~ s/\s+$//; 701 $section =~ tr/ /_/; 702 if ($] ge 5.006) { 703 $section =~ s/[[:cntrl:][:^ascii:]]//g; # drop crazy characters 704 } elsif ('A' eq chr(65)) { # But not on early EBCDIC 705 $section =~ tr/\x00-\x1F\x80-\x9F//d; 706 } 707 $section = $self->unicode_escape_url($section); 708 $section = '_' unless length $section; 709 return $section; 710} 711 712sub section_url_escape { shift->general_url_escape(@_) } 713sub pagepath_url_escape { shift->general_url_escape(@_) } 714sub manpage_url_escape { shift->general_url_escape(@_) } 715 716sub general_url_escape { 717 my($self, $string) = @_; 718 719 $string =~ s/([^\x00-\xFF])/join '', map sprintf('%%%02X',$_), unpack 'C*', $1/eg; 720 # express Unicode things as urlencode(utf(orig)). 721 722 # A pretty conservative escaping, behoovey even for query components 723 # of a URL (see RFC 2396) 724 725 if ($] ge 5.007_003) { 726 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',utf8::native_to_unicode(ord($1)))/eg; 727 } else { # Is broken for non-ASCII platforms on early perls 728 $string =~ s/([^-_\.!~*()abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/sprintf('%%%02X',ord($1))/eg; 729 } 730 # Yes, stipulate the list without a range, so that this can work right on 731 # all charsets that this module happens to run under. 732 733 return $string; 734} 735 736#-------------------------------------------------------------------------- 737# 738# Oh look, a yawning portal to Hell! Let's play touch football right by it! 739# 740 741sub resolve_pod_page_link { 742 # resolve_pod_page_link must return a properly escaped URL 743 my $self = shift; 744 return $self->batch_mode() 745 ? $self->resolve_pod_page_link_batch_mode(@_) 746 : $self->resolve_pod_page_link_singleton_mode(@_) 747 ; 748} 749 750sub resolve_pod_page_link_singleton_mode { 751 my($self, $it) = @_; 752 return undef unless defined $it and length $it; 753 my $url = $self->pagepath_url_escape($it); 754 755 $url =~ s{::$}{}s; # probably never comes up anyway 756 $url =~ s{::}{/}g unless $self->perldoc_url_prefix =~ m/\?/s; # sane DWIM? 757 758 return undef unless length $url; 759 return $self->perldoc_url_prefix . $url . $self->perldoc_url_postfix; 760} 761 762sub resolve_pod_page_link_batch_mode { 763 my($self, $to) = @_; 764 DEBUG > 1 and print STDERR " During batch mode, resolving $to ...\n"; 765 my @path = grep length($_), split m/::/s, $to, -1; 766 unless( @path ) { # sanity 767 DEBUG and print STDERR "Very odd! Splitting $to gives (nil)!\n"; 768 return undef; 769 } 770 $self->batch_mode_rectify_path(\@path); 771 my $out = join('/', map $self->pagepath_url_escape($_), @path) 772 . $HTML_EXTENSION; 773 DEBUG > 1 and print STDERR " => $out\n"; 774 return $out; 775} 776 777sub batch_mode_rectify_path { 778 my($self, $pathbits) = @_; 779 my $level = $self->batch_mode_current_level; 780 $level--; # how many levels up to go to get to the root 781 if($level < 1) { 782 unshift @$pathbits, '.'; # just to be pretty 783 } else { 784 unshift @$pathbits, ('..') x $level; 785 } 786 return; 787} 788 789sub resolve_man_page_link { 790 my ($self, $to, $frag) = @_; 791 my ($page, $section) = $to =~ /^([^(]+)(?:[(](\d+)[)])?$/; 792 793 return undef unless defined $page and length $page; 794 $section ||= 1; 795 796 return $self->man_url_prefix . "$section/" 797 . $self->manpage_url_escape($page) 798 . $self->man_url_postfix; 799} 800 801#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 802 803sub resolve_pod_link_by_table { 804 # A crazy hack to allow specifying custom L<foo> => URL mappings 805 806 return unless $_[0]->{'podhtml_LOT'}; # An optimizy shortcut 807 808 my($self, $to, $section) = @_; 809 810 # TODO: add a method that actually populates podhtml_LOT from a file? 811 812 if(defined $section) { 813 $to = '' unless defined $to and length $to; 814 return $self->{'podhtml_LOT'}{"$to#$section"}; # quite possibly undef! 815 } else { 816 return $self->{'podhtml_LOT'}{$to}; # quite possibly undef! 817 } 818 return; 819} 820 821########################################################################### 822 823sub linearize_tokens { # self, tokens 824 my $self = shift; 825 my $out = ''; 826 827 my $t; 828 while($t = shift @_) { 829 if(!ref $t or !UNIVERSAL::can($t, 'is_text')) { 830 $out .= $t; # a string, or some insane thing 831 } elsif($t->is_text) { 832 $out .= $t->text; 833 } elsif($t->is_start and $t->tag eq 'X') { 834 # Ignore until the end of this X<...> sequence: 835 my $x_open = 1; 836 while($x_open) { 837 next if( ($t = shift @_)->is_text ); 838 if( $t->is_start and $t->tag eq 'X') { ++$x_open } 839 elsif($t->is_end and $t->tag eq 'X') { --$x_open } 840 } 841 } 842 } 843 return undef if length $out > $Linearization_Limit; 844 return $out; 845} 846 847#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 848 849sub unicode_escape_url { 850 my($self, $string) = @_; 851 $string =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg; 852 # Turn char 1234 into "(1234)" 853 return $string; 854} 855 856#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 857sub esc { # a function. 858 if(defined wantarray) { 859 if(wantarray) { 860 @_ = splice @_; # break aliasing 861 } else { 862 my $x = shift; 863 if ($] ge 5.007_003) { 864 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg; 865 } else { # Is broken for non-ASCII platforms on early perls 866 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg; 867 } 868 return $x; 869 } 870 } 871 foreach my $x (@_) { 872 # Escape things very cautiously: 873 if (defined $x) { 874 if ($] ge 5.007_003) { 875 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(utf8::native_to_unicode(ord($1))).';'/eg 876 } else { # Is broken for non-ASCII platforms on early perls 877 $x =~ s/([^-\n\t !\#\$\%\(\)\*\+,\.\~\/\:\;=\?\@\[\\\]\^_\`\{\|\}abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/'&#'.(ord($1)).';'/eg 878 } 879 } 880 # Leave out "- so that "--" won't make it thru in X-generated comments 881 # with text in them. 882 883 # Yes, stipulate the list without a range, so that this can work right on 884 # all charsets that this module happens to run under. 885 } 886 return @_; 887} 888 889#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 890 8911; 892__END__ 893 894=head1 NAME 895 896Pod::Simple::HTML - convert Pod to HTML 897 898=head1 SYNOPSIS 899 900 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go thingy.pod 901 902 903=head1 DESCRIPTION 904 905This class is for making an HTML rendering of a Pod document. 906 907This is a subclass of L<Pod::Simple::PullParser> and inherits all its 908methods (and options). 909 910Note that if you want to do a batch conversion of a lot of Pod 911documents to HTML, you should see the module L<Pod::Simple::HTMLBatch>. 912 913 914 915=head1 CALLING FROM THE COMMAND LINE 916 917TODO 918 919 perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Thing.pod Thing.html 920 921 922 923=head1 CALLING FROM PERL 924 925=head2 Minimal code 926 927 use Pod::Simple::HTML; 928 my $p = Pod::Simple::HTML->new; 929 $p->output_string(\my $html); 930 $p->parse_file('path/to/Module/Name.pm'); 931 open my $out, '>', 'out.html' or die "Cannot open 'out.html': $!\n"; 932 print $out $html; 933 934=head2 More detailed example 935 936 use Pod::Simple::HTML; 937 938Set the content type: 939 940 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 941 942 my $p = Pod::Simple::HTML->new; 943 944Include a single javascript source: 945 946 $p->html_javascript('http://abc.com/a.js'); 947 948Or insert multiple javascript source in the header 949(or for that matter include anything, thought this is not recommended) 950 951 $p->html_javascript(' 952 <script type="text/javascript" src="http://abc.com/b.js"></script> 953 <script type="text/javascript" src="http://abc.com/c.js"></script>'); 954 955Include a single css source in the header: 956 957 $p->html_css('/style.css'); 958 959or insert multiple css sources: 960 961 $p->html_css(' 962 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="http://remote.server.com/jquery.css"> 963 <link rel="stylesheet" type="text/css" title="pod_stylesheet" href="/style.css">'); 964 965Tell the parser where should the output go. In this case it will be placed in the $html variable: 966 967 my $html; 968 $p->output_string(\$html); 969 970Parse and process a file with pod in it: 971 972 $p->parse_file('path/to/Module/Name.pm'); 973 974=head1 METHODS 975 976TODO 977all (most?) accessorized methods 978 979The following variables need to be set B<before> the call to the ->new constructor. 980 981Set the string that is included before the opening <html> tag: 982 983 $Pod::Simple::HTML::Doctype_decl = qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" 984 "http://www.w3.org/TR/html4/loose.dtd">\n}; 985 986Set the content-type in the HTML head: (defaults to ISO-8859-1) 987 988 $Pod::Simple::HTML::Content_decl = q{<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" >}; 989 990Set the value that will be embedded in the opening tags of F, C tags and verbatim text. 991F maps to <em>, C maps to <code>, Verbatim text maps to <pre> (Computerese defaults to "") 992 993 $Pod::Simple::HTML::Computerese = ' class="some_class_name'; 994 995=head2 html_css 996 997=head2 html_javascript 998 999=head2 title_prefix 1000 1001=head2 title_postfix 1002 1003=head2 html_header_before_title 1004 1005This includes everything before the <title> opening tag including the Document type 1006and including the opening <title> tag. The following call will set it to be a simple HTML 1007file: 1008 1009 $p->html_header_before_title('<html><head><title>'); 1010 1011=head2 top_anchor 1012 1013By default Pod::Simple::HTML adds a dummy anchor at the top of the HTML. 1014You can change it by calling 1015 1016 $p->top_anchor('<a name="zz" >'); 1017 1018=head2 html_h_level 1019 1020Normally =head1 will become <h1>, =head2 will become <h2> etc. 1021Using the html_h_level method will change these levels setting the h level 1022of =head1 tags: 1023 1024 $p->html_h_level(3); 1025 1026Will make sure that =head1 will become <h3> and =head2 will become <h4> etc... 1027 1028 1029=head2 index 1030 1031Set it to some true value if you want to have an index (in reality a table of contents) 1032to be added at the top of the generated HTML. 1033 1034 $p->index(1); 1035 1036=head2 html_header_after_title 1037 1038Includes the closing tag of </title> and through the rest of the head 1039till the opening of the body 1040 1041 $p->html_header_after_title('</title>...</head><body id="my_id">'); 1042 1043=head2 html_footer 1044 1045The very end of the document: 1046 1047 $p->html_footer( qq[\n<!-- end doc -->\n\n</body></html>\n] ); 1048 1049=head1 SUBCLASSING 1050 1051Can use any of the methods described above but for further customization 1052one needs to override some of the methods: 1053 1054 package My::Pod; 1055 use strict; 1056 use warnings; 1057 1058 use base 'Pod::Simple::HTML'; 1059 1060 # needs to return a URL string such 1061 # http://some.other.com/page.html 1062 # #anchor_in_the_same_file 1063 # /internal/ref.html 1064 sub do_pod_link { 1065 # My::Pod object and Pod::Simple::PullParserStartToken object 1066 my ($self, $link) = @_; 1067 1068 say $link->tagname; # will be L for links 1069 say $link->attr('to'); # 1070 say $link->attr('type'); # will be 'pod' always 1071 say $link->attr('section'); 1072 1073 # Links local to our web site 1074 if ($link->tagname eq 'L' and $link->attr('type') eq 'pod') { 1075 my $to = $link->attr('to'); 1076 if ($to =~ /^Padre::/) { 1077 $to =~ s{::}{/}g; 1078 return "/docs/Padre/$to.html"; 1079 } 1080 } 1081 1082 # all other links are generated by the parent class 1083 my $ret = $self->SUPER::do_pod_link($link); 1084 return $ret; 1085 } 1086 1087 1; 1088 1089Meanwhile in script.pl: 1090 1091 use My::Pod; 1092 1093 my $p = My::Pod->new; 1094 1095 my $html; 1096 $p->output_string(\$html); 1097 $p->parse_file('path/to/Module/Name.pm'); 1098 open my $out, '>', 'out.html' or die; 1099 print $out $html; 1100 1101TODO 1102 1103maybe override do_beginning do_end 1104 1105=head1 SEE ALSO 1106 1107L<Pod::Simple>, L<Pod::Simple::HTMLBatch> 1108 1109TODO: a corpus of sample Pod input and HTML output? Or common 1110idioms? 1111 1112=head1 SUPPORT 1113 1114Questions or discussion about POD and Pod::Simple should be sent to the 1115pod-people@perl.org mail list. Send an empty email to 1116pod-people-subscribe@perl.org to subscribe. 1117 1118This module is managed in an open GitHub repository, 1119L<https://github.com/perl-pod/pod-simple/>. Feel free to fork and contribute, or 1120to clone L<git://github.com/perl-pod/pod-simple.git> and send patches! 1121 1122Patches against Pod::Simple are welcome. Please send bug reports to 1123<bug-pod-simple@rt.cpan.org>. 1124 1125=head1 COPYRIGHT AND DISCLAIMERS 1126 1127Copyright (c) 2002-2004 Sean M. Burke. 1128 1129This library is free software; you can redistribute it and/or modify it 1130under the same terms as Perl itself. 1131 1132This program is distributed in the hope that it will be useful, but 1133without any warranty; without even the implied warranty of 1134merchantability or fitness for a particular purpose. 1135 1136=head1 ACKNOWLEDGEMENTS 1137 1138Thanks to L<Hurricane Electric|http://he.net/> for permission to use its 1139L<Linux man pages online|http://man.he.net/> site for man page links. 1140 1141Thanks to L<search.cpan.org|http://search.cpan.org/> for permission to use the 1142site for Perl module links. 1143 1144=head1 AUTHOR 1145 1146Pod::Simple was created by Sean M. Burke <sburke@cpan.org>. 1147But don't bother him, he's retired. 1148 1149Pod::Simple is maintained by: 1150 1151=over 1152 1153=item * Allison Randal C<allison@perl.org> 1154 1155=item * Hans Dieter Pearcey C<hdp@cpan.org> 1156 1157=item * David E. Wheeler C<dwheeler@cpan.org> 1158 1159=back 1160 1161=cut 1162