1# Convert POD data to formatted text. 2# 3# This module converts POD to formatted text. It replaces the old Pod::Text 4# module that came with versions of Perl prior to 5.6.0 and attempts to match 5# its output except for some specific circumstances where other decisions 6# seemed to produce better output. It uses Pod::Parser and is designed to be 7# very easy to subclass. 8# 9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl 10 11############################################################################## 12# Modules and declarations 13############################################################################## 14 15package Pod::Text; 16 17use 5.008; 18use strict; 19use warnings; 20 21use vars qw(@ISA @EXPORT %ESCAPES $VERSION); 22 23use Carp qw(carp croak); 24use Encode qw(encode); 25use Exporter (); 26use Pod::Simple (); 27 28@ISA = qw(Pod::Simple Exporter); 29 30# We have to export pod2text for backward compatibility. 31@EXPORT = qw(pod2text); 32 33$VERSION = '4.14'; 34 35# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code 36# taken from Pod::Simple 3.32, but was only added in 3.30. 37my ($NBSP, $SHY); 38if ($Pod::Simple::VERSION ge 3.30) { 39 $NBSP = $Pod::Simple::nbsp; 40 $SHY = $Pod::Simple::shy; 41} else { 42 $NBSP = chr utf8::unicode_to_native(0xA0); 43 $SHY = chr utf8::unicode_to_native(0xAD); 44} 45 46############################################################################## 47# Initialization 48############################################################################## 49 50# This function handles code blocks. It's registered as a callback to 51# Pod::Simple and therefore doesn't work as a regular method call, but all it 52# does is call output_code with the line. 53sub handle_code { 54 my ($line, $number, $parser) = @_; 55 $parser->output_code ($line . "\n"); 56} 57 58# Initialize the object and set various Pod::Simple options that we need. 59# Here, we also process any additional options passed to the constructor or 60# set up defaults if none were given. Note that all internal object keys are 61# in all-caps, reserving all lower-case object keys for Pod::Simple and user 62# arguments. 63sub new { 64 my $class = shift; 65 my $self = $class->SUPER::new; 66 67 # Tell Pod::Simple to handle S<> by automatically inserting . 68 $self->nbsp_for_S (1); 69 70 # Tell Pod::Simple to keep whitespace whenever possible. 71 if ($self->can ('preserve_whitespace')) { 72 $self->preserve_whitespace (1); 73 } else { 74 $self->fullstop_space_harden (1); 75 } 76 77 # The =for and =begin targets that we accept. 78 $self->accept_targets (qw/text TEXT/); 79 80 # Ensure that contiguous blocks of code are merged together. Otherwise, 81 # some of the guesswork heuristics don't work right. 82 $self->merge_text (1); 83 84 # Pod::Simple doesn't do anything useful with our arguments, but we want 85 # to put them in our object as hash keys and values. This could cause 86 # problems if we ever clash with Pod::Simple's own internal class 87 # variables. 88 my %opts = @_; 89 my @opts = map { ("opt_$_", $opts{$_}) } keys %opts; 90 %$self = (%$self, @opts); 91 92 # Send errors to stderr if requested. 93 if ($$self{opt_stderr} and not $$self{opt_errors}) { 94 $$self{opt_errors} = 'stderr'; 95 } 96 delete $$self{opt_stderr}; 97 98 # Validate the errors parameter and act on it. 99 if (not defined $$self{opt_errors}) { 100 $$self{opt_errors} = 'pod'; 101 } 102 if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') { 103 $self->no_errata_section (1); 104 $self->complain_stderr (1); 105 if ($$self{opt_errors} eq 'die') { 106 $$self{complain_die} = 1; 107 } 108 } elsif ($$self{opt_errors} eq 'pod') { 109 $self->no_errata_section (0); 110 $self->complain_stderr (0); 111 } elsif ($$self{opt_errors} eq 'none') { 112 $self->no_errata_section (1); 113 $self->no_whining (1); 114 } else { 115 croak (qq(Invalid errors setting: "$$self{errors}")); 116 } 117 delete $$self{errors}; 118 119 # Initialize various things from our parameters. 120 $$self{opt_alt} = 0 unless defined $$self{opt_alt}; 121 $$self{opt_indent} = 4 unless defined $$self{opt_indent}; 122 $$self{opt_margin} = 0 unless defined $$self{opt_margin}; 123 $$self{opt_loose} = 0 unless defined $$self{opt_loose}; 124 $$self{opt_sentence} = 0 unless defined $$self{opt_sentence}; 125 $$self{opt_width} = 76 unless defined $$self{opt_width}; 126 127 # Figure out what quotes we'll be using for C<> text. 128 $$self{opt_quotes} ||= '"'; 129 if ($$self{opt_quotes} eq 'none') { 130 $$self{LQUOTE} = $$self{RQUOTE} = ''; 131 } elsif (length ($$self{opt_quotes}) == 1) { 132 $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes}; 133 } elsif (length ($$self{opt_quotes}) % 2 == 0) { 134 my $length = length ($$self{opt_quotes}) / 2; 135 $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length); 136 $$self{RQUOTE} = substr ($$self{opt_quotes}, $length); 137 } else { 138 croak qq(Invalid quote specification "$$self{opt_quotes}"); 139 } 140 141 # If requested, do something with the non-POD text. 142 $self->code_handler (\&handle_code) if $$self{opt_code}; 143 144 # Return the created object. 145 return $self; 146} 147 148############################################################################## 149# Core parsing 150############################################################################## 151 152# This is the glue that connects the code below with Pod::Simple itself. The 153# goal is to convert the event stream coming from the POD parser into method 154# calls to handlers once the complete content of a tag has been seen. Each 155# paragraph or POD command will have textual content associated with it, and 156# as soon as all of a paragraph or POD command has been seen, that content 157# will be passed in to the corresponding method for handling that type of 158# object. The exceptions are handlers for lists, which have opening tag 159# handlers and closing tag handlers that will be called right away. 160# 161# The internal hash key PENDING is used to store the contents of a tag until 162# all of it has been seen. It holds a stack of open tags, each one 163# represented by a tuple of the attributes hash for the tag and the contents 164# of the tag. 165 166# Add a block of text to the contents of the current node, formatting it 167# according to the current formatting instructions as we do. 168sub _handle_text { 169 my ($self, $text) = @_; 170 my $tag = $$self{PENDING}[-1]; 171 $$tag[1] .= $text; 172} 173 174# Given an element name, get the corresponding method name. 175sub method_for_element { 176 my ($self, $element) = @_; 177 $element =~ tr/-/_/; 178 $element =~ tr/A-Z/a-z/; 179 $element =~ tr/_a-z0-9//cd; 180 return $element; 181} 182 183# Handle the start of a new element. If cmd_element is defined, assume that 184# we need to collect the entire tree for this element before passing it to the 185# element method, and create a new tree into which we'll collect blocks of 186# text and nested elements. Otherwise, if start_element is defined, call it. 187sub _handle_element_start { 188 my ($self, $element, $attrs) = @_; 189 my $method = $self->method_for_element ($element); 190 191 # If we have a command handler, we need to accumulate the contents of the 192 # tag before calling it. 193 if ($self->can ("cmd_$method")) { 194 push (@{ $$self{PENDING} }, [ $attrs, '' ]); 195 } elsif ($self->can ("start_$method")) { 196 my $method = 'start_' . $method; 197 $self->$method ($attrs, ''); 198 } 199} 200 201# Handle the end of an element. If we had a cmd_ method for this element, 202# this is where we pass along the text that we've accumulated. Otherwise, if 203# we have an end_ method for the element, call that. 204sub _handle_element_end { 205 my ($self, $element) = @_; 206 my $method = $self->method_for_element ($element); 207 208 # If we have a command handler, pull off the pending text and pass it to 209 # the handler along with the saved attribute hash. 210 if ($self->can ("cmd_$method")) { 211 my $tag = pop @{ $$self{PENDING} }; 212 my $method = 'cmd_' . $method; 213 my $text = $self->$method (@$tag); 214 if (defined $text) { 215 if (@{ $$self{PENDING} } > 1) { 216 $$self{PENDING}[-1][1] .= $text; 217 } else { 218 $self->output ($text); 219 } 220 } 221 } elsif ($self->can ("end_$method")) { 222 my $method = 'end_' . $method; 223 $self->$method (); 224 } 225} 226 227############################################################################## 228# Output formatting 229############################################################################## 230 231# Wrap a line, indenting by the current left margin. We can't use Text::Wrap 232# because it plays games with tabs. We can't use formline, even though we'd 233# really like to, because it screws up non-printing characters. So we have to 234# do the wrapping ourselves. 235sub wrap { 236 my $self = shift; 237 local $_ = shift; 238 my $output = ''; 239 my $spaces = ' ' x $$self{MARGIN}; 240 my $width = $$self{opt_width} - $$self{MARGIN}; 241 while (length > $width) { 242 if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) { 243 $output .= $spaces . $1 . "\n"; 244 } else { 245 last; 246 } 247 } 248 $output .= $spaces . $_; 249 $output =~ s/\s+$/\n\n/; 250 return $output; 251} 252 253# Reformat a paragraph of text for the current margin. Takes the text to 254# reformat and returns the formatted text. 255sub reformat { 256 my $self = shift; 257 local $_ = shift; 258 259 # If we're trying to preserve two spaces after sentences, do some munging 260 # to support that. Otherwise, smash all repeated whitespace. Be careful 261 # not to use \s here, which in Unicode input may match non-breaking spaces 262 # that we don't want to smash. 263 if ($$self{opt_sentence}) { 264 s/ +$//mg; 265 s/\.\n/. \n/g; 266 s/\n/ /g; 267 s/ +/ /g; 268 } else { 269 s/[ \t\n]+/ /g; 270 } 271 return $self->wrap ($_); 272} 273 274# Output text to the output device. Replace non-breaking spaces with spaces 275# and soft hyphens with nothing, and then try to fix the output encoding if 276# necessary to match the input encoding unless UTF-8 output is forced. This 277# preserves the traditional pass-through behavior of Pod::Text. 278sub output { 279 my ($self, @text) = @_; 280 my $text = join ('', @text); 281 if ($NBSP) { 282 $text =~ s/$NBSP/ /g; 283 } 284 if ($SHY) { 285 $text =~ s/$SHY//g; 286 } 287 unless ($$self{opt_utf8}) { 288 my $encoding = $$self{encoding} || ''; 289 if ($encoding && $encoding ne $$self{ENCODING}) { 290 $$self{ENCODING} = $encoding; 291 eval { binmode ($$self{output_fh}, ":encoding($encoding)") }; 292 } 293 } 294 if ($$self{ENCODE}) { 295 print { $$self{output_fh} } encode ('UTF-8', $text); 296 } else { 297 print { $$self{output_fh} } $text; 298 } 299} 300 301# Output a block of code (something that isn't part of the POD text). Called 302# by preprocess_paragraph only if we were given the code option. Exists here 303# only so that it can be overridden by subclasses. 304sub output_code { $_[0]->output ($_[1]) } 305 306############################################################################## 307# Document initialization 308############################################################################## 309 310# Set up various things that have to be initialized on a per-document basis. 311sub start_document { 312 my ($self, $attrs) = @_; 313 if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { 314 $$self{CONTENTLESS} = 1; 315 } else { 316 delete $$self{CONTENTLESS}; 317 } 318 my $margin = $$self{opt_indent} + $$self{opt_margin}; 319 320 # Initialize a few per-document variables. 321 $$self{INDENTS} = []; # Stack of indentations. 322 $$self{MARGIN} = $margin; # Default left margin. 323 $$self{PENDING} = [[]]; # Pending output. 324 325 # We have to redo encoding handling for each document. 326 $$self{ENCODING} = ''; 327 328 # When UTF-8 output is set, check whether our output file handle already 329 # has a PerlIO encoding layer set. If it does not, we'll need to encode 330 # our output before printing it (handled in the output() sub). 331 $$self{ENCODE} = 0; 332 if ($$self{opt_utf8}) { 333 $$self{ENCODE} = 1; 334 eval { 335 my @options = (output => 1, details => 1); 336 my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1]; 337 if ($flag && ($flag & PerlIO::F_UTF8 ())) { 338 $$self{ENCODE} = 0; 339 $$self{ENCODING} = 'UTF-8'; 340 } 341 }; 342 } 343 344 return ''; 345} 346 347# Handle the end of the document. The only thing we do is handle dying on POD 348# errors, since Pod::Parser currently doesn't. 349sub end_document { 350 my ($self) = @_; 351 if ($$self{complain_die} && $self->errors_seen) { 352 croak ("POD document had syntax errors"); 353 } 354} 355 356############################################################################## 357# Text blocks 358############################################################################## 359 360# Intended for subclasses to override, this method returns text with any 361# non-printing formatting codes stripped out so that length() correctly 362# returns the length of the text. For basic Pod::Text, it does nothing. 363sub strip_format { 364 my ($self, $string) = @_; 365 return $string; 366} 367 368# This method is called whenever an =item command is complete (in other words, 369# we've seen its associated paragraph or know for certain that it doesn't have 370# one). It gets the paragraph associated with the item as an argument. If 371# that argument is empty, just output the item tag; if it contains a newline, 372# output the item tag followed by the newline. Otherwise, see if there's 373# enough room for us to output the item tag in the margin of the text or if we 374# have to put it on a separate line. 375sub item { 376 my ($self, $text) = @_; 377 my $tag = $$self{ITEM}; 378 unless (defined $tag) { 379 carp "Item called without tag"; 380 return; 381 } 382 undef $$self{ITEM}; 383 384 # Calculate the indentation and margin. $fits is set to true if the tag 385 # will fit into the margin of the paragraph given our indentation level. 386 my $indent = $$self{INDENTS}[-1]; 387 $indent = $$self{opt_indent} unless defined $indent; 388 my $margin = ' ' x $$self{opt_margin}; 389 my $tag_length = length ($self->strip_format ($tag)); 390 my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1); 391 392 # If the tag doesn't fit, or if we have no associated text, print out the 393 # tag separately. Otherwise, put the tag in the margin of the paragraph. 394 if (!$text || $text =~ /^\s+$/ || !$fits) { 395 my $realindent = $$self{MARGIN}; 396 $$self{MARGIN} = $indent; 397 my $output = $self->reformat ($tag); 398 $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 399 $output =~ s/\n*$/\n/; 400 401 # If the text is just whitespace, we have an empty item paragraph; 402 # this can result from =over/=item/=back without any intermixed 403 # paragraphs. Insert some whitespace to keep the =item from merging 404 # into the next paragraph. 405 $output .= "\n" if $text && $text =~ /^\s*$/; 406 407 $self->output ($output); 408 $$self{MARGIN} = $realindent; 409 $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/); 410 } else { 411 my $space = ' ' x $indent; 412 $space =~ s/^$margin /$margin:/ if $$self{opt_alt}; 413 $text = $self->reformat ($text); 414 $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 415 my $tagspace = ' ' x $tag_length; 416 $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item"; 417 $self->output ($text); 418 } 419} 420 421# Handle a basic block of text. The only tricky thing here is that if there 422# is a pending item tag, we need to format this as an item paragraph. 423sub cmd_para { 424 my ($self, $attrs, $text) = @_; 425 $text =~ s/\s+$/\n/; 426 if (defined $$self{ITEM}) { 427 $self->item ($text . "\n"); 428 } else { 429 $self->output ($self->reformat ($text . "\n")); 430 } 431 return ''; 432} 433 434# Handle a verbatim paragraph. Just print it out, but indent it according to 435# our margin. 436sub cmd_verbatim { 437 my ($self, $attrs, $text) = @_; 438 $self->item if defined $$self{ITEM}; 439 return if $text =~ /^\s*$/; 440 $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme; 441 $text =~ s/\s*$/\n\n/; 442 $self->output ($text); 443 return ''; 444} 445 446# Handle literal text (produced by =for and similar constructs). Just output 447# it with the minimum of changes. 448sub cmd_data { 449 my ($self, $attrs, $text) = @_; 450 $text =~ s/^\n+//; 451 $text =~ s/\n{0,2}$/\n/; 452 $self->output ($text); 453 return ''; 454} 455 456############################################################################## 457# Headings 458############################################################################## 459 460# The common code for handling all headers. Takes the header text, the 461# indentation, and the surrounding marker for the alt formatting method. 462sub heading { 463 my ($self, $text, $indent, $marker) = @_; 464 $self->item ("\n\n") if defined $$self{ITEM}; 465 $text =~ s/\s+$//; 466 if ($$self{opt_alt}) { 467 my $closemark = reverse (split (//, $marker)); 468 my $margin = ' ' x $$self{opt_margin}; 469 $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n"); 470 } else { 471 $text .= "\n" if $$self{opt_loose}; 472 my $margin = ' ' x ($$self{opt_margin} + $indent); 473 $self->output ($margin . $text . "\n"); 474 } 475 return ''; 476} 477 478# First level heading. 479sub cmd_head1 { 480 my ($self, $attrs, $text) = @_; 481 $self->heading ($text, 0, '===='); 482} 483 484# Second level heading. 485sub cmd_head2 { 486 my ($self, $attrs, $text) = @_; 487 $self->heading ($text, $$self{opt_indent} / 2, '== '); 488} 489 490# Third level heading. 491sub cmd_head3 { 492 my ($self, $attrs, $text) = @_; 493 $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '= '); 494} 495 496# Fourth level heading. 497sub cmd_head4 { 498 my ($self, $attrs, $text) = @_; 499 $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '- '); 500} 501 502############################################################################## 503# List handling 504############################################################################## 505 506# Handle the beginning of an =over block. Takes the type of the block as the 507# first argument, and then the attr hash. This is called by the handlers for 508# the four different types of lists (bullet, number, text, and block). 509sub over_common_start { 510 my ($self, $attrs) = @_; 511 $self->item ("\n\n") if defined $$self{ITEM}; 512 513 # Find the indentation level. 514 my $indent = $$attrs{indent}; 515 unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) { 516 $indent = $$self{opt_indent}; 517 } 518 519 # Add this to our stack of indents and increase our current margin. 520 push (@{ $$self{INDENTS} }, $$self{MARGIN}); 521 $$self{MARGIN} += ($indent + 0); 522 return ''; 523} 524 525# End an =over block. Takes no options other than the class pointer. Output 526# any pending items and then pop one level of indentation. 527sub over_common_end { 528 my ($self) = @_; 529 $self->item ("\n\n") if defined $$self{ITEM}; 530 $$self{MARGIN} = pop @{ $$self{INDENTS} }; 531 return ''; 532} 533 534# Dispatch the start and end calls as appropriate. 535sub start_over_bullet { $_[0]->over_common_start ($_[1]) } 536sub start_over_number { $_[0]->over_common_start ($_[1]) } 537sub start_over_text { $_[0]->over_common_start ($_[1]) } 538sub start_over_block { $_[0]->over_common_start ($_[1]) } 539sub end_over_bullet { $_[0]->over_common_end } 540sub end_over_number { $_[0]->over_common_end } 541sub end_over_text { $_[0]->over_common_end } 542sub end_over_block { $_[0]->over_common_end } 543 544# The common handler for all item commands. Takes the type of the item, the 545# attributes, and then the text of the item. 546sub item_common { 547 my ($self, $type, $attrs, $text) = @_; 548 $self->item if defined $$self{ITEM}; 549 550 # Clean up the text. We want to end up with two variables, one ($text) 551 # which contains any body text after taking out the item portion, and 552 # another ($item) which contains the actual item text. Note the use of 553 # the internal Pod::Simple attribute here; that's a potential land mine. 554 $text =~ s/\s+$//; 555 my ($item, $index); 556 if ($type eq 'bullet') { 557 $item = '*'; 558 } elsif ($type eq 'number') { 559 $item = $$attrs{'~orig_content'}; 560 } else { 561 $item = $text; 562 $item =~ s/\s*\n\s*/ /g; 563 $text = ''; 564 } 565 $$self{ITEM} = $item; 566 567 # If body text for this item was included, go ahead and output that now. 568 if ($text) { 569 $text =~ s/\s*$/\n/; 570 $self->item ($text); 571 } 572 return ''; 573} 574 575# Dispatch the item commands to the appropriate place. 576sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) } 577sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) } 578sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) } 579sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) } 580 581############################################################################## 582# Formatting codes 583############################################################################## 584 585# The simple ones. 586sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] } 587sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] } 588sub cmd_i { return '*' . $_[2] . '*' } 589sub cmd_x { return '' } 590 591# Apply a whole bunch of messy heuristics to not quote things that don't 592# benefit from being quoted. These originally come from Barrie Slaymaker and 593# largely duplicate code in Pod::Man. 594sub cmd_c { 595 my ($self, $attrs, $text) = @_; 596 597 # A regex that matches the portion of a variable reference that's the 598 # array or hash index, separated out just because we want to use it in 599 # several places in the following regex. 600 my $index = '(?: \[.*\] | \{.*\} )?'; 601 602 # Check for things that we don't want to quote, and if we find any of 603 # them, return the string with just a font change and no quoting. 604 $text =~ m{ 605 ^\s* 606 (?: 607 ( [\'\`\"] ) .* \1 # already quoted 608 | \` .* \' # `quoted' 609 | \$+ [\#^]? \S $index # special ($^Foo, $") 610 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func 611 | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call 612 | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number 613 | 0x [a-fA-F\d]+ # a hex constant 614 ) 615 \s*\z 616 }xo && return $text; 617 618 # If we didn't return, go ahead and quote the text. 619 return $$self{opt_alt} 620 ? "``$text''" 621 : "$$self{LQUOTE}$text$$self{RQUOTE}"; 622} 623 624# Links reduce to the text that we're given, wrapped in angle brackets if it's 625# a URL. 626sub cmd_l { 627 my ($self, $attrs, $text) = @_; 628 if ($$attrs{type} eq 'url') { 629 if (not defined($$attrs{to}) or $$attrs{to} eq $text) { 630 return "<$text>"; 631 } elsif ($$self{opt_nourls}) { 632 return $text; 633 } else { 634 return "$text <$$attrs{to}>"; 635 } 636 } else { 637 return $text; 638 } 639} 640 641############################################################################## 642# Backwards compatibility 643############################################################################## 644 645# The old Pod::Text module did everything in a pod2text() function. This 646# tries to provide the same interface for legacy applications. 647sub pod2text { 648 my @args; 649 650 # This is really ugly; I hate doing option parsing in the middle of a 651 # module. But the old Pod::Text module supported passing flags to its 652 # entry function, so handle -a and -<number>. 653 while ($_[0] =~ /^-/) { 654 my $flag = shift; 655 if ($flag eq '-a') { push (@args, alt => 1) } 656 elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) } 657 else { 658 unshift (@_, $flag); 659 last; 660 } 661 } 662 663 # Now that we know what arguments we're using, create the parser. 664 my $parser = Pod::Text->new (@args); 665 666 # If two arguments were given, the second argument is going to be a file 667 # handle. That means we want to call parse_from_filehandle(), which means 668 # we need to turn the first argument into a file handle. Magic open will 669 # handle the <&STDIN case automagically. 670 if (defined $_[1]) { 671 my @fhs = @_; 672 local *IN; 673 unless (open (IN, $fhs[0])) { 674 croak ("Can't open $fhs[0] for reading: $!\n"); 675 return; 676 } 677 $fhs[0] = \*IN; 678 $parser->output_fh ($fhs[1]); 679 my $retval = $parser->parse_file ($fhs[0]); 680 my $fh = $parser->output_fh (); 681 close $fh; 682 return $retval; 683 } else { 684 $parser->output_fh (\*STDOUT); 685 return $parser->parse_file (@_); 686 } 687} 688 689# Reset the underlying Pod::Simple object between calls to parse_from_file so 690# that the same object can be reused to convert multiple pages. 691sub parse_from_file { 692 my $self = shift; 693 $self->reinit; 694 695 # Fake the old cutting option to Pod::Parser. This fiddles with internal 696 # Pod::Simple state and is quite ugly; we need a better approach. 697 if (ref ($_[0]) eq 'HASH') { 698 my $opts = shift @_; 699 if (defined ($$opts{-cutting}) && !$$opts{-cutting}) { 700 $$self{in_pod} = 1; 701 $$self{last_was_blank} = 1; 702 } 703 } 704 705 # Do the work. 706 my $retval = $self->Pod::Simple::parse_from_file (@_); 707 708 # Flush output, since Pod::Simple doesn't do this. Ideally we should also 709 # close the file descriptor if we had to open one, but we can't easily 710 # figure this out. 711 my $fh = $self->output_fh (); 712 my $oldfh = select $fh; 713 my $oldflush = $|; 714 $| = 1; 715 print $fh ''; 716 $| = $oldflush; 717 select $oldfh; 718 return $retval; 719} 720 721# Pod::Simple failed to provide this backward compatibility function, so 722# implement it ourselves. File handles are one of the inputs that 723# parse_from_file supports. 724sub parse_from_filehandle { 725 my $self = shift; 726 $self->parse_from_file (@_); 727} 728 729# Pod::Simple's parse_file doesn't set output_fh. Wrap the call and do so 730# ourself unless it was already set by the caller, since our documentation has 731# always said that this should work. 732sub parse_file { 733 my ($self, $in) = @_; 734 unless (defined $$self{output_fh}) { 735 $self->output_fh (\*STDOUT); 736 } 737 return $self->SUPER::parse_file ($in); 738} 739 740# Do the same for parse_lines, just to be polite. Pod::Simple's man page 741# implies that the caller is responsible for setting this, but I don't see any 742# reason not to set a default. 743sub parse_lines { 744 my ($self, @lines) = @_; 745 unless (defined $$self{output_fh}) { 746 $self->output_fh (\*STDOUT); 747 } 748 return $self->SUPER::parse_lines (@lines); 749} 750 751# Likewise for parse_string_document. 752sub parse_string_document { 753 my ($self, $doc) = @_; 754 unless (defined $$self{output_fh}) { 755 $self->output_fh (\*STDOUT); 756 } 757 return $self->SUPER::parse_string_document ($doc); 758} 759 760############################################################################## 761# Module return value and documentation 762############################################################################## 763 7641; 765__END__ 766 767=for stopwords 768alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls 769parsers 770 771=head1 NAME 772 773Pod::Text - Convert POD data to formatted text 774 775=head1 SYNOPSIS 776 777 use Pod::Text; 778 my $parser = Pod::Text->new (sentence => 1, width => 78); 779 780 # Read POD from STDIN and write to STDOUT. 781 $parser->parse_from_filehandle; 782 783 # Read POD from file.pod and write to file.txt. 784 $parser->parse_from_file ('file.pod', 'file.txt'); 785 786=head1 DESCRIPTION 787 788Pod::Text is a module that can convert documentation in the POD format 789(the preferred language for documenting Perl) into formatted text. It 790uses no special formatting controls or codes whatsoever, and its output is 791therefore suitable for nearly any device. 792 793As a derived class from Pod::Simple, Pod::Text supports the same methods and 794interfaces. See L<Pod::Simple> for all the details; briefly, one creates a 795new parser with C<< Pod::Text->new() >> and then normally calls parse_file(). 796 797new() can take options, in the form of key/value pairs, that control the 798behavior of the parser. The currently recognized options are: 799 800=over 4 801 802=item alt 803 804If set to a true value, selects an alternate output format that, among other 805things, uses a different heading style and marks C<=item> entries with a 806colon in the left margin. Defaults to false. 807 808=item code 809 810If set to a true value, the non-POD parts of the input file will be included 811in the output. Useful for viewing code documented with POD blocks with the 812POD rendered and the code left intact. 813 814=item errors 815 816How to report errors. C<die> says to throw an exception on any POD 817formatting error. C<stderr> says to report errors on standard error, but 818not to throw an exception. C<pod> says to include a POD ERRORS section 819in the resulting documentation summarizing the errors. C<none> ignores 820POD errors entirely, as much as possible. 821 822The default is C<pod>. 823 824=item indent 825 826The number of spaces to indent regular text, and the default indentation for 827C<=over> blocks. Defaults to 4. 828 829=item loose 830 831If set to a true value, a blank line is printed after a C<=head1> heading. 832If set to false (the default), no blank line is printed after C<=head1>, 833although one is still printed after C<=head2>. This is the default because 834it's the expected formatting for manual pages; if you're formatting 835arbitrary text documents, setting this to true may result in more pleasing 836output. 837 838=item margin 839 840The width of the left margin in spaces. Defaults to 0. This is the margin 841for all text, including headings, not the amount by which regular text is 842indented; for the latter, see the I<indent> option. To set the right 843margin, see the I<width> option. 844 845=item nourls 846 847Normally, LZ<><> formatting codes with a URL but anchor text are formatted 848to show both the anchor text and the URL. In other words: 849 850 L<foo|http://example.com/> 851 852is formatted as: 853 854 foo <http://example.com/> 855 856This option, if set to a true value, suppresses the URL when anchor text 857is given, so this example would be formatted as just C<foo>. This can 858produce less cluttered output in cases where the URLs are not particularly 859important. 860 861=item quotes 862 863Sets the quote marks used to surround CE<lt>> text. If the value is a 864single character, it is used as both the left and right quote. Otherwise, 865it is split in half, and the first half of the string is used as the left 866quote and the second is used as the right quote. 867 868This may also be set to the special value C<none>, in which case no quote 869marks are added around CE<lt>> text. 870 871=item sentence 872 873If set to a true value, Pod::Text will assume that each sentence ends in two 874spaces, and will try to preserve that spacing. If set to false, all 875consecutive whitespace in non-verbatim paragraphs is compressed into a 876single space. Defaults to false. 877 878=item stderr 879 880Send error messages about invalid POD to standard error instead of 881appending a POD ERRORS section to the generated output. This is 882equivalent to setting C<errors> to C<stderr> if C<errors> is not already 883set. It is supported for backward compatibility. 884 885=item utf8 886 887By default, Pod::Text uses the same output encoding as the input encoding 888of the POD source (provided that Perl was built with PerlIO; otherwise, it 889doesn't encode its output). If this option is given, the output encoding 890is forced to UTF-8. 891 892Be aware that, when using this option, the input encoding of your POD 893source should be properly declared unless it's US-ASCII. Pod::Simple will 894attempt to guess the encoding and may be successful if it's Latin-1 or 895UTF-8, but it will produce warnings. Use the C<=encoding> command to 896declare the encoding. See L<perlpod(1)> for more information. 897 898=item width 899 900The column at which to wrap text on the right-hand side. Defaults to 76. 901 902=back 903 904The standard Pod::Simple method parse_file() takes one argument naming the 905POD file to read from. By default, the output is sent to C<STDOUT>, but 906this can be changed with the output_fh() method. 907 908The standard Pod::Simple method parse_from_file() takes up to two 909arguments, the first being the input file to read POD from and the second 910being the file to write the formatted output to. 911 912You can also call parse_lines() to parse an array of lines or 913parse_string_document() to parse a document already in memory. As with 914parse_file(), parse_lines() and parse_string_document() default to sending 915their output to C<STDOUT> unless changed with the output_fh() method. Be 916aware that parse_lines() and parse_string_document() both expect raw bytes, 917not decoded characters. 918 919To put the output from any parse method into a string instead of a file 920handle, call the output_string() method instead of output_fh(). 921 922See L<Pod::Simple> for more specific details on the methods available to 923all derived parsers. 924 925=head1 DIAGNOSTICS 926 927=over 4 928 929=item Bizarre space in item 930 931=item Item called without tag 932 933(W) Something has gone wrong in internal C<=item> processing. These 934messages indicate a bug in Pod::Text; you should never see them. 935 936=item Can't open %s for reading: %s 937 938(F) Pod::Text was invoked via the compatibility mode pod2text() interface 939and the input file it was given could not be opened. 940 941=item Invalid errors setting "%s" 942 943(F) The C<errors> parameter to the constructor was set to an unknown value. 944 945=item Invalid quote specification "%s" 946 947(F) The quote specification given (the C<quotes> option to the 948constructor) was invalid. A quote specification must be either one 949character long or an even number (greater than one) characters long. 950 951=item POD document had syntax errors 952 953(F) The POD document being formatted had syntax errors and the C<errors> 954option was set to C<die>. 955 956=back 957 958=head1 BUGS 959 960Encoding handling assumes that PerlIO is available and does not work 961properly if it isn't. The C<utf8> option is therefore not supported 962unless Perl is built with PerlIO support. 963 964=head1 CAVEATS 965 966If Pod::Text is given the C<utf8> option, the encoding of its output file 967handle will be forced to UTF-8 if possible, overriding any existing 968encoding. This will be done even if the file handle is not created by 969Pod::Text and was passed in from outside. This maintains consistency 970regardless of PERL_UNICODE and other settings. 971 972If the C<utf8> option is not given, the encoding of its output file handle 973will be forced to the detected encoding of the input POD, which preserves 974whatever the input text is. This ensures backward compatibility with 975earlier, pre-Unicode versions of this module, without large numbers of 976Perl warnings. 977 978This is not ideal, but it seems to be the best compromise. If it doesn't 979work for you, please let me know the details of how it broke. 980 981=head1 NOTES 982 983This is a replacement for an earlier Pod::Text module written by Tom 984Christiansen. It has a revamped interface, since it now uses Pod::Simple, 985but an interface roughly compatible with the old Pod::Text::pod2text() 986function is still available. Please change to the new calling convention, 987though. 988 989The original Pod::Text contained code to do formatting via termcap 990sequences, although it wasn't turned on by default and it was problematic to 991get it to work at all. This rewrite doesn't even try to do that, but a 992subclass of it does. Look for L<Pod::Text::Termcap>. 993 994=head1 AUTHOR 995 996Russ Allbery <rra@cpan.org>, based I<very> heavily on the original 997Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to 998Pod::Parser by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial 999conversion of Pod::Man to use Pod::Simple provided much-needed guidance on 1000how to use Pod::Simple. 1001 1002=head1 COPYRIGHT AND LICENSE 1003 1004Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019 Russ Allbery 1005<rra@cpan.org> 1006 1007This program is free software; you may redistribute it and/or modify it 1008under the same terms as Perl itself. 1009 1010=head1 SEE ALSO 1011 1012L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> 1013 1014The current version of this module is always available from its web site at 1015L<https://www.eyrie.org/~eagle/software/podlators/>. It is also part of the 1016Perl core distribution as of 5.6.0. 1017 1018=cut 1019 1020# Local Variables: 1021# copyright-at-end-flag: t 1022# End: 1023