1# Convert POD data to formatted text. 2# 3# This module converts POD to formatted text. It replaces the old Pod::Text 4# module that came with versions of Perl prior to 5.6.0 and attempts to match 5# its output except for some specific circumstances where other decisions 6# seemed to produce better output. It uses Pod::Parser and is designed to be 7# very easy to subclass. 8# 9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl 10 11############################################################################## 12# Modules and declarations 13############################################################################## 14 15package Pod::Text; 16 17use 5.010; 18use strict; 19use warnings; 20 21use Carp qw(carp croak); 22use Encode qw(encode); 23use Exporter (); 24use Pod::Simple (); 25 26our @ISA = qw(Pod::Simple Exporter); 27our $VERSION = '5.01_02'; 28$VERSION =~ tr/_//d; 29 30# We have to export pod2text for backward compatibility. 31our @EXPORT = qw(pod2text); 32 33# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code 34# taken from Pod::Simple 3.32, but was only added in 3.30. 35my ($NBSP, $SHY); 36if ($Pod::Simple::VERSION ge 3.30) { 37 $NBSP = $Pod::Simple::nbsp; 38 $SHY = $Pod::Simple::shy; 39} else { 40 $NBSP = chr utf8::unicode_to_native(0xA0); 41 $SHY = chr utf8::unicode_to_native(0xAD); 42} 43 44# Import the ASCII constant from Pod::Simple. This is true iff we're in an 45# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is 46# generally only false for EBCDIC. 47BEGIN { *ASCII = \&Pod::Simple::ASCII } 48 49############################################################################## 50# Initialization 51############################################################################## 52 53# This function handles code blocks. It's registered as a callback to 54# Pod::Simple and therefore doesn't work as a regular method call, but all it 55# does is call output_code with the line. 56sub handle_code { 57 my ($line, $number, $parser) = @_; 58 $parser->output_code ($line . "\n"); 59} 60 61# Initialize the object and set various Pod::Simple options that we need. 62# Here, we also process any additional options passed to the constructor or 63# set up defaults if none were given. Note that all internal object keys are 64# in all-caps, reserving all lower-case object keys for Pod::Simple and user 65# arguments. 66sub new { 67 my $class = shift; 68 my $self = $class->SUPER::new; 69 70 # Tell Pod::Simple to keep whitespace whenever possible. 71 if ($self->can ('preserve_whitespace')) { 72 $self->preserve_whitespace (1); 73 } else { 74 $self->fullstop_space_harden (1); 75 } 76 77 # The =for and =begin targets that we accept. 78 $self->accept_targets (qw/text TEXT/); 79 80 # Ensure that contiguous blocks of code are merged together. Otherwise, 81 # some of the guesswork heuristics don't work right. 82 $self->merge_text (1); 83 84 # Pod::Simple doesn't do anything useful with our arguments, but we want 85 # to put them in our object as hash keys and values. This could cause 86 # problems if we ever clash with Pod::Simple's own internal class 87 # variables. 88 my %opts = @_; 89 my @opts = map { ("opt_$_", $opts{$_}) } keys %opts; 90 %$self = (%$self, @opts); 91 92 # Backwards-compatibility support for the stderr option. 93 if ($$self{opt_stderr} and not $$self{opt_errors}) { 94 $$self{opt_errors} = 'stderr'; 95 } 96 delete $$self{opt_stderr}; 97 98 # Backwards-compatibility support for the utf8 option. 99 if ($$self{opt_utf8} && !$$self{opt_encoding}) { 100 $$self{opt_encoding} = 'UTF-8'; 101 } 102 delete $$self{opt_utf8}; 103 104 # Validate the errors parameter and act on it. 105 $$self{opt_errors} //= 'pod'; 106 if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') { 107 $self->no_errata_section (1); 108 $self->complain_stderr (1); 109 if ($$self{opt_errors} eq 'die') { 110 $$self{complain_die} = 1; 111 } 112 } elsif ($$self{opt_errors} eq 'pod') { 113 $self->no_errata_section (0); 114 $self->complain_stderr (0); 115 } elsif ($$self{opt_errors} eq 'none') { 116 $self->no_errata_section (1); 117 $self->no_whining (1); 118 } else { 119 croak (qq(Invalid errors setting: "$$self{errors}")); 120 } 121 delete $$self{errors}; 122 123 # Initialize various things from our parameters. 124 $$self{opt_alt} //= 0; 125 $$self{opt_indent} //= 4; 126 $$self{opt_margin} //= 0; 127 $$self{opt_loose} //= 0; 128 $$self{opt_sentence} //= 0; 129 $$self{opt_width} //= 76; 130 131 # Figure out what quotes we'll be using for C<> text. 132 $$self{opt_quotes} ||= '"'; 133 if ($$self{opt_quotes} eq 'none') { 134 $$self{LQUOTE} = $$self{RQUOTE} = ''; 135 } elsif (length ($$self{opt_quotes}) == 1) { 136 $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes}; 137 } elsif (length ($$self{opt_quotes}) % 2 == 0) { 138 my $length = length ($$self{opt_quotes}) / 2; 139 $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length); 140 $$self{RQUOTE} = substr ($$self{opt_quotes}, $length); 141 } else { 142 croak qq(Invalid quote specification "$$self{opt_quotes}"); 143 } 144 145 # Configure guesswork based on options. 146 my $guesswork = $self->{opt_guesswork} || q{}; 147 my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork); 148 if (!%guesswork || $guesswork{all}) { 149 $$self{GUESSWORK} = {quoting => 1}; 150 } elsif ($guesswork{none}) { 151 $$self{GUESSWORK} = {}; 152 } else { 153 $$self{GUESSWORK} = {%guesswork}; 154 } 155 156 # If requested, do something with the non-POD text. 157 $self->code_handler (\&handle_code) if $$self{opt_code}; 158 159 # Return the created object. 160 return $self; 161} 162 163############################################################################## 164# Core parsing 165############################################################################## 166 167# This is the glue that connects the code below with Pod::Simple itself. The 168# goal is to convert the event stream coming from the POD parser into method 169# calls to handlers once the complete content of a tag has been seen. Each 170# paragraph or POD command will have textual content associated with it, and 171# as soon as all of a paragraph or POD command has been seen, that content 172# will be passed in to the corresponding method for handling that type of 173# object. The exceptions are handlers for lists, which have opening tag 174# handlers and closing tag handlers that will be called right away. 175# 176# The internal hash key PENDING is used to store the contents of a tag until 177# all of it has been seen. It holds a stack of open tags, each one 178# represented by a tuple of the attributes hash for the tag and the contents 179# of the tag. 180 181# Add a block of text to the contents of the current node, formatting it 182# according to the current formatting instructions as we do. 183sub _handle_text { 184 my ($self, $text) = @_; 185 my $tag = $$self{PENDING}[-1]; 186 $$tag[1] .= $text; 187} 188 189# Given an element name, get the corresponding method name. 190sub method_for_element { 191 my ($self, $element) = @_; 192 $element =~ tr/-/_/; 193 $element =~ tr/A-Z/a-z/; 194 $element =~ tr/_a-z0-9//cd; 195 return $element; 196} 197 198# Handle the start of a new element. If cmd_element is defined, assume that 199# we need to collect the entire tree for this element before passing it to the 200# element method, and create a new tree into which we'll collect blocks of 201# text and nested elements. Otherwise, if start_element is defined, call it. 202sub _handle_element_start { 203 my ($self, $element, $attrs) = @_; 204 my $method = $self->method_for_element ($element); 205 206 # If we have a command handler, we need to accumulate the contents of the 207 # tag before calling it. 208 if ($self->can ("cmd_$method")) { 209 push (@{ $$self{PENDING} }, [ $attrs, '' ]); 210 } elsif ($self->can ("start_$method")) { 211 my $method = 'start_' . $method; 212 $self->$method ($attrs, ''); 213 } 214} 215 216# Handle the end of an element. If we had a cmd_ method for this element, 217# this is where we pass along the text that we've accumulated. Otherwise, if 218# we have an end_ method for the element, call that. 219sub _handle_element_end { 220 my ($self, $element) = @_; 221 my $method = $self->method_for_element ($element); 222 223 # If we have a command handler, pull off the pending text and pass it to 224 # the handler along with the saved attribute hash. 225 if ($self->can ("cmd_$method")) { 226 my $tag = pop @{ $$self{PENDING} }; 227 my $method = 'cmd_' . $method; 228 my $text = $self->$method (@$tag); 229 if (defined $text) { 230 if (@{ $$self{PENDING} } > 1) { 231 $$self{PENDING}[-1][1] .= $text; 232 } else { 233 $self->output ($text); 234 } 235 } 236 } elsif ($self->can ("end_$method")) { 237 my $method = 'end_' . $method; 238 $self->$method (); 239 } 240} 241 242############################################################################## 243# Output formatting 244############################################################################## 245 246# Wrap a line, indenting by the current left margin. We can't use Text::Wrap 247# because it plays games with tabs. We can't use formline, even though we'd 248# really like to, because it screws up non-printing characters. So we have to 249# do the wrapping ourselves. 250sub wrap { 251 my $self = shift; 252 local $_ = shift; 253 my $output = ''; 254 my $spaces = ' ' x $$self{MARGIN}; 255 my $width = $$self{opt_width} - $$self{MARGIN}; 256 while (length > $width) { 257 if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) { 258 $output .= $spaces . $1 . "\n"; 259 } else { 260 last; 261 } 262 } 263 $output .= $spaces . $_; 264 $output =~ s/\s+$/\n\n/; 265 return $output; 266} 267 268# Reformat a paragraph of text for the current margin. Takes the text to 269# reformat and returns the formatted text. 270sub reformat { 271 my $self = shift; 272 local $_ = shift; 273 274 # If we're trying to preserve two spaces after sentences, do some munging 275 # to support that. Otherwise, smash all repeated whitespace. Be careful 276 # not to use \s here, which in Unicode input may match non-breaking spaces 277 # that we don't want to smash. 278 if ($$self{opt_sentence}) { 279 s/ +$//mg; 280 s/\.\n/. \n/g; 281 s/\n/ /g; 282 s/ +/ /g; 283 } else { 284 s/[ \t\n]+/ /g; 285 } 286 return $self->wrap ($_); 287} 288 289# Output text to the output device. Replace non-breaking spaces with spaces 290# and soft hyphens with nothing, and then determine the output encoding. 291sub output { 292 my ($self, @text) = @_; 293 my $text = join ('', @text); 294 if ($NBSP) { 295 $text =~ s/$NBSP/ /g; 296 } 297 if ($SHY) { 298 $text =~ s/$SHY//g; 299 } 300 301 # The logic used here is described in the POD documentation. Prefer the 302 # configured encoding, then the pass-through option of using the same 303 # encoding as the input, and then UTF-8, but commit to an encoding for the 304 # document. 305 # 306 # ENCODE says whether to encode or not and is turned off if there is a 307 # PerlIO encoding layer (in start_document). ENCODING is the encoding 308 # that we previously committed to and is cleared at the start of each 309 # document. 310 if ($$self{ENCODE}) { 311 my $encoding = $$self{ENCODING}; 312 if (!$encoding) { 313 $encoding = $self->encoding(); 314 if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) { 315 $encoding = 'UTF-8'; 316 } 317 if ($encoding) { 318 $$self{ENCODING} = $encoding; 319 } 320 } 321 if ($encoding) { 322 my $check = sub { 323 my ($char) = @_; 324 my $display = '"\x{' . hex($char) . '}"'; 325 my $error = "$display does not map to $$self{ENCODING}"; 326 $self->whine ($self->line_count(), $error); 327 return Encode::encode ($$self{ENCODING}, chr($char)); 328 }; 329 print { $$self{output_fh} } encode ($encoding, $text, $check); 330 } else { 331 print { $$self{output_fh} } $text; 332 } 333 } else { 334 print { $$self{output_fh} } $text; 335 } 336} 337 338# Output a block of code (something that isn't part of the POD text). Called 339# by preprocess_paragraph only if we were given the code option. Exists here 340# only so that it can be overridden by subclasses. 341sub output_code { $_[0]->output ($_[1]) } 342 343############################################################################## 344# Document initialization 345############################################################################## 346 347# Set up various things that have to be initialized on a per-document basis. 348sub start_document { 349 my ($self, $attrs) = @_; 350 if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { 351 $$self{CONTENTLESS} = 1; 352 } else { 353 delete $$self{CONTENTLESS}; 354 } 355 my $margin = $$self{opt_indent} + $$self{opt_margin}; 356 357 # Initialize a few per-document variables. 358 $$self{INDENTS} = []; # Stack of indentations. 359 $$self{MARGIN} = $margin; # Default left margin. 360 $$self{PENDING} = [[]]; # Pending output. 361 362 # We have to redo encoding handling for each document. Check whether the 363 # output file handle already has a PerlIO encoding layer set and, if so, 364 # disable encoding. 365 $$self{ENCODE} = 1; 366 eval { 367 require PerlIO; 368 my @options = (output => 1, details => 1); 369 my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1]; 370 if ($flag && ($flag & PerlIO::F_UTF8 ())) { 371 $$self{ENCODE} = 0; 372 } 373 }; 374 $$self{ENCODING} = $$self{opt_encoding}; 375 376 return ''; 377} 378 379# Handle the end of the document. The only thing we do is handle dying on POD 380# errors, since Pod::Parser currently doesn't. 381sub end_document { 382 my ($self) = @_; 383 if ($$self{complain_die} && $self->errors_seen) { 384 croak ("POD document had syntax errors"); 385 } 386} 387 388############################################################################## 389# Text blocks 390############################################################################## 391 392# Intended for subclasses to override, this method returns text with any 393# non-printing formatting codes stripped out so that length() correctly 394# returns the length of the text. For basic Pod::Text, it does nothing. 395sub strip_format { 396 my ($self, $string) = @_; 397 return $string; 398} 399 400# This method is called whenever an =item command is complete (in other words, 401# we've seen its associated paragraph or know for certain that it doesn't have 402# one). It gets the paragraph associated with the item as an argument. If 403# that argument is empty, just output the item tag; if it contains a newline, 404# output the item tag followed by the newline. Otherwise, see if there's 405# enough room for us to output the item tag in the margin of the text or if we 406# have to put it on a separate line. 407sub item { 408 my ($self, $text) = @_; 409 my $tag = $$self{ITEM}; 410 unless (defined $tag) { 411 carp "Item called without tag"; 412 return; 413 } 414 undef $$self{ITEM}; 415 416 # Calculate the indentation and margin. $fits is set to true if the tag 417 # will fit into the margin of the paragraph given our indentation level. 418 my $indent = $$self{INDENTS}[-1] // $$self{opt_indent}; 419 my $margin = ' ' x $$self{opt_margin}; 420 my $tag_length = length ($self->strip_format ($tag)); 421 my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1); 422 423 # If the tag doesn't fit, or if we have no associated text, print out the 424 # tag separately. Otherwise, put the tag in the margin of the paragraph. 425 if (!$text || $text =~ /^\s+$/ || !$fits) { 426 my $realindent = $$self{MARGIN}; 427 $$self{MARGIN} = $indent; 428 my $output = $self->reformat ($tag); 429 $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 430 $output =~ s/\n*$/\n/; 431 432 # If the text is just whitespace, we have an empty item paragraph; 433 # this can result from =over/=item/=back without any intermixed 434 # paragraphs. Insert some whitespace to keep the =item from merging 435 # into the next paragraph. 436 $output .= "\n" if $text && $text =~ /^\s*$/; 437 438 $self->output ($output); 439 $$self{MARGIN} = $realindent; 440 $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/); 441 } else { 442 my $space = ' ' x $indent; 443 $space =~ s/^$margin /$margin:/ if $$self{opt_alt}; 444 $text = $self->reformat ($text); 445 $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 446 my $tagspace = ' ' x $tag_length; 447 $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item"; 448 $self->output ($text); 449 } 450} 451 452# Handle a basic block of text. The only tricky thing here is that if there 453# is a pending item tag, we need to format this as an item paragraph. 454sub cmd_para { 455 my ($self, $attrs, $text) = @_; 456 $text =~ s/\s+$/\n/; 457 if (defined $$self{ITEM}) { 458 $self->item ($text . "\n"); 459 } else { 460 $self->output ($self->reformat ($text . "\n")); 461 } 462 return ''; 463} 464 465# Handle a verbatim paragraph. Just print it out, but indent it according to 466# our margin. 467sub cmd_verbatim { 468 my ($self, $attrs, $text) = @_; 469 $self->item if defined $$self{ITEM}; 470 return if $text =~ /^\s*$/; 471 $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme; 472 $text =~ s/\s*$/\n\n/; 473 $self->output ($text); 474 return ''; 475} 476 477# Handle literal text (produced by =for and similar constructs). Just output 478# it with the minimum of changes. 479sub cmd_data { 480 my ($self, $attrs, $text) = @_; 481 $text =~ s/^\n+//; 482 $text =~ s/\n{0,2}$/\n/; 483 $self->output ($text); 484 return ''; 485} 486 487############################################################################## 488# Headings 489############################################################################## 490 491# The common code for handling all headers. Takes the header text, the 492# indentation, and the surrounding marker for the alt formatting method. 493sub heading { 494 my ($self, $text, $indent, $marker) = @_; 495 $self->item ("\n\n") if defined $$self{ITEM}; 496 $text =~ s/\s+$//; 497 if ($$self{opt_alt}) { 498 my $closemark = reverse (split (//, $marker)); 499 my $margin = ' ' x $$self{opt_margin}; 500 $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n"); 501 } else { 502 $text .= "\n" if $$self{opt_loose}; 503 my $margin = ' ' x ($$self{opt_margin} + $indent); 504 $self->output ($margin . $text . "\n"); 505 } 506 return ''; 507} 508 509# First level heading. 510sub cmd_head1 { 511 my ($self, $attrs, $text) = @_; 512 $self->heading ($text, 0, '===='); 513} 514 515# Second level heading. 516sub cmd_head2 { 517 my ($self, $attrs, $text) = @_; 518 $self->heading ($text, $$self{opt_indent} / 2, '== '); 519} 520 521# Third level heading. 522sub cmd_head3 { 523 my ($self, $attrs, $text) = @_; 524 $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '= '); 525} 526 527# Fourth level heading. 528sub cmd_head4 { 529 my ($self, $attrs, $text) = @_; 530 $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '- '); 531} 532 533############################################################################## 534# List handling 535############################################################################## 536 537# Handle the beginning of an =over block. Takes the type of the block as the 538# first argument, and then the attr hash. This is called by the handlers for 539# the four different types of lists (bullet, number, text, and block). 540sub over_common_start { 541 my ($self, $attrs) = @_; 542 $self->item ("\n\n") if defined $$self{ITEM}; 543 544 # Find the indentation level. 545 my $indent = $$attrs{indent}; 546 unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) { 547 $indent = $$self{opt_indent}; 548 } 549 550 # Add this to our stack of indents and increase our current margin. 551 push (@{ $$self{INDENTS} }, $$self{MARGIN}); 552 $$self{MARGIN} += ($indent + 0); 553 return ''; 554} 555 556# End an =over block. Takes no options other than the class pointer. Output 557# any pending items and then pop one level of indentation. 558sub over_common_end { 559 my ($self) = @_; 560 $self->item ("\n\n") if defined $$self{ITEM}; 561 $$self{MARGIN} = pop @{ $$self{INDENTS} }; 562 return ''; 563} 564 565# Dispatch the start and end calls as appropriate. 566sub start_over_bullet { $_[0]->over_common_start ($_[1]) } 567sub start_over_number { $_[0]->over_common_start ($_[1]) } 568sub start_over_text { $_[0]->over_common_start ($_[1]) } 569sub start_over_block { $_[0]->over_common_start ($_[1]) } 570sub end_over_bullet { $_[0]->over_common_end } 571sub end_over_number { $_[0]->over_common_end } 572sub end_over_text { $_[0]->over_common_end } 573sub end_over_block { $_[0]->over_common_end } 574 575# The common handler for all item commands. Takes the type of the item, the 576# attributes, and then the text of the item. 577sub item_common { 578 my ($self, $type, $attrs, $text) = @_; 579 $self->item if defined $$self{ITEM}; 580 581 # Clean up the text. We want to end up with two variables, one ($text) 582 # which contains any body text after taking out the item portion, and 583 # another ($item) which contains the actual item text. Note the use of 584 # the internal Pod::Simple attribute here; that's a potential land mine. 585 $text =~ s/\s+$//; 586 my ($item, $index); 587 if ($type eq 'bullet') { 588 $item = '*'; 589 } elsif ($type eq 'number') { 590 $item = $$attrs{'~orig_content'}; 591 } else { 592 $item = $text; 593 $item =~ s/\s*\n\s*/ /g; 594 $text = ''; 595 } 596 $$self{ITEM} = $item; 597 598 # If body text for this item was included, go ahead and output that now. 599 if ($text) { 600 $text =~ s/\s*$/\n/; 601 $self->item ($text); 602 } 603 return ''; 604} 605 606# Dispatch the item commands to the appropriate place. 607sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) } 608sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) } 609sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) } 610sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) } 611 612############################################################################## 613# Formatting codes 614############################################################################## 615 616# The simple ones. 617sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] } 618sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] } 619sub cmd_i { return '*' . $_[2] . '*' } 620sub cmd_x { return '' } 621 622# Convert all internal whitespace to $NBSP. 623sub cmd_s { 624 my ($self, $attrs, $text) = @_; 625 $text =~ s{ \s }{$NBSP}xmsg; 626 return $text; 627} 628 629# Apply a whole bunch of messy heuristics to not quote things that don't 630# benefit from being quoted. These originally come from Barrie Slaymaker and 631# largely duplicate code in Pod::Man. 632sub cmd_c { 633 my ($self, $attrs, $text) = @_; 634 635 # A regex that matches the portion of a variable reference that's the 636 # array or hash index, separated out just because we want to use it in 637 # several places in the following regex. 638 my $index = '(?: \[[^]]+\] | \{[^}]+\} )?'; 639 640 # Check for things that we don't want to quote, and if we find any of 641 # them, return the string with just a font change and no quoting. 642 # 643 # Traditionally, Pod::Text has not quoted Perl variables, functions, 644 # numbers, or hex constants, but this is not always desirable. Make this 645 # optional on the quoting guesswork flag. 646 my $extra = qr{(?!)}xms; # never matches 647 if ($$self{GUESSWORK}{quoting}) { 648 $extra = qr{ 649 \$+ [\#^]? \S $index # special ($^F, $") 650 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func 651 | [\$\@%&*]* [:\'\w]+ 652 (?: -> )? \(\s*[^\s,\)]*\s*\) # 0/1-arg func call 653 | [+-]? ( \d[\d.]* | \.\d+ ) 654 (?: [eE][+-]?\d+ )? # a number 655 | 0x [a-fA-F\d]+ # a hex constant 656 }xms; 657 } 658 $text =~ m{ 659 ^\s* 660 (?: 661 ( [\'\`\"] ) .* \1 # already quoted 662 | \` .* \' # `quoted' 663 | $extra 664 ) 665 \s*\z 666 }xms and return $text; 667 668 # If we didn't return, go ahead and quote the text. 669 return $$self{opt_alt} 670 ? "``$text''" 671 : "$$self{LQUOTE}$text$$self{RQUOTE}"; 672} 673 674# Links reduce to the text that we're given, wrapped in angle brackets if it's 675# a URL. 676sub cmd_l { 677 my ($self, $attrs, $text) = @_; 678 if ($$attrs{type} eq 'url') { 679 if (not defined($$attrs{to}) or $$attrs{to} eq $text) { 680 return "<$text>"; 681 } elsif ($$self{opt_nourls}) { 682 return $text; 683 } else { 684 return "$text <$$attrs{to}>"; 685 } 686 } else { 687 return $text; 688 } 689} 690 691############################################################################## 692# Backwards compatibility 693############################################################################## 694 695# The old Pod::Text module did everything in a pod2text() function. This 696# tries to provide the same interface for legacy applications. 697sub pod2text { 698 my @args; 699 700 # This is really ugly; I hate doing option parsing in the middle of a 701 # module. But the old Pod::Text module supported passing flags to its 702 # entry function, so handle -a and -<number>. 703 while ($_[0] =~ /^-/) { 704 my $flag = shift; 705 if ($flag eq '-a') { push (@args, alt => 1) } 706 elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) } 707 else { 708 unshift (@_, $flag); 709 last; 710 } 711 } 712 713 # Now that we know what arguments we're using, create the parser. 714 my $parser = Pod::Text->new (@args); 715 716 # If two arguments were given, the second argument is going to be a file 717 # handle. That means we want to call parse_from_filehandle(), which means 718 # we need to turn the first argument into a file handle. Magic open will 719 # handle the <&STDIN case automagically. 720 if (defined $_[1]) { 721 my @fhs = @_; 722 local *IN; 723 unless (open (IN, $fhs[0])) { 724 croak ("Can't open $fhs[0] for reading: $!\n"); 725 return; 726 } 727 $fhs[0] = \*IN; 728 $parser->output_fh ($fhs[1]); 729 my $retval = $parser->parse_file ($fhs[0]); 730 my $fh = $parser->output_fh (); 731 close $fh; 732 return $retval; 733 } else { 734 $parser->output_fh (\*STDOUT); 735 return $parser->parse_file (@_); 736 } 737} 738 739# Reset the underlying Pod::Simple object between calls to parse_from_file so 740# that the same object can be reused to convert multiple pages. 741sub parse_from_file { 742 my $self = shift; 743 $self->reinit; 744 745 # Fake the old cutting option to Pod::Parser. This fiddles with internal 746 # Pod::Simple state and is quite ugly; we need a better approach. 747 if (ref ($_[0]) eq 'HASH') { 748 my $opts = shift @_; 749 if (defined ($$opts{-cutting}) && !$$opts{-cutting}) { 750 $$self{in_pod} = 1; 751 $$self{last_was_blank} = 1; 752 } 753 } 754 755 # Do the work. 756 my $retval = $self->Pod::Simple::parse_from_file (@_); 757 758 # Flush output, since Pod::Simple doesn't do this. Ideally we should also 759 # close the file descriptor if we had to open one, but we can't easily 760 # figure this out. 761 my $fh = $self->output_fh (); 762 my $oldfh = select $fh; 763 my $oldflush = $|; 764 $| = 1; 765 print $fh ''; 766 $| = $oldflush; 767 select $oldfh; 768 return $retval; 769} 770 771# Pod::Simple failed to provide this backward compatibility function, so 772# implement it ourselves. File handles are one of the inputs that 773# parse_from_file supports. 774sub parse_from_filehandle { 775 my $self = shift; 776 $self->parse_from_file (@_); 777} 778 779# Pod::Simple's parse_file doesn't set output_fh. Wrap the call and do so 780# ourself unless it was already set by the caller, since our documentation has 781# always said that this should work. 782sub parse_file { 783 my ($self, $in) = @_; 784 unless (defined $$self{output_fh}) { 785 $self->output_fh (\*STDOUT); 786 } 787 return $self->SUPER::parse_file ($in); 788} 789 790# Do the same for parse_lines, just to be polite. Pod::Simple's man page 791# implies that the caller is responsible for setting this, but I don't see any 792# reason not to set a default. 793sub parse_lines { 794 my ($self, @lines) = @_; 795 unless (defined $$self{output_fh}) { 796 $self->output_fh (\*STDOUT); 797 } 798 return $self->SUPER::parse_lines (@lines); 799} 800 801# Likewise for parse_string_document. 802sub parse_string_document { 803 my ($self, $doc) = @_; 804 unless (defined $$self{output_fh}) { 805 $self->output_fh (\*STDOUT); 806 } 807 return $self->SUPER::parse_string_document ($doc); 808} 809 810############################################################################## 811# Module return value and documentation 812############################################################################## 813 8141; 815__END__ 816 817=for stopwords 818alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls 819parsers EBCDIC autodetecting superset unrepresentable FH NNN 820 821=head1 NAME 822 823Pod::Text - Convert POD data to formatted text 824 825=head1 SYNOPSIS 826 827 use Pod::Text; 828 my $parser = Pod::Text->new (sentence => 1, width => 78); 829 830 # Read POD from STDIN and write to STDOUT. 831 $parser->parse_from_filehandle; 832 833 # Read POD from file.pod and write to file.txt. 834 $parser->parse_from_file ('file.pod', 'file.txt'); 835 836=head1 DESCRIPTION 837 838Pod::Text is a module that can convert documentation in the POD format (the 839preferred language for documenting Perl) into formatted text. It uses no 840special formatting controls or codes, and its output is therefore suitable for 841nearly any device. 842 843=head2 Encoding 844 845Pod::Text uses the following logic to choose an output encoding, in order: 846 847=over 4 848 849=item 1. 850 851If a PerlIO encoding layer is set on the output file handle, do not do any 852output encoding and will instead rely on the PerlIO encoding layer. 853 854=item 2. 855 856If the C<encoding> or C<utf8> options are set, use the output encoding 857specified by those options. 858 859=item 3. 860 861If the input encoding of the POD source file was explicitly specified (using 862C<=encoding>) or automatically detected by Pod::Simple, use that as the output 863encoding as well. 864 865=item 4. 866 867Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output 868encoding. Since this is a superset of ASCII, this will result in ASCII output 869unless the POD input contains non-ASCII characters without declaring or 870autodetecting an encoding (usually via EZ<><> escapes). 871 872=item 5. 873 874Otherwise, for EBCDIC systems, output without doing any encoding and hope 875this works. 876 877=back 878 879One caveat: Pod::Text has to commit to an output encoding the first time it 880outputs a non-ASCII character, and then has to stick with it for consistency. 881However, C<=encoding> commands don't have to be at the beginning of a POD 882document. If someone uses a non-ASCII character early in a document with an 883escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later, 884ideally Pod::Text would follow rule 3 and output the entire document as ISO 8858859-1. Instead, it will commit to UTF-8 following rule 4 as soon as it sees 886that escape, and then stick with that encoding for the rest of the document. 887 888Unfortunately, there's no universally good choice for an output encoding. 889Each choice will be incorrect in some circumstances. This approach was chosen 890primarily for backwards compatibility. Callers should consider forcing the 891output encoding via C<encoding> if they have any knowledge about what encoding 892the user may expect. 893 894In particular, consider importing the L<Encode::Locale> module, if available, 895and setting C<encoding> to C<locale> to use an output encoding appropriate to 896the user's locale. But be aware that if the user is not using locales or is 897using a locale of C<C>, Encode::Locale will set the output encoding to 898US-ASCII. This will cause all non-ASCII characters will be replaced with C<?> 899and produce a flurry of warnings about unsupported characters, which may or 900may not be what you want. 901 902=head1 CLASS METHODS 903 904=over 4 905 906=item new(ARGS) 907 908Create a new Pod::Text object. ARGS should be a list of key/value pairs, 909where the keys are chosen from the following. Each option is annotated with 910the version of Pod::Text in which that option was added with its current 911meaning. 912 913=over 4 914 915=item alt 916 917[2.00] If set to a true value, selects an alternate output format that, among 918other things, uses a different heading style and marks C<=item> entries with a 919colon in the left margin. Defaults to false. 920 921=item code 922 923[2.13] If set to a true value, the non-POD parts of the input file will be 924included in the output. Useful for viewing code documented with POD blocks 925with the POD rendered and the code left intact. 926 927=item encoding 928 929[5.00] Specifies the encoding of the output. The value must be an encoding 930recognized by the L<Encode> module (see L<Encode::Supported>). If the output 931contains characters that cannot be represented in this encoding, that is an 932error that will be reported as configured by the C<errors> option. If error 933handling is other than C<die>, the unrepresentable character will be replaced 934with the Encode substitution character (normally C<?>). 935 936If the output file handle has a PerlIO encoding layer set, this parameter will 937be ignored and no encoding will be done by Pod::Man. It will instead rely on 938the encoding layer to make whatever output encoding transformations are 939desired. 940 941WARNING: The input encoding of the POD source is independent from the output 942encoding, and setting this option does not affect the interpretation of the 943POD input. Unless your POD source is US-ASCII, its encoding should be 944declared with the C<=encoding> command in the source, as near to the top of 945the file as possible. If this is not done, Pod::Simple will will attempt to 946guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will 947produce warnings. See L<perlpod(1)> for more information. 948 949=item errors 950 951[3.17] How to report errors. C<die> says to throw an exception on any POD 952formatting error. C<stderr> says to report errors on standard error, but not 953to throw an exception. C<pod> says to include a POD ERRORS section in the 954resulting documentation summarizing the errors. C<none> ignores POD errors 955entirely, as much as possible. 956 957The default is C<pod>. 958 959=item guesswork 960 961[5.01] By default, Pod::Text applies some default formatting rules based on 962guesswork and regular expressions that are intended to make writing Perl 963documentation easier and require less explicit markup. These rules may not 964always be appropriate, particularly for documentation that isn't about Perl. 965This option allows turning all or some of it off. 966 967The special value C<all> enables all guesswork. This is also the default for 968backward compatibility reasons. The special value C<none> disables all 969guesswork. Otherwise, the value of this option should be a comma-separated 970list of one or more of the following keywords: 971 972=over 4 973 974=item quoting 975 976If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by 977double quotes in nroff (terminal) output unless the contents are already 978quoted. When this guesswork is enabled, quote marks will also be suppressed 979for Perl variables, function names, function calls, numbers, and hex 980constants. 981 982=back 983 984Any unknown guesswork name is silently ignored (for potential future 985compatibility), so be careful about spelling. 986 987=item indent 988 989[2.00] The number of spaces to indent regular text, and the default 990indentation for C<=over> blocks. Defaults to 4. 991 992=item loose 993 994[2.00] If set to a true value, a blank line is printed after a C<=head1> 995heading. If set to false (the default), no blank line is printed after 996C<=head1>, although one is still printed after C<=head2>. This is the default 997because it's the expected formatting for manual pages; if you're formatting 998arbitrary text documents, setting this to true may result in more pleasing 999output. 1000 1001=item margin 1002 1003[2.21] The width of the left margin in spaces. Defaults to 0. This is the 1004margin for all text, including headings, not the amount by which regular text 1005is indented; for the latter, see the I<indent> option. To set the right 1006margin, see the I<width> option. 1007 1008=item nourls 1009 1010[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are 1011formatted to show both the anchor text and the URL. In other words: 1012 1013 L<foo|http://example.com/> 1014 1015is formatted as: 1016 1017 foo <http://example.com/> 1018 1019This option, if set to a true value, suppresses the URL when anchor text is 1020given, so this example would be formatted as just C<foo>. This can produce 1021less cluttered output in cases where the URLs are not particularly important. 1022 1023=item quotes 1024 1025[4.00] Sets the quote marks used to surround CE<lt>> text. If the value is a 1026single character, it is used as both the left and right quote. Otherwise, it 1027is split in half, and the first half of the string is used as the left quote 1028and the second is used as the right quote. 1029 1030This may also be set to the special value C<none>, in which case no quote 1031marks are added around CE<lt>> text. 1032 1033=item sentence 1034 1035[3.00] If set to a true value, Pod::Text will assume that each sentence ends 1036in two spaces, and will try to preserve that spacing. If set to false, all 1037consecutive whitespace in non-verbatim paragraphs is compressed into a single 1038space. Defaults to false. 1039 1040=item stderr 1041 1042[3.10] Send error messages about invalid POD to standard error instead of 1043appending a POD ERRORS section to the generated output. This is equivalent to 1044setting C<errors> to C<stderr> if C<errors> is not already set. It is 1045supported for backward compatibility. 1046 1047=item utf8 1048 1049[3.12] If this option is set to a true value, the output encoding is set to 1050UTF-8. This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding> 1051is not already set. It is supported for backward compatibility. 1052 1053=item width 1054 1055[2.00] The column at which to wrap text on the right-hand side. Defaults to 105676. 1057 1058=back 1059 1060=back 1061 1062=head1 INSTANCE METHODS 1063 1064As a derived class from Pod::Simple, Pod::Text supports the same methods and 1065interfaces. See L<Pod::Simple> for all the details. This section summarizes 1066the most-frequently-used methods and the ones added by Pod::Text. 1067 1068=over 4 1069 1070=item output_fh(FH) 1071 1072Direct the output from parse_file(), parse_lines(), or parse_string_document() 1073to the file handle FH instead of C<STDOUT>. 1074 1075=item output_string(REF) 1076 1077Direct the output from parse_file(), parse_lines(), or parse_string_document() 1078to the scalar variable pointed to by REF, rather than C<STDOUT>. For example: 1079 1080 my $man = Pod::Man->new(); 1081 my $output; 1082 $man->output_string(\$output); 1083 $man->parse_file('/some/input/file'); 1084 1085Be aware that the output in that variable will already be encoded (see 1086L</Encoding>). 1087 1088=item parse_file(PATH) 1089 1090Read the POD source from PATH and format it. By default, the output is sent 1091to C<STDOUT>, but this can be changed with the output_fh() or output_string() 1092methods. 1093 1094=item parse_from_file(INPUT, OUTPUT) 1095 1096=item parse_from_filehandle(FH, OUTPUT) 1097 1098Read the POD source from INPUT, format it, and output the results to OUTPUT. 1099 1100parse_from_filehandle() is provided for backward compatibility with older 1101versions of Pod::Man. parse_from_file() should be used instead. 1102 1103=item parse_lines(LINES[, ...[, undef]]) 1104 1105Parse the provided lines as POD source, writing the output to either C<STDOUT> 1106or the file handle set with the output_fh() or output_string() methods. This 1107method can be called repeatedly to provide more input lines. An explicit 1108C<undef> should be passed to indicate the end of input. 1109 1110This method expects raw bytes, not decoded characters. 1111 1112=item parse_string_document(INPUT) 1113 1114Parse the provided scalar variable as POD source, writing the output to either 1115C<STDOUT> or the file handle set with the output_fh() or output_string() 1116methods. 1117 1118This method expects raw bytes, not decoded characters. 1119 1120=back 1121 1122=head1 FUNCTIONS 1123 1124Pod::Text exports one function for backward compatibility with older versions. 1125This function is deprecated; instead, use the object-oriented interface 1126described above. 1127 1128=over 4 1129 1130=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT]) 1131 1132Convert the POD source from INPUT to text and write it to OUTPUT. If OUTPUT 1133is not given, defaults to C<STDOUT>. INPUT can be any expression supported as 1134the second argument to two-argument open(). 1135 1136If C<-a> is given as an initial argument, pass the C<alt> option to the 1137Pod::Text constructor. This enables alternative formatting. 1138 1139If C<-NNN> is given as an initial argument, pass the C<width> option to the 1140Pod::Text constructor with the number C<NNN> as its argument. This sets the 1141wrap line width to NNN. 1142 1143=back 1144 1145=head1 DIAGNOSTICS 1146 1147=over 4 1148 1149=item Bizarre space in item 1150 1151=item Item called without tag 1152 1153(W) Something has gone wrong in internal C<=item> processing. These 1154messages indicate a bug in Pod::Text; you should never see them. 1155 1156=item Can't open %s for reading: %s 1157 1158(F) Pod::Text was invoked via the compatibility mode pod2text() interface 1159and the input file it was given could not be opened. 1160 1161=item Invalid errors setting "%s" 1162 1163(F) The C<errors> parameter to the constructor was set to an unknown value. 1164 1165=item Invalid quote specification "%s" 1166 1167(F) The quote specification given (the C<quotes> option to the 1168constructor) was invalid. A quote specification must be either one 1169character long or an even number (greater than one) characters long. 1170 1171=item POD document had syntax errors 1172 1173(F) The POD document being formatted had syntax errors and the C<errors> 1174option was set to C<die>. 1175 1176=back 1177 1178=head1 COMPATIBILITY 1179 1180Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module 1181included with Perl, in Perl 5.6.0. Earlier versions of Perl had a different 1182Pod::Text module, with a different API. 1183 1184The current API based on L<Pod::Simple> was added in Pod::Text 3.00. 1185Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to 1186incorporate those changes. This is the first version that correctly supports 1187all modern POD syntax. The parse_from_filehandle() method was re-added for 1188backward compatibility in Pod::Text 3.07, included in Perl 5.9.4. 1189 1190Pod::Text 3.12, included in Perl 5.10.1, first implemented the current 1191practice of attempting to match the default output encoding with the input 1192encoding of the POD source, unless overridden by the C<utf8> option or (added 1193later) the C<encoding> option. 1194 1195Support for anchor text in LZ<><> links of type URL was added in Pod::Text 11963.14, included in Perl 5.11.5. 1197 1198parse_lines(), parse_string_document(), and parse_file() set a default output 1199file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18, 1200included in Perl 5.19.5. 1201 1202Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the 1203version of the podlators distribution. All modules included in podlators, and 1204the podlators distribution itself, share the same version number from this 1205point forward. 1206 1207Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC 1208systems, present in all versions back to 3.00, that would cause opening 1209brackets to disappear. 1210 1211Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it 1212sees a non-ASCII character in the input and the input encoding is not 1213specified. It also commits to an encoding with the first non-ASCII character 1214and does not change the output encoding if the input encoding changes. The 1215L<Encode> module is now used for all output encoding rather than PerlIO 1216layers, which fixes earlier problems with output to scalars. 1217 1218=head1 AUTHOR 1219 1220Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text 1221by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser 1222by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial conversion of 1223Pod::Man to use Pod::Simple provided much-needed guidance on how to use 1224Pod::Simple. 1225 1226=head1 COPYRIGHT AND LICENSE 1227 1228Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ 1229Allbery <rra@cpan.org> 1230 1231This program is free software; you may redistribute it and/or modify it 1232under the same terms as Perl itself. 1233 1234=head1 SEE ALSO 1235 1236L<Encode::Locale>, L<Encode::Supproted>, L<Pod::Simple>, 1237L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> 1238 1239The current version of this module is always available from its web site at 1240L<https://www.eyrie.org/~eagle/software/podlators/>. It is also part of the 1241Perl core distribution as of 5.6.0. 1242 1243=cut 1244 1245# Local Variables: 1246# copyright-at-end-flag: t 1247# End: 1248