1# Convert POD data to formatted text. 2# 3# This module converts POD to formatted text. It replaces the old Pod::Text 4# module that came with versions of Perl prior to 5.6.0 and attempts to match 5# its output except for some specific circumstances where other decisions 6# seemed to produce better output. It uses Pod::Parser and is designed to be 7# very easy to subclass. 8# 9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl 10 11############################################################################## 12# Modules and declarations 13############################################################################## 14 15package Pod::Text; 16 17use 5.010; 18use strict; 19use warnings; 20 21use Carp qw(carp croak); 22use Encode qw(encode); 23use Exporter (); 24use Pod::Simple (); 25 26our @ISA = qw(Pod::Simple Exporter); 27our $VERSION = '5.01'; 28 29# We have to export pod2text for backward compatibility. 30our @EXPORT = qw(pod2text); 31 32# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available. Code 33# taken from Pod::Simple 3.32, but was only added in 3.30. 34my ($NBSP, $SHY); 35if ($Pod::Simple::VERSION ge 3.30) { 36 $NBSP = $Pod::Simple::nbsp; 37 $SHY = $Pod::Simple::shy; 38} else { 39 $NBSP = chr utf8::unicode_to_native(0xA0); 40 $SHY = chr utf8::unicode_to_native(0xAD); 41} 42 43# Import the ASCII constant from Pod::Simple. This is true iff we're in an 44# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is 45# generally only false for EBCDIC. 46BEGIN { *ASCII = \&Pod::Simple::ASCII } 47 48############################################################################## 49# Initialization 50############################################################################## 51 52# This function handles code blocks. It's registered as a callback to 53# Pod::Simple and therefore doesn't work as a regular method call, but all it 54# does is call output_code with the line. 55sub handle_code { 56 my ($line, $number, $parser) = @_; 57 $parser->output_code ($line . "\n"); 58} 59 60# Initialize the object and set various Pod::Simple options that we need. 61# Here, we also process any additional options passed to the constructor or 62# set up defaults if none were given. Note that all internal object keys are 63# in all-caps, reserving all lower-case object keys for Pod::Simple and user 64# arguments. 65sub new { 66 my $class = shift; 67 my $self = $class->SUPER::new; 68 69 # Tell Pod::Simple to keep whitespace whenever possible. 70 if ($self->can ('preserve_whitespace')) { 71 $self->preserve_whitespace (1); 72 } else { 73 $self->fullstop_space_harden (1); 74 } 75 76 # The =for and =begin targets that we accept. 77 $self->accept_targets (qw/text TEXT/); 78 79 # Ensure that contiguous blocks of code are merged together. Otherwise, 80 # some of the guesswork heuristics don't work right. 81 $self->merge_text (1); 82 83 # Pod::Simple doesn't do anything useful with our arguments, but we want 84 # to put them in our object as hash keys and values. This could cause 85 # problems if we ever clash with Pod::Simple's own internal class 86 # variables. 87 my %opts = @_; 88 my @opts = map { ("opt_$_", $opts{$_}) } keys %opts; 89 %$self = (%$self, @opts); 90 91 # Backwards-compatibility support for the stderr option. 92 if ($$self{opt_stderr} and not $$self{opt_errors}) { 93 $$self{opt_errors} = 'stderr'; 94 } 95 delete $$self{opt_stderr}; 96 97 # Backwards-compatibility support for the utf8 option. 98 if ($$self{opt_utf8} && !$$self{opt_encoding}) { 99 $$self{opt_encoding} = 'UTF-8'; 100 } 101 delete $$self{opt_utf8}; 102 103 # Validate the errors parameter and act on it. 104 $$self{opt_errors} //= 'pod'; 105 if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') { 106 $self->no_errata_section (1); 107 $self->complain_stderr (1); 108 if ($$self{opt_errors} eq 'die') { 109 $$self{complain_die} = 1; 110 } 111 } elsif ($$self{opt_errors} eq 'pod') { 112 $self->no_errata_section (0); 113 $self->complain_stderr (0); 114 } elsif ($$self{opt_errors} eq 'none') { 115 $self->no_errata_section (1); 116 $self->no_whining (1); 117 } else { 118 croak (qq(Invalid errors setting: "$$self{errors}")); 119 } 120 delete $$self{errors}; 121 122 # Initialize various things from our parameters. 123 $$self{opt_alt} //= 0; 124 $$self{opt_indent} //= 4; 125 $$self{opt_margin} //= 0; 126 $$self{opt_loose} //= 0; 127 $$self{opt_sentence} //= 0; 128 $$self{opt_width} //= 76; 129 130 # Figure out what quotes we'll be using for C<> text. 131 $$self{opt_quotes} ||= '"'; 132 if ($$self{opt_quotes} eq 'none') { 133 $$self{LQUOTE} = $$self{RQUOTE} = ''; 134 } elsif (length ($$self{opt_quotes}) == 1) { 135 $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes}; 136 } elsif (length ($$self{opt_quotes}) % 2 == 0) { 137 my $length = length ($$self{opt_quotes}) / 2; 138 $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length); 139 $$self{RQUOTE} = substr ($$self{opt_quotes}, $length); 140 } else { 141 croak qq(Invalid quote specification "$$self{opt_quotes}"); 142 } 143 144 # Configure guesswork based on options. 145 my $guesswork = $self->{opt_guesswork} || q{}; 146 my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork); 147 if (!%guesswork || $guesswork{all}) { 148 $$self{GUESSWORK} = {quoting => 1}; 149 } elsif ($guesswork{none}) { 150 $$self{GUESSWORK} = {}; 151 } else { 152 $$self{GUESSWORK} = {%guesswork}; 153 } 154 155 # If requested, do something with the non-POD text. 156 $self->code_handler (\&handle_code) if $$self{opt_code}; 157 158 # Return the created object. 159 return $self; 160} 161 162############################################################################## 163# Core parsing 164############################################################################## 165 166# This is the glue that connects the code below with Pod::Simple itself. The 167# goal is to convert the event stream coming from the POD parser into method 168# calls to handlers once the complete content of a tag has been seen. Each 169# paragraph or POD command will have textual content associated with it, and 170# as soon as all of a paragraph or POD command has been seen, that content 171# will be passed in to the corresponding method for handling that type of 172# object. The exceptions are handlers for lists, which have opening tag 173# handlers and closing tag handlers that will be called right away. 174# 175# The internal hash key PENDING is used to store the contents of a tag until 176# all of it has been seen. It holds a stack of open tags, each one 177# represented by a tuple of the attributes hash for the tag and the contents 178# of the tag. 179 180# Add a block of text to the contents of the current node, formatting it 181# according to the current formatting instructions as we do. 182sub _handle_text { 183 my ($self, $text) = @_; 184 my $tag = $$self{PENDING}[-1]; 185 $$tag[1] .= $text; 186} 187 188# Given an element name, get the corresponding method name. 189sub method_for_element { 190 my ($self, $element) = @_; 191 $element =~ tr/-/_/; 192 $element =~ tr/A-Z/a-z/; 193 $element =~ tr/_a-z0-9//cd; 194 return $element; 195} 196 197# Handle the start of a new element. If cmd_element is defined, assume that 198# we need to collect the entire tree for this element before passing it to the 199# element method, and create a new tree into which we'll collect blocks of 200# text and nested elements. Otherwise, if start_element is defined, call it. 201sub _handle_element_start { 202 my ($self, $element, $attrs) = @_; 203 my $method = $self->method_for_element ($element); 204 205 # If we have a command handler, we need to accumulate the contents of the 206 # tag before calling it. 207 if ($self->can ("cmd_$method")) { 208 push (@{ $$self{PENDING} }, [ $attrs, '' ]); 209 } elsif ($self->can ("start_$method")) { 210 my $method = 'start_' . $method; 211 $self->$method ($attrs, ''); 212 } 213} 214 215# Handle the end of an element. If we had a cmd_ method for this element, 216# this is where we pass along the text that we've accumulated. Otherwise, if 217# we have an end_ method for the element, call that. 218sub _handle_element_end { 219 my ($self, $element) = @_; 220 my $method = $self->method_for_element ($element); 221 222 # If we have a command handler, pull off the pending text and pass it to 223 # the handler along with the saved attribute hash. 224 if ($self->can ("cmd_$method")) { 225 my $tag = pop @{ $$self{PENDING} }; 226 my $method = 'cmd_' . $method; 227 my $text = $self->$method (@$tag); 228 if (defined $text) { 229 if (@{ $$self{PENDING} } > 1) { 230 $$self{PENDING}[-1][1] .= $text; 231 } else { 232 $self->output ($text); 233 } 234 } 235 } elsif ($self->can ("end_$method")) { 236 my $method = 'end_' . $method; 237 $self->$method (); 238 } 239} 240 241############################################################################## 242# Output formatting 243############################################################################## 244 245# Wrap a line, indenting by the current left margin. We can't use Text::Wrap 246# because it plays games with tabs. We can't use formline, even though we'd 247# really like to, because it screws up non-printing characters. So we have to 248# do the wrapping ourselves. 249sub wrap { 250 my $self = shift; 251 local $_ = shift; 252 my $output = ''; 253 my $spaces = ' ' x $$self{MARGIN}; 254 my $width = $$self{opt_width} - $$self{MARGIN}; 255 while (length > $width) { 256 if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) { 257 $output .= $spaces . $1 . "\n"; 258 } else { 259 last; 260 } 261 } 262 $output .= $spaces . $_; 263 $output =~ s/\s+$/\n\n/; 264 return $output; 265} 266 267# Reformat a paragraph of text for the current margin. Takes the text to 268# reformat and returns the formatted text. 269sub reformat { 270 my $self = shift; 271 local $_ = shift; 272 273 # If we're trying to preserve two spaces after sentences, do some munging 274 # to support that. Otherwise, smash all repeated whitespace. Be careful 275 # not to use \s here, which in Unicode input may match non-breaking spaces 276 # that we don't want to smash. 277 if ($$self{opt_sentence}) { 278 s/ +$//mg; 279 s/\.\n/. \n/g; 280 s/\n/ /g; 281 s/ +/ /g; 282 } else { 283 s/[ \t\n]+/ /g; 284 } 285 return $self->wrap ($_); 286} 287 288# Output text to the output device. Replace non-breaking spaces with spaces 289# and soft hyphens with nothing, and then determine the output encoding. 290sub output { 291 my ($self, @text) = @_; 292 my $text = join ('', @text); 293 if ($NBSP) { 294 $text =~ s/$NBSP/ /g; 295 } 296 if ($SHY) { 297 $text =~ s/$SHY//g; 298 } 299 300 # The logic used here is described in the POD documentation. Prefer the 301 # configured encoding, then the pass-through option of using the same 302 # encoding as the input, and then UTF-8, but commit to an encoding for the 303 # document. 304 # 305 # ENCODE says whether to encode or not and is turned off if there is a 306 # PerlIO encoding layer (in start_document). ENCODING is the encoding 307 # that we previously committed to and is cleared at the start of each 308 # document. 309 if ($$self{ENCODE}) { 310 my $encoding = $$self{ENCODING}; 311 if (!$encoding) { 312 $encoding = $self->encoding(); 313 if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) { 314 $encoding = 'UTF-8'; 315 } 316 if ($encoding) { 317 $$self{ENCODING} = $encoding; 318 } 319 } 320 if ($encoding) { 321 my $check = sub { 322 my ($char) = @_; 323 my $display = '"\x{' . hex($char) . '}"'; 324 my $error = "$display does not map to $$self{ENCODING}"; 325 $self->whine ($self->line_count(), $error); 326 return Encode::encode ($$self{ENCODING}, chr($char)); 327 }; 328 print { $$self{output_fh} } encode ($encoding, $text, $check); 329 } else { 330 print { $$self{output_fh} } $text; 331 } 332 } else { 333 print { $$self{output_fh} } $text; 334 } 335} 336 337# Output a block of code (something that isn't part of the POD text). Called 338# by preprocess_paragraph only if we were given the code option. Exists here 339# only so that it can be overridden by subclasses. 340sub output_code { $_[0]->output ($_[1]) } 341 342############################################################################## 343# Document initialization 344############################################################################## 345 346# Set up various things that have to be initialized on a per-document basis. 347sub start_document { 348 my ($self, $attrs) = @_; 349 if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) { 350 $$self{CONTENTLESS} = 1; 351 } else { 352 delete $$self{CONTENTLESS}; 353 } 354 my $margin = $$self{opt_indent} + $$self{opt_margin}; 355 356 # Initialize a few per-document variables. 357 $$self{INDENTS} = []; # Stack of indentations. 358 $$self{MARGIN} = $margin; # Default left margin. 359 $$self{PENDING} = [[]]; # Pending output. 360 361 # We have to redo encoding handling for each document. Check whether the 362 # output file handle already has a PerlIO encoding layer set and, if so, 363 # disable encoding. 364 $$self{ENCODE} = 1; 365 eval { 366 my @options = (output => 1, details => 1); 367 my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1]; 368 if ($flag && ($flag & PerlIO::F_UTF8 ())) { 369 $$self{ENCODE} = 0; 370 } 371 }; 372 $$self{ENCODING} = $$self{opt_encoding}; 373 374 return ''; 375} 376 377# Handle the end of the document. The only thing we do is handle dying on POD 378# errors, since Pod::Parser currently doesn't. 379sub end_document { 380 my ($self) = @_; 381 if ($$self{complain_die} && $self->errors_seen) { 382 croak ("POD document had syntax errors"); 383 } 384} 385 386############################################################################## 387# Text blocks 388############################################################################## 389 390# Intended for subclasses to override, this method returns text with any 391# non-printing formatting codes stripped out so that length() correctly 392# returns the length of the text. For basic Pod::Text, it does nothing. 393sub strip_format { 394 my ($self, $string) = @_; 395 return $string; 396} 397 398# This method is called whenever an =item command is complete (in other words, 399# we've seen its associated paragraph or know for certain that it doesn't have 400# one). It gets the paragraph associated with the item as an argument. If 401# that argument is empty, just output the item tag; if it contains a newline, 402# output the item tag followed by the newline. Otherwise, see if there's 403# enough room for us to output the item tag in the margin of the text or if we 404# have to put it on a separate line. 405sub item { 406 my ($self, $text) = @_; 407 my $tag = $$self{ITEM}; 408 unless (defined $tag) { 409 carp "Item called without tag"; 410 return; 411 } 412 undef $$self{ITEM}; 413 414 # Calculate the indentation and margin. $fits is set to true if the tag 415 # will fit into the margin of the paragraph given our indentation level. 416 my $indent = $$self{INDENTS}[-1] // $$self{opt_indent}; 417 my $margin = ' ' x $$self{opt_margin}; 418 my $tag_length = length ($self->strip_format ($tag)); 419 my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1); 420 421 # If the tag doesn't fit, or if we have no associated text, print out the 422 # tag separately. Otherwise, put the tag in the margin of the paragraph. 423 if (!$text || $text =~ /^\s+$/ || !$fits) { 424 my $realindent = $$self{MARGIN}; 425 $$self{MARGIN} = $indent; 426 my $output = $self->reformat ($tag); 427 $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 428 $output =~ s/\n*$/\n/; 429 430 # If the text is just whitespace, we have an empty item paragraph; 431 # this can result from =over/=item/=back without any intermixed 432 # paragraphs. Insert some whitespace to keep the =item from merging 433 # into the next paragraph. 434 $output .= "\n" if $text && $text =~ /^\s*$/; 435 436 $self->output ($output); 437 $$self{MARGIN} = $realindent; 438 $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/); 439 } else { 440 my $space = ' ' x $indent; 441 $space =~ s/^$margin /$margin:/ if $$self{opt_alt}; 442 $text = $self->reformat ($text); 443 $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0); 444 my $tagspace = ' ' x $tag_length; 445 $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item"; 446 $self->output ($text); 447 } 448} 449 450# Handle a basic block of text. The only tricky thing here is that if there 451# is a pending item tag, we need to format this as an item paragraph. 452sub cmd_para { 453 my ($self, $attrs, $text) = @_; 454 $text =~ s/\s+$/\n/; 455 if (defined $$self{ITEM}) { 456 $self->item ($text . "\n"); 457 } else { 458 $self->output ($self->reformat ($text . "\n")); 459 } 460 return ''; 461} 462 463# Handle a verbatim paragraph. Just print it out, but indent it according to 464# our margin. 465sub cmd_verbatim { 466 my ($self, $attrs, $text) = @_; 467 $self->item if defined $$self{ITEM}; 468 return if $text =~ /^\s*$/; 469 $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme; 470 $text =~ s/\s*$/\n\n/; 471 $self->output ($text); 472 return ''; 473} 474 475# Handle literal text (produced by =for and similar constructs). Just output 476# it with the minimum of changes. 477sub cmd_data { 478 my ($self, $attrs, $text) = @_; 479 $text =~ s/^\n+//; 480 $text =~ s/\n{0,2}$/\n/; 481 $self->output ($text); 482 return ''; 483} 484 485############################################################################## 486# Headings 487############################################################################## 488 489# The common code for handling all headers. Takes the header text, the 490# indentation, and the surrounding marker for the alt formatting method. 491sub heading { 492 my ($self, $text, $indent, $marker) = @_; 493 $self->item ("\n\n") if defined $$self{ITEM}; 494 $text =~ s/\s+$//; 495 if ($$self{opt_alt}) { 496 my $closemark = reverse (split (//, $marker)); 497 my $margin = ' ' x $$self{opt_margin}; 498 $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n"); 499 } else { 500 $text .= "\n" if $$self{opt_loose}; 501 my $margin = ' ' x ($$self{opt_margin} + $indent); 502 $self->output ($margin . $text . "\n"); 503 } 504 return ''; 505} 506 507# First level heading. 508sub cmd_head1 { 509 my ($self, $attrs, $text) = @_; 510 $self->heading ($text, 0, '===='); 511} 512 513# Second level heading. 514sub cmd_head2 { 515 my ($self, $attrs, $text) = @_; 516 $self->heading ($text, $$self{opt_indent} / 2, '== '); 517} 518 519# Third level heading. 520sub cmd_head3 { 521 my ($self, $attrs, $text) = @_; 522 $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '= '); 523} 524 525# Fourth level heading. 526sub cmd_head4 { 527 my ($self, $attrs, $text) = @_; 528 $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '- '); 529} 530 531############################################################################## 532# List handling 533############################################################################## 534 535# Handle the beginning of an =over block. Takes the type of the block as the 536# first argument, and then the attr hash. This is called by the handlers for 537# the four different types of lists (bullet, number, text, and block). 538sub over_common_start { 539 my ($self, $attrs) = @_; 540 $self->item ("\n\n") if defined $$self{ITEM}; 541 542 # Find the indentation level. 543 my $indent = $$attrs{indent}; 544 unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) { 545 $indent = $$self{opt_indent}; 546 } 547 548 # Add this to our stack of indents and increase our current margin. 549 push (@{ $$self{INDENTS} }, $$self{MARGIN}); 550 $$self{MARGIN} += ($indent + 0); 551 return ''; 552} 553 554# End an =over block. Takes no options other than the class pointer. Output 555# any pending items and then pop one level of indentation. 556sub over_common_end { 557 my ($self) = @_; 558 $self->item ("\n\n") if defined $$self{ITEM}; 559 $$self{MARGIN} = pop @{ $$self{INDENTS} }; 560 return ''; 561} 562 563# Dispatch the start and end calls as appropriate. 564sub start_over_bullet { $_[0]->over_common_start ($_[1]) } 565sub start_over_number { $_[0]->over_common_start ($_[1]) } 566sub start_over_text { $_[0]->over_common_start ($_[1]) } 567sub start_over_block { $_[0]->over_common_start ($_[1]) } 568sub end_over_bullet { $_[0]->over_common_end } 569sub end_over_number { $_[0]->over_common_end } 570sub end_over_text { $_[0]->over_common_end } 571sub end_over_block { $_[0]->over_common_end } 572 573# The common handler for all item commands. Takes the type of the item, the 574# attributes, and then the text of the item. 575sub item_common { 576 my ($self, $type, $attrs, $text) = @_; 577 $self->item if defined $$self{ITEM}; 578 579 # Clean up the text. We want to end up with two variables, one ($text) 580 # which contains any body text after taking out the item portion, and 581 # another ($item) which contains the actual item text. Note the use of 582 # the internal Pod::Simple attribute here; that's a potential land mine. 583 $text =~ s/\s+$//; 584 my ($item, $index); 585 if ($type eq 'bullet') { 586 $item = '*'; 587 } elsif ($type eq 'number') { 588 $item = $$attrs{'~orig_content'}; 589 } else { 590 $item = $text; 591 $item =~ s/\s*\n\s*/ /g; 592 $text = ''; 593 } 594 $$self{ITEM} = $item; 595 596 # If body text for this item was included, go ahead and output that now. 597 if ($text) { 598 $text =~ s/\s*$/\n/; 599 $self->item ($text); 600 } 601 return ''; 602} 603 604# Dispatch the item commands to the appropriate place. 605sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) } 606sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) } 607sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) } 608sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) } 609 610############################################################################## 611# Formatting codes 612############################################################################## 613 614# The simple ones. 615sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] } 616sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] } 617sub cmd_i { return '*' . $_[2] . '*' } 618sub cmd_x { return '' } 619 620# Convert all internal whitespace to $NBSP. 621sub cmd_s { 622 my ($self, $attrs, $text) = @_; 623 $text =~ s{ \s }{$NBSP}xmsg; 624 return $text; 625} 626 627# Apply a whole bunch of messy heuristics to not quote things that don't 628# benefit from being quoted. These originally come from Barrie Slaymaker and 629# largely duplicate code in Pod::Man. 630sub cmd_c { 631 my ($self, $attrs, $text) = @_; 632 633 # A regex that matches the portion of a variable reference that's the 634 # array or hash index, separated out just because we want to use it in 635 # several places in the following regex. 636 my $index = '(?: \[[^]]+\] | \{[^}]+\} )?'; 637 638 # Check for things that we don't want to quote, and if we find any of 639 # them, return the string with just a font change and no quoting. 640 # 641 # Traditionally, Pod::Text has not quoted Perl variables, functions, 642 # numbers, or hex constants, but this is not always desirable. Make this 643 # optional on the quoting guesswork flag. 644 my $extra = qr{(?!)}xms; # never matches 645 if ($$self{GUESSWORK}{quoting}) { 646 $extra = qr{ 647 \$+ [\#^]? \S $index # special ($^F, $") 648 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func 649 | [\$\@%&*]* [:\'\w]+ 650 (?: -> )? \(\s*[^\s,\)]*\s*\) # 0/1-arg func call 651 | [+-]? ( \d[\d.]* | \.\d+ ) 652 (?: [eE][+-]?\d+ )? # a number 653 | 0x [a-fA-F\d]+ # a hex constant 654 }xms; 655 } 656 $text =~ m{ 657 ^\s* 658 (?: 659 ( [\'\`\"] ) .* \1 # already quoted 660 | \` .* \' # `quoted' 661 | $extra 662 ) 663 \s*\z 664 }xms and return $text; 665 666 # If we didn't return, go ahead and quote the text. 667 return $$self{opt_alt} 668 ? "``$text''" 669 : "$$self{LQUOTE}$text$$self{RQUOTE}"; 670} 671 672# Links reduce to the text that we're given, wrapped in angle brackets if it's 673# a URL. 674sub cmd_l { 675 my ($self, $attrs, $text) = @_; 676 if ($$attrs{type} eq 'url') { 677 if (not defined($$attrs{to}) or $$attrs{to} eq $text) { 678 return "<$text>"; 679 } elsif ($$self{opt_nourls}) { 680 return $text; 681 } else { 682 return "$text <$$attrs{to}>"; 683 } 684 } else { 685 return $text; 686 } 687} 688 689############################################################################## 690# Backwards compatibility 691############################################################################## 692 693# The old Pod::Text module did everything in a pod2text() function. This 694# tries to provide the same interface for legacy applications. 695sub pod2text { 696 my @args; 697 698 # This is really ugly; I hate doing option parsing in the middle of a 699 # module. But the old Pod::Text module supported passing flags to its 700 # entry function, so handle -a and -<number>. 701 while ($_[0] =~ /^-/) { 702 my $flag = shift; 703 if ($flag eq '-a') { push (@args, alt => 1) } 704 elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) } 705 else { 706 unshift (@_, $flag); 707 last; 708 } 709 } 710 711 # Now that we know what arguments we're using, create the parser. 712 my $parser = Pod::Text->new (@args); 713 714 # If two arguments were given, the second argument is going to be a file 715 # handle. That means we want to call parse_from_filehandle(), which means 716 # we need to turn the first argument into a file handle. Magic open will 717 # handle the <&STDIN case automagically. 718 if (defined $_[1]) { 719 my @fhs = @_; 720 local *IN; 721 unless (open (IN, $fhs[0])) { 722 croak ("Can't open $fhs[0] for reading: $!\n"); 723 return; 724 } 725 $fhs[0] = \*IN; 726 $parser->output_fh ($fhs[1]); 727 my $retval = $parser->parse_file ($fhs[0]); 728 my $fh = $parser->output_fh (); 729 close $fh; 730 return $retval; 731 } else { 732 $parser->output_fh (\*STDOUT); 733 return $parser->parse_file (@_); 734 } 735} 736 737# Reset the underlying Pod::Simple object between calls to parse_from_file so 738# that the same object can be reused to convert multiple pages. 739sub parse_from_file { 740 my $self = shift; 741 $self->reinit; 742 743 # Fake the old cutting option to Pod::Parser. This fiddles with internal 744 # Pod::Simple state and is quite ugly; we need a better approach. 745 if (ref ($_[0]) eq 'HASH') { 746 my $opts = shift @_; 747 if (defined ($$opts{-cutting}) && !$$opts{-cutting}) { 748 $$self{in_pod} = 1; 749 $$self{last_was_blank} = 1; 750 } 751 } 752 753 # Do the work. 754 my $retval = $self->Pod::Simple::parse_from_file (@_); 755 756 # Flush output, since Pod::Simple doesn't do this. Ideally we should also 757 # close the file descriptor if we had to open one, but we can't easily 758 # figure this out. 759 my $fh = $self->output_fh (); 760 my $oldfh = select $fh; 761 my $oldflush = $|; 762 $| = 1; 763 print $fh ''; 764 $| = $oldflush; 765 select $oldfh; 766 return $retval; 767} 768 769# Pod::Simple failed to provide this backward compatibility function, so 770# implement it ourselves. File handles are one of the inputs that 771# parse_from_file supports. 772sub parse_from_filehandle { 773 my $self = shift; 774 $self->parse_from_file (@_); 775} 776 777# Pod::Simple's parse_file doesn't set output_fh. Wrap the call and do so 778# ourself unless it was already set by the caller, since our documentation has 779# always said that this should work. 780sub parse_file { 781 my ($self, $in) = @_; 782 unless (defined $$self{output_fh}) { 783 $self->output_fh (\*STDOUT); 784 } 785 return $self->SUPER::parse_file ($in); 786} 787 788# Do the same for parse_lines, just to be polite. Pod::Simple's man page 789# implies that the caller is responsible for setting this, but I don't see any 790# reason not to set a default. 791sub parse_lines { 792 my ($self, @lines) = @_; 793 unless (defined $$self{output_fh}) { 794 $self->output_fh (\*STDOUT); 795 } 796 return $self->SUPER::parse_lines (@lines); 797} 798 799# Likewise for parse_string_document. 800sub parse_string_document { 801 my ($self, $doc) = @_; 802 unless (defined $$self{output_fh}) { 803 $self->output_fh (\*STDOUT); 804 } 805 return $self->SUPER::parse_string_document ($doc); 806} 807 808############################################################################## 809# Module return value and documentation 810############################################################################## 811 8121; 813__END__ 814 815=for stopwords 816alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls 817parsers EBCDIC autodetecting superset unrepresentable FH NNN 818 819=head1 NAME 820 821Pod::Text - Convert POD data to formatted text 822 823=head1 SYNOPSIS 824 825 use Pod::Text; 826 my $parser = Pod::Text->new (sentence => 1, width => 78); 827 828 # Read POD from STDIN and write to STDOUT. 829 $parser->parse_from_filehandle; 830 831 # Read POD from file.pod and write to file.txt. 832 $parser->parse_from_file ('file.pod', 'file.txt'); 833 834=head1 DESCRIPTION 835 836Pod::Text is a module that can convert documentation in the POD format (the 837preferred language for documenting Perl) into formatted text. It uses no 838special formatting controls or codes, and its output is therefore suitable for 839nearly any device. 840 841=head2 Encoding 842 843Pod::Text uses the following logic to choose an output encoding, in order: 844 845=over 4 846 847=item 1. 848 849If a PerlIO encoding layer is set on the output file handle, do not do any 850output encoding and will instead rely on the PerlIO encoding layer. 851 852=item 2. 853 854If the C<encoding> or C<utf8> options are set, use the output encoding 855specified by those options. 856 857=item 3. 858 859If the input encoding of the POD source file was explicitly specified (using 860C<=encoding>) or automatically detected by Pod::Simple, use that as the output 861encoding as well. 862 863=item 4. 864 865Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output 866encoding. Since this is a superset of ASCII, this will result in ASCII output 867unless the POD input contains non-ASCII characters without declaring or 868autodetecting an encoding (usually via EZ<><> escapes). 869 870=item 5. 871 872Otherwise, for EBCDIC systems, output without doing any encoding and hope 873this works. 874 875=back 876 877One caveat: Pod::Text has to commit to an output encoding the first time it 878outputs a non-ASCII character, and then has to stick with it for consistency. 879However, C<=encoding> commands don't have to be at the beginning of a POD 880document. If someone uses a non-ASCII character early in a document with an 881escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later, 882ideally Pod::Text would follow rule 3 and output the entire document as ISO 8838859-1. Instead, it will commit to UTF-8 following rule 4 as soon as it sees 884that escape, and then stick with that encoding for the rest of the document. 885 886Unfortunately, there's no universally good choice for an output encoding. 887Each choice will be incorrect in some circumstances. This approach was chosen 888primarily for backwards compatibility. Callers should consider forcing the 889output encoding via C<encoding> if they have any knowledge about what encoding 890the user may expect. 891 892In particular, consider importing the L<Encode::Locale> module, if available, 893and setting C<encoding> to C<locale> to use an output encoding appropriate to 894the user's locale. But be aware that if the user is not using locales or is 895using a locale of C<C>, Encode::Locale will set the output encoding to 896US-ASCII. This will cause all non-ASCII characters will be replaced with C<?> 897and produce a flurry of warnings about unsupported characters, which may or 898may not be what you want. 899 900=head1 CLASS METHODS 901 902=over 4 903 904=item new(ARGS) 905 906Create a new Pod::Text object. ARGS should be a list of key/value pairs, 907where the keys are chosen from the following. Each option is annotated with 908the version of Pod::Text in which that option was added with its current 909meaning. 910 911=over 4 912 913=item alt 914 915[2.00] If set to a true value, selects an alternate output format that, among 916other things, uses a different heading style and marks C<=item> entries with a 917colon in the left margin. Defaults to false. 918 919=item code 920 921[2.13] If set to a true value, the non-POD parts of the input file will be 922included in the output. Useful for viewing code documented with POD blocks 923with the POD rendered and the code left intact. 924 925=item encoding 926 927[5.00] Specifies the encoding of the output. The value must be an encoding 928recognized by the L<Encode> module (see L<Encode::Supported>). If the output 929contains characters that cannot be represented in this encoding, that is an 930error that will be reported as configured by the C<errors> option. If error 931handling is other than C<die>, the unrepresentable character will be replaced 932with the Encode substitution character (normally C<?>). 933 934If the output file handle has a PerlIO encoding layer set, this parameter will 935be ignored and no encoding will be done by Pod::Man. It will instead rely on 936the encoding layer to make whatever output encoding transformations are 937desired. 938 939WARNING: The input encoding of the POD source is independent from the output 940encoding, and setting this option does not affect the interpretation of the 941POD input. Unless your POD source is US-ASCII, its encoding should be 942declared with the C<=encoding> command in the source, as near to the top of 943the file as possible. If this is not done, Pod::Simple will will attempt to 944guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will 945produce warnings. See L<perlpod(1)> for more information. 946 947=item errors 948 949[3.17] How to report errors. C<die> says to throw an exception on any POD 950formatting error. C<stderr> says to report errors on standard error, but not 951to throw an exception. C<pod> says to include a POD ERRORS section in the 952resulting documentation summarizing the errors. C<none> ignores POD errors 953entirely, as much as possible. 954 955The default is C<pod>. 956 957=item guesswork 958 959[5.01] By default, Pod::Text applies some default formatting rules based on 960guesswork and regular expressions that are intended to make writing Perl 961documentation easier and require less explicit markup. These rules may not 962always be appropriate, particularly for documentation that isn't about Perl. 963This option allows turning all or some of it off. 964 965The special value C<all> enables all guesswork. This is also the default for 966backward compatibility reasons. The special value C<none> disables all 967guesswork. Otherwise, the value of this option should be a comma-separated 968list of one or more of the following keywords: 969 970=over 4 971 972=item quoting 973 974If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by 975double quotes in nroff (terminal) output unless the contents are already 976quoted. When this guesswork is enabled, quote marks will also be suppressed 977for Perl variables, function names, function calls, numbers, and hex 978constants. 979 980=back 981 982Any unknown guesswork name is silently ignored (for potential future 983compatibility), so be careful about spelling. 984 985=item indent 986 987[2.00] The number of spaces to indent regular text, and the default 988indentation for C<=over> blocks. Defaults to 4. 989 990=item loose 991 992[2.00] If set to a true value, a blank line is printed after a C<=head1> 993heading. If set to false (the default), no blank line is printed after 994C<=head1>, although one is still printed after C<=head2>. This is the default 995because it's the expected formatting for manual pages; if you're formatting 996arbitrary text documents, setting this to true may result in more pleasing 997output. 998 999=item margin 1000 1001[2.21] The width of the left margin in spaces. Defaults to 0. This is the 1002margin for all text, including headings, not the amount by which regular text 1003is indented; for the latter, see the I<indent> option. To set the right 1004margin, see the I<width> option. 1005 1006=item nourls 1007 1008[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are 1009formatted to show both the anchor text and the URL. In other words: 1010 1011 L<foo|http://example.com/> 1012 1013is formatted as: 1014 1015 foo <http://example.com/> 1016 1017This option, if set to a true value, suppresses the URL when anchor text is 1018given, so this example would be formatted as just C<foo>. This can produce 1019less cluttered output in cases where the URLs are not particularly important. 1020 1021=item quotes 1022 1023[4.00] Sets the quote marks used to surround CE<lt>> text. If the value is a 1024single character, it is used as both the left and right quote. Otherwise, it 1025is split in half, and the first half of the string is used as the left quote 1026and the second is used as the right quote. 1027 1028This may also be set to the special value C<none>, in which case no quote 1029marks are added around CE<lt>> text. 1030 1031=item sentence 1032 1033[3.00] If set to a true value, Pod::Text will assume that each sentence ends 1034in two spaces, and will try to preserve that spacing. If set to false, all 1035consecutive whitespace in non-verbatim paragraphs is compressed into a single 1036space. Defaults to false. 1037 1038=item stderr 1039 1040[3.10] Send error messages about invalid POD to standard error instead of 1041appending a POD ERRORS section to the generated output. This is equivalent to 1042setting C<errors> to C<stderr> if C<errors> is not already set. It is 1043supported for backward compatibility. 1044 1045=item utf8 1046 1047[3.12] If this option is set to a true value, the output encoding is set to 1048UTF-8. This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding> 1049is not already set. It is supported for backward compatibility. 1050 1051=item width 1052 1053[2.00] The column at which to wrap text on the right-hand side. Defaults to 105476. 1055 1056=back 1057 1058=back 1059 1060=head1 INSTANCE METHODS 1061 1062As a derived class from Pod::Simple, Pod::Text supports the same methods and 1063interfaces. See L<Pod::Simple> for all the details. This section summarizes 1064the most-frequently-used methods and the ones added by Pod::Text. 1065 1066=over 4 1067 1068=item output_fh(FH) 1069 1070Direct the output from parse_file(), parse_lines(), or parse_string_document() 1071to the file handle FH instead of C<STDOUT>. 1072 1073=item output_string(REF) 1074 1075Direct the output from parse_file(), parse_lines(), or parse_string_document() 1076to the scalar variable pointed to by REF, rather than C<STDOUT>. For example: 1077 1078 my $man = Pod::Man->new(); 1079 my $output; 1080 $man->output_string(\$output); 1081 $man->parse_file('/some/input/file'); 1082 1083Be aware that the output in that variable will already be encoded (see 1084L</Encoding>). 1085 1086=item parse_file(PATH) 1087 1088Read the POD source from PATH and format it. By default, the output is sent 1089to C<STDOUT>, but this can be changed with the output_fh() or output_string() 1090methods. 1091 1092=item parse_from_file(INPUT, OUTPUT) 1093 1094=item parse_from_filehandle(FH, OUTPUT) 1095 1096Read the POD source from INPUT, format it, and output the results to OUTPUT. 1097 1098parse_from_filehandle() is provided for backward compatibility with older 1099versions of Pod::Man. parse_from_file() should be used instead. 1100 1101=item parse_lines(LINES[, ...[, undef]]) 1102 1103Parse the provided lines as POD source, writing the output to either C<STDOUT> 1104or the file handle set with the output_fh() or output_string() methods. This 1105method can be called repeatedly to provide more input lines. An explicit 1106C<undef> should be passed to indicate the end of input. 1107 1108This method expects raw bytes, not decoded characters. 1109 1110=item parse_string_document(INPUT) 1111 1112Parse the provided scalar variable as POD source, writing the output to either 1113C<STDOUT> or the file handle set with the output_fh() or output_string() 1114methods. 1115 1116This method expects raw bytes, not decoded characters. 1117 1118=back 1119 1120=head1 FUNCTIONS 1121 1122Pod::Text exports one function for backward compatibility with older versions. 1123This function is deprecated; instead, use the object-oriented interface 1124described above. 1125 1126=over 4 1127 1128=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT]) 1129 1130Convert the POD source from INPUT to text and write it to OUTPUT. If OUTPUT 1131is not given, defaults to C<STDOUT>. INPUT can be any expression supported as 1132the second argument to two-argument open(). 1133 1134If C<-a> is given as an initial argument, pass the C<alt> option to the 1135Pod::Text constructor. This enables alternative formatting. 1136 1137If C<-NNN> is given as an initial argument, pass the C<width> option to the 1138Pod::Text constructor with the number C<NNN> as its argument. This sets the 1139wrap line width to NNN. 1140 1141=back 1142 1143=head1 DIAGNOSTICS 1144 1145=over 4 1146 1147=item Bizarre space in item 1148 1149=item Item called without tag 1150 1151(W) Something has gone wrong in internal C<=item> processing. These 1152messages indicate a bug in Pod::Text; you should never see them. 1153 1154=item Can't open %s for reading: %s 1155 1156(F) Pod::Text was invoked via the compatibility mode pod2text() interface 1157and the input file it was given could not be opened. 1158 1159=item Invalid errors setting "%s" 1160 1161(F) The C<errors> parameter to the constructor was set to an unknown value. 1162 1163=item Invalid quote specification "%s" 1164 1165(F) The quote specification given (the C<quotes> option to the 1166constructor) was invalid. A quote specification must be either one 1167character long or an even number (greater than one) characters long. 1168 1169=item POD document had syntax errors 1170 1171(F) The POD document being formatted had syntax errors and the C<errors> 1172option was set to C<die>. 1173 1174=back 1175 1176=head1 COMPATIBILITY 1177 1178Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module 1179included with Perl, in Perl 5.6.0. Earlier versions of Perl had a different 1180Pod::Text module, with a different API. 1181 1182The current API based on L<Pod::Simple> was added in Pod::Text 3.00. 1183Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to 1184incorporate those changes. This is the first version that correctly supports 1185all modern POD syntax. The parse_from_filehandle() method was re-added for 1186backward compatibility in Pod::Text 3.07, included in Perl 5.9.4. 1187 1188Pod::Text 3.12, included in Perl 5.10.1, first implemented the current 1189practice of attempting to match the default output encoding with the input 1190encoding of the POD source, unless overridden by the C<utf8> option or (added 1191later) the C<encoding> option. 1192 1193Support for anchor text in LZ<><> links of type URL was added in Pod::Text 11943.14, included in Perl 5.11.5. 1195 1196parse_lines(), parse_string_document(), and parse_file() set a default output 1197file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18, 1198included in Perl 5.19.5. 1199 1200Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the 1201version of the podlators distribution. All modules included in podlators, and 1202the podlators distribution itself, share the same version number from this 1203point forward. 1204 1205Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC 1206systems, present in all versions back to 3.00, that would cause opening 1207brackets to disappear. 1208 1209Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it 1210sees a non-ASCII character in the input and the input encoding is not 1211specified. It also commits to an encoding with the first non-ASCII character 1212and does not change the output encoding if the input encoding changes. The 1213L<Encode> module is now used for all output encoding rather than PerlIO 1214layers, which fixes earlier problems with output to scalars. 1215 1216=head1 AUTHOR 1217 1218Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text 1219by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser 1220by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial conversion of 1221Pod::Man to use Pod::Simple provided much-needed guidance on how to use 1222Pod::Simple. 1223 1224=head1 COPYRIGHT AND LICENSE 1225 1226Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ 1227Allbery <rra@cpan.org> 1228 1229This program is free software; you may redistribute it and/or modify it 1230under the same terms as Perl itself. 1231 1232=head1 SEE ALSO 1233 1234L<Encode::Locale>, L<Encode::Supproted>, L<Pod::Simple>, 1235L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> 1236 1237The current version of this module is always available from its web site at 1238L<https://www.eyrie.org/~eagle/software/podlators/>. It is also part of the 1239Perl core distribution as of 5.6.0. 1240 1241=cut 1242 1243# Local Variables: 1244# copyright-at-end-flag: t 1245# End: 1246