# Convert POD data to formatted text.
#
# This module converts POD to formatted text.  It replaces the old Pod::Text
# module that came with versions of Perl prior to 5.6.0 and attempts to match
# its output except for some specific circumstances where other decisions
# seemed to produce better output.  It uses Pod::Simple and is designed to be
# very easy to subclass.
#
# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl

##############################################################################
# Modules and declarations
##############################################################################

package Pod::Text;

use 5.010;
use strict;
use warnings;

use Carp qw(carp croak);
use Encode qw(encode);
use Exporter ();
use Pod::Simple ();

# Pod::Simple supplies the parser; Exporter is only needed for pod2text.
our @ISA = qw(Pod::Simple Exporter);

# The underscore marks a development release and is stripped so that the
# version can be compared numerically.
our $VERSION = '5.01_02';
$VERSION =~ tr/_//d;

# We have to export pod2text for backward compatibility.
our @EXPORT = qw(pod2text);

# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available.  Code
# taken from Pod::Simple 3.32, but was only added in 3.30.
my ($NBSP, $SHY);

# NOTE(review): "ge" is a stringwise comparison and the literal 3.30
# stringifies as "3.3".  That orders every 3.x release of Pod::Simple
# correctly but would not survive a two-digit major version.
if ($Pod::Simple::VERSION ge 3.30) {
    $NBSP = $Pod::Simple::nbsp;
    $SHY  = $Pod::Simple::shy;
} else {
    $NBSP = chr utf8::unicode_to_native(0xA0);
    $SHY  = chr utf8::unicode_to_native(0xAD);
}

# Import the ASCII constant from Pod::Simple.  This is true iff we're in an
# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
# generally only false for EBCDIC.
BEGIN { *ASCII = \&Pod::Simple::ASCII }

##############################################################################
# Initialization
##############################################################################

# This function handles code blocks.  It's registered as a callback to
# Pod::Simple and therefore doesn't work as a regular method call, but all it
# does is call output_code with the line.
#
# $line   - The line of non-POD text (without its trailing newline)
# $number - The line number (unused)
# $parser - The formatter object whose output_code method is called
sub handle_code {
    my ($line, $number, $parser) = @_;
    $parser->output_code ($line . "\n");
}

# Initialize the object and set various Pod::Simple options that we need.
# Here, we also process any additional options passed to the constructor or
# set up defaults if none were given.  Note that all internal object keys are
# in all-caps, reserving all lower-case object keys for Pod::Simple and user
# arguments.
#
# $class - Class to construct
# @_     - Option name/value pairs; each is stored under the key "opt_<name>"
#
# Returns: the new formatter object
#  Throws: Text exception (via croak) on an invalid errors or quotes setting
sub new {
    my $class = shift;
    my $self = $class->SUPER::new;

    # Tell Pod::Simple to keep whitespace whenever possible.
    if ($self->can ('preserve_whitespace')) {
        $self->preserve_whitespace (1);
    } else {
        $self->fullstop_space_harden (1);
    }

    # The =for and =begin targets that we accept.
    $self->accept_targets (qw/text TEXT/);

    # Ensure that contiguous blocks of code are merged together.  Otherwise,
    # some of the guesswork heuristics don't work right.
    $self->merge_text (1);

    # Pod::Simple doesn't do anything useful with our arguments, but we want
    # to put them in our object as hash keys and values.  This could cause
    # problems if we ever clash with Pod::Simple's own internal class
    # variables.
    my %opts = @_;
    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
    %$self = (%$self, @opts);

    # Backwards-compatibility support for the stderr option.
    if ($$self{opt_stderr} and not $$self{opt_errors}) {
        $$self{opt_errors} = 'stderr';
    }
    delete $$self{opt_stderr};

    # Backwards-compatibility support for the utf8 option.
    if ($$self{opt_utf8} && !$$self{opt_encoding}) {
        $$self{opt_encoding} = 'UTF-8';
    }
    delete $$self{opt_utf8};

    # Validate the errors parameter and act on it.
    $$self{opt_errors} //= 'pod';
    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
        $self->no_errata_section (1);
        $self->complain_stderr (1);
        if ($$self{opt_errors} eq 'die') {
            $$self{complain_die} = 1;
        }
    } elsif ($$self{opt_errors} eq 'pod') {
        $self->no_errata_section (0);
        $self->complain_stderr (0);
    } elsif ($$self{opt_errors} eq 'none') {
        $self->no_errata_section (1);
        $self->no_whining (1);
    } else {
        # The option lives under opt_errors like every other constructor
        # argument, so that is the key to report.
        croak (qq(Invalid errors setting: "$$self{opt_errors}"));
    }

    # NOTE(review): nothing in this constructor ever sets the "errors" key
    # (options are stored under opt_*), so this delete is a no-op.  It is
    # kept as-is so the set of keys left on the object does not change.
    delete $$self{errors};

    # Initialize various things from our parameters.
    $$self{opt_alt}      //= 0;
    $$self{opt_indent}   //= 4;
    $$self{opt_margin}   //= 0;
    $$self{opt_loose}    //= 0;
    $$self{opt_sentence} //= 0;
    $$self{opt_width}    //= 76;

    # Figure out what quotes we'll be using for C<> text.  A single
    # character is used on both sides; an even-length string is split in half
    # into left and right quotes.
    $$self{opt_quotes} ||= '"';
    if ($$self{opt_quotes} eq 'none') {
        $$self{LQUOTE} = $$self{RQUOTE} = '';
    } elsif (length ($$self{opt_quotes}) == 1) {
        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
        my $length = length ($$self{opt_quotes}) / 2;
        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
    } else {
        croak qq(Invalid quote specification "$$self{opt_quotes}");
    }

    # Configure guesswork based on options.  No setting or "all" enables the
    # quoting guesswork, "none" disables everything, and anything else is
    # taken as a comma-separated list of guesswork types to enable.
    my $guesswork = $self->{opt_guesswork} || q{};
    my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork);
    if (!%guesswork || $guesswork{all}) {
        $$self{GUESSWORK} = {quoting => 1};
    } elsif ($guesswork{none}) {
        $$self{GUESSWORK} = {};
    } else {
        $$self{GUESSWORK} = {%guesswork};
    }

    # If requested, do something with the non-POD text.
    $self->code_handler (\&handle_code) if $$self{opt_code};

    # Return the created object.
    return $self;
}

##############################################################################
# Core parsing
##############################################################################

# This is the glue that connects the code below with Pod::Simple itself.
# The goal is to convert the event stream coming from the POD parser into method
# calls to handlers once the complete content of a tag has been seen.  Each
# paragraph or POD command will have textual content associated with it, and
# as soon as all of a paragraph or POD command has been seen, that content
# will be passed in to the corresponding method for handling that type of
# object.  The exceptions are handlers for lists, which have opening tag
# handlers and closing tag handlers that will be called right away.
#
# The internal hash key PENDING is used to store the contents of a tag until
# all of it has been seen.  It holds a stack of open tags, each one
# represented by a tuple of the attributes hash for the tag and the contents
# of the tag.

# Add a block of text to the contents of the current node, formatting it
# according to the current formatting instructions as we do.
#
# $self - The formatter object (only its PENDING stack is used)
# $text - Text to append to the innermost pending tag
sub _handle_text {
    my ($self, $text) = @_;
    my $tag = $$self{PENDING}[-1];
    $$tag[1] .= $text;
}

# Given an element name, get the corresponding method name.  Dashes become
# underscores, the name is lowercased, and every character other than
# lowercase letters, digits, and underscores is then dropped.
#
# $self    - The formatter object (unused)
# $element - Element name as reported by the parser
#
# Returns: the sanitized name to append to cmd_, start_, or end_
sub method_for_element {
    my ($self, $element) = @_;
    $element =~ tr/-/_/;
    $element =~ tr/A-Z/a-z/;
    $element =~ tr/_a-z0-9//cd;
    return $element;
}

# Handle the start of a new element.
# If cmd_element is defined, assume that we need to collect the entire tree
# for this element before passing it to the element method, and create a new
# tree into which we'll collect blocks of text and nested elements.
# Otherwise, if start_element is defined, call it.
#
# $self    - The formatter object
# $element - Element name as reported by the parser
# $attrs   - Attribute hash for the element
sub _handle_element_start {
    my ($self, $element, $attrs) = @_;
    my $method = $self->method_for_element ($element);

    # If we have a command handler, we need to accumulate the contents of the
    # tag before calling it.
    if ($self->can ("cmd_$method")) {
        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
    } elsif ($self->can ("start_$method")) {
        my $handler = 'start_' . $method;
        $self->$handler ($attrs, '');
    }
}

# Handle the end of an element.  If we had a cmd_ method for this element,
# this is where we pass along the text that we've accumulated.  Otherwise, if
# we have an end_ method for the element, call that.
#
# $self    - The formatter object
# $element - Element name as reported by the parser
sub _handle_element_end {
    my ($self, $element) = @_;
    my $method = $self->method_for_element ($element);

    # If we have a command handler, pull off the pending text and pass it to
    # the handler along with the saved attribute hash.
    if ($self->can ("cmd_$method")) {
        my $tag = pop @{ $$self{PENDING} };
        my $handler = 'cmd_' . $method;
        my $text = $self->$handler (@$tag);
        if (defined $text) {
            # More than one entry left means we are still inside an
            # enclosing tag, so the result becomes part of its content.
            # Otherwise only the base entry remains and the text is output.
            if (@{ $$self{PENDING} } > 1) {
                $$self{PENDING}[-1][1] .= $text;
            } else {
                $self->output ($text);
            }
        }
    } elsif ($self->can ("end_$method")) {
        my $handler = 'end_' . $method;
        $self->$handler ();
    }
}

##############################################################################
# Output formatting
##############################################################################

# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
# because it plays games with tabs.  We can't use formline, even though we'd
# really like to, because it screws up non-printing characters.  So we have to
# do the wrapping ourselves.
#
# $self - The formatter object (MARGIN and opt_width are read)
# $text - Text to wrap
#
# Returns: the wrapped text, each line indented by MARGIN spaces, with any
#          trailing whitespace replaced by exactly two newlines
sub wrap {
    my ($self, $text) = @_;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{opt_width} - $$self{MARGIN};
    while (length ($text) > $width) {
        # Prefer to break at whitespace within the available width.  If the
        # line has no such break, cut it at exactly the width instead.
        if (   $text =~ s/^([^\n]{0,$width})[ \t\n]+//
            || $text =~ s/^([^\n]{$width})//)
        {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $text;
    $output =~ s/\s+$/\n\n/;
    return $output;
}

# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text.
sub reformat {
    my ($self, $text) = @_;

    # If we're trying to preserve two spaces after sentences, do some munging
    # to support that.  Otherwise, smash all repeated whitespace.  Be careful
    # not to use \s here, which in Unicode input may match non-breaking spaces
    # that we don't want to smash.
    if ($$self{opt_sentence}) {
        $text =~ s/ +$//mg;
        $text =~ s/\.\n/. \n/g;
        $text =~ s/\n/ /g;
        $text =~ s/   +/  /g;
    } else {
        $text =~ s/[ \t\n]+/ /g;
    }
    return $self->wrap ($text);
}

# Output text to the output device.  Replace non-breaking spaces with spaces
# and soft hyphens with nothing, and then determine the output encoding.
#
# $self - The formatter object
# @text - Pieces of text, which are joined together before being printed
sub output {
    my ($self, @text) = @_;
    my $text = join ('', @text);
    if ($NBSP) {
        $text =~ s/$NBSP/ /g;
    }
    if ($SHY) {
        $text =~ s/$SHY//g;
    }

    # The logic used here is described in the POD documentation.  Prefer the
    # configured encoding, then the pass-through option of using the same
    # encoding as the input, and then UTF-8, but commit to an encoding for the
    # document.
    #
    # ENCODE says whether to encode or not and is turned off if there is a
    # PerlIO encoding layer (in start_document).  ENCODING is the encoding
    # that we previously committed to and is cleared at the start of each
    # document.
    if ($$self{ENCODE}) {
        my $encoding = $$self{ENCODING};
        if (!$encoding) {
            $encoding = $self->encoding();
            if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) {
                $encoding = 'UTF-8';
            }
            if ($encoding) {
                $$self{ENCODING} = $encoding;
            }
        }
        if ($encoding) {
            # Encode calls this with the numeric code point of each character
            # that cannot be represented.  Report it and return the bytes to
            # substitute for it.
            my $check = sub {
                my ($char) = @_;

                # The code point is a number, so it has to be formatted as
                # hex rather than parsed from hex.
                my $display = '"\x{' . sprintf ('%04x', $char) . '}"';
                my $error = "$display does not map to $$self{ENCODING}";
                $self->whine ($self->line_count(), $error);
                return Encode::encode ($$self{ENCODING}, chr($char));
            };
            print { $$self{output_fh} } encode ($encoding, $text, $check);
        } else {
            print { $$self{output_fh} } $text;
        }
    } else {
        print { $$self{output_fh} } $text;
    }
}

# Output a block of code (something that isn't part of the POD text).  Called
# by handle_code only if we were given the code option.  Exists here only so
# that it can be overridden by subclasses.
sub output_code { $_[0]->output ($_[1]) }

##############################################################################
# Document initialization
##############################################################################

# Set up various things that have to be initialized on a per-document basis.
#
# $self  - The formatter object
# $attrs - Attribute hash for the document; only "contentless" is examined
#
# Returns: the empty string, so that nothing is added to the output
sub start_document {
    my ($self, $attrs) = @_;

    # Remember whether the document has no content, unless we've been told
    # to always emit something.
    if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
        $$self{CONTENTLESS} = 1;
    } else {
        delete $$self{CONTENTLESS};
    }
    my $margin = $$self{opt_indent} + $$self{opt_margin};

    # Initialize a few per-document variables.
    $$self{INDENTS} = [];           # Stack of indentations.
    $$self{MARGIN}  = $margin;      # Default left margin.
    $$self{PENDING} = [[]];         # Pending output.

    # We have to redo encoding handling for each document.  Check whether the
    # output file handle already has a PerlIO encoding layer set and, if so,
    # disable encoding.  Any failure while inspecting the layers is ignored,
    # which leaves encoding enabled.
    $$self{ENCODE} = 1;
    eval {
        require PerlIO;
        my @options = (output => 1, details => 1);
        my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
        if ($flag && ($flag & PerlIO::F_UTF8 ())) {
            $$self{ENCODE} = 0;
        }
    };
    $$self{ENCODING} = $$self{opt_encoding};

    return '';
}

# Handle the end of the document.  The only thing we do is handle dying on POD
# errors, since the underlying parser currently doesn't.
sub end_document {
    my ($self) = @_;

    # complain_die is set by the constructor for the "die" errors setting.
    if ($$self{complain_die} && $self->errors_seen) {
        croak ("POD document had syntax errors");
    }
}

##############################################################################
# Text blocks
##############################################################################

# Intended for subclasses to override, this method returns text with any
# non-printing formatting codes stripped out so that length() correctly
# returns the length of the text.  For basic Pod::Text, it does nothing.
sub strip_format {
    my ($self, $string) = @_;
    return $string;
}

# This method is called whenever an =item command is complete (in other words,
# we've seen its associated paragraph or know for certain that it doesn't have
# one).  It gets the paragraph associated with the item as an argument.  If
# that argument is empty, just output the item tag; if it contains a newline,
# output the item tag followed by the newline.  Otherwise, see if there's
# enough room for us to output the item tag in the margin of the text or if we
# have to put it on a separate line.
#
# $self - The formatter object; the pending tag is taken from ITEM
# $text - Paragraph belonging to the item (may be undef or only whitespace)
sub item {
    my ($self, $text) = @_;
    my $tag = $$self{ITEM};
    unless (defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $$self{ITEM};

    # Calculate the indentation and margin.  $fits is set to true if the tag
    # will fit into the margin of the paragraph given our indentation level.
    my $indent = $$self{INDENTS}[-1] // $$self{opt_indent};
    my $margin = ' ' x $$self{opt_margin};
    my $tag_length = length ($self->strip_format ($tag));
    my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);

    # If the tag doesn't fit, or if we have no associated text, print out the
    # tag separately.  Otherwise, put the tag in the margin of the paragraph.
    if (!$text || $text =~ /^\s+$/ || !$fits) {
        # Temporarily pull the margin back to the enclosing indentation so
        # that the tag is formatted there.
        my $realindent = $$self{MARGIN};
        $$self{MARGIN} = $indent;
        my $output = $self->reformat ($tag);
        $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
        $output =~ s/\n*$/\n/;

        # If the text is just whitespace, we have an empty item paragraph;
        # this can result from =over/=item/=back without any intermixed
        # paragraphs.  Insert some whitespace to keep the =item from merging
        # into the next paragraph.
        $output .= "\n" if $text && $text =~ /^\s*$/;

        $self->output ($output);
        $$self{MARGIN} = $realindent;
        $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
    } else {
        # Format the paragraph normally and then overwrite the leading
        # blanks of its first line with the tag.
        my $space = ' ' x $indent;
        $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
        $text = $self->reformat ($text);
        $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
        my $tagspace = ' ' x $tag_length;
        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($text);
    }
}

# Handle a basic block of text.  The only tricky thing here is that if there
# is a pending item tag, we need to format this as an item paragraph.
sub cmd_para {
    my ($self, $attrs, $text) = @_;
    $text =~ s/\s+$/\n/;
    if (defined $$self{ITEM}) {
        $self->item ($text . "\n");
    } else {
        $self->output ($self->reformat ($text . "\n"));
    }
    return '';
}

# Handle a verbatim paragraph.  Just print it out, but indent it according to
# our margin.
sub cmd_verbatim {
    my ($self, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};
    return if $text =~ /^\s*$/;

    # Indent every line that has content by the current margin, leaving
    # blank lines alone.
    $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
    $text =~ s/\s*$/\n\n/;
    $self->output ($text);
    return '';
}

# Handle literal text (produced by =for and similar constructs).  Just output
# it with the minimum of changes.
sub cmd_data {
    my ($self, $attrs, $text) = @_;
    $text =~ s/^\n+//;
    $text =~ s/\n{0,2}$/\n/;
    $self->output ($text);
    return '';
}

##############################################################################
# Headings
##############################################################################

# The common code for handling all headers.  Takes the header text, the
# indentation, and the surrounding marker for the alt formatting method.
#
# $self   - The formatter object
# $text   - Text of the heading
# $indent - Indentation to add to the margin (not used in alt mode)
# $marker - Marker placed before the heading in alt mode; its mirror image
#           is placed after the heading
#
# Returns: the empty string, since the heading is output directly
sub heading {
    my ($self, $text, $indent, $marker) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $text =~ s/\s+$//;
    if ($$self{opt_alt}) {
        my $closemark = scalar reverse ($marker);
        my $margin = ' ' x $$self{opt_margin};
        $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
    } else {
        $text .= "\n" if $$self{opt_loose};
        my $margin = ' ' x ($$self{opt_margin} + $indent);
        $self->output ($margin . $text . "\n");
    }
    return '';
}

# First level heading.
sub cmd_head1 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, 0, '====');
}

# Second level heading.
sub cmd_head2 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} / 2, '==  ');
}

# Third level heading.
sub cmd_head3 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '=   ');
}

# Fourth level heading.
sub cmd_head4 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '-   ');
}

##############################################################################
# List handling
##############################################################################

# Handle the beginning of an =over block.  Takes the attr hash.  This is
# called by the handlers for the four different types of lists (bullet,
# number, text, and block).
sub over_common_start {
    my ($self, $attrs) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    # Find the indentation level.  Anything that isn't an integer of at most
    # four digits falls back to the configured indentation.
    my $indent = $$attrs{indent};
    unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
        $indent = $$self{opt_indent};
    }

    # Add this to our stack of indents and increase our current margin.
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($indent + 0);
    return '';
}

# End an =over block.  Takes no options other than the class pointer.  Output
# any pending items and then pop one level of indentation.
sub over_common_end {
    my ($self) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $$self{MARGIN} = pop @{ $$self{INDENTS} };
    return '';
}

# Dispatch the start and end calls as appropriate.
sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
sub start_over_number { $_[0]->over_common_start ($_[1]) }
sub start_over_text   { $_[0]->over_common_start ($_[1]) }
sub start_over_block  { $_[0]->over_common_start ($_[1]) }
sub end_over_bullet { $_[0]->over_common_end }
sub end_over_number { $_[0]->over_common_end }
sub end_over_text   { $_[0]->over_common_end }
sub end_over_block  { $_[0]->over_common_end }

# The common handler for all item commands.  Takes the type of the item, the
# attributes, and then the text of the item.
sub item_common {
    my ($self, $type, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};

    # Clean up the text.  We want to end up with two variables, one ($text)
    # which contains any body text after taking out the item portion, and
    # another ($item) which contains the actual item text.  Note the use of
    # the internal Pod::Simple attribute here; that's a potential land mine.
    $text =~ s/\s+$//;
    my $item;
    if ($type eq 'bullet') {
        $item = '*';
    } elsif ($type eq 'number') {
        $item = $$attrs{'~orig_content'};
    } else {
        $item = $text;
        $item =~ s/\s*\n\s*/ /g;
        $text = '';
    }
    $$self{ITEM} = $item;

    # If body text for this item was included, go ahead and output that now.
    if ($text) {
        $text =~ s/\s*$/\n/;
        $self->item ($text);
    }
    return '';
}

# Dispatch the item commands to the appropriate place.
sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
sub cmd_item_text   { my $self = shift; $self->item_common ('text',   @_) }
sub cmd_item_block  { my $self = shift; $self->item_common ('block',  @_) }

##############################################################################
# Formatting codes
##############################################################################

# The simple ones.  Constructor options are stored with an opt_ prefix, so
# the alt setting has to be read from opt_alt.
sub cmd_b { return $_[0]{opt_alt} ? "``$_[2]''" : $_[2] }
sub cmd_f { return $_[0]{opt_alt} ? "\"$_[2]\"" : $_[2] }
sub cmd_i { return '*' . $_[2] . '*' }
sub cmd_x { return '' }

# Convert all internal whitespace to $NBSP.
sub cmd_s {
    my ($self, $attrs, $text) = @_;
    $text =~ s{ \s }{$NBSP}xmsg;
    return $text;
}

# Apply a whole bunch of messy heuristics to not quote things that don't
# benefit from being quoted.  These originally come from Barrie Slaymaker and
# largely duplicate code in Pod::Man.
sub cmd_c {
    my ($self, $attrs, $text) = @_;

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[[^]]+\] | \{[^}]+\} )?';

    # Check for things that we don't want to quote, and if we find any of
    # them, return the string with just a font change and no quoting.
    #
    # Traditionally, Pod::Text has not quoted Perl variables, functions,
    # numbers, or hex constants, but this is not always desirable.  Make this
    # optional on the quoting guesswork flag.
    my $extra = qr{(?!)}xms;    # never matches
    if ($$self{GUESSWORK}{quoting}) {
        $extra = qr{
            \$+ [\#^]? \S $index                    # special ($^F, $")
          | [\$\@%&*]+ \#? [:\'\w]+ $index          # plain var or func
          | [\$\@%&*]* [:\'\w]+
            (?: -> )? \(\s*[^\s,\)]*\s*\)           # 0/1-arg func call
          | [+-]? ( \d[\d.]* | \.\d+ )
            (?: [eE][+-]?\d+ )?                     # a number
          | 0x [a-fA-F\d]+                          # a hex constant
        }xms;
    }
    $text =~ m{
        ^\s*
        (?:
            ( [\'\`\"] ) .* \1                      # already quoted
          | \` .* \'                                # `quoted'
          | $extra
        )
        \s*\z
    }xms and return $text;

    # If we didn't return, go ahead and quote the text.
    return $$self{opt_alt}
        ? "``$text''"
        : "$$self{LQUOTE}$text$$self{RQUOTE}";
}

# Links reduce to the text that we're given, wrapped in angle brackets if it's
# a URL.
sub cmd_l {
    my ($self, $attrs, $text) = @_;
    if ($$attrs{type} eq 'url') {
        if (not defined($$attrs{to}) or $$attrs{to} eq $text) {
            return "<$text>";
        } elsif ($$self{opt_nourls}) {
            return $text;
        } else {
            return "$text <$$attrs{to}>";
        }
    } else {
        return $text;
    }
}

##############################################################################
# Backwards compatibility
##############################################################################

# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
# Arguments: optional leading flags (-a for alt formatting, -<number> for the
# output width), then INPUT and optionally OUTPUT. With two arguments, INPUT
# is opened with two-argument open and OUTPUT is used as the output file
# handle, which is closed afterwards. With one argument, output goes to
# STDOUT. Returns whatever parse_file returns; croaks if INPUT cannot be
# opened.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module. But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>. The defined check avoids
    # an uninitialized-value warning when no arguments remain.
    while (defined $_[0] && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt   => 1) }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle. That means we want to call parse_from_filehandle(), which means
    # we need to turn the first argument into a file handle. Magic open will
    # handle the <&STDIN case automagically.
    if (defined $_[1]) {
        my ($input, $output) = @_;

        # Two-argument open is deliberate: the documented interface accepts
        # anything two-argument open does. A lexical handle replaces the old
        # package-global bareword handle and is closed when it leaves scope.
        open (my $in, $input)
          or croak ("Can't open $input for reading: $!\n");
        $parser->output_fh ($output);
        my $retval = $parser->parse_file ($in);
        my $fh = $parser->output_fh ();
        close $fh;
        return $retval;
    } else {
        $parser->output_fh (\*STDOUT);
        return $parser->parse_file (@_);
    }
}

# Reset the underlying Pod::Simple object between calls to parse_from_file so
# that the same object can be reused to convert multiple pages. An optional
# leading hash reference of old Pod::Parser options is accepted; only
# -cutting is examined. Returns the value of Pod::Simple's parse_from_file.
sub parse_from_file {
    my $self = shift;
    $self->reinit;

    # Fake the old cutting option to Pod::Parser. This fiddles with internal
    # Pod::Simple state and is quite ugly; we need a better approach.
    if (ref ($_[0]) eq 'HASH') {
        my $opts = shift @_;
        if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
            $$self{in_pod} = 1;
            $$self{last_was_blank} = 1;
        }
    }

    # Do the work.
    my $retval = $self->Pod::Simple::parse_from_file (@_);

    # Flush output, since Pod::Simple doesn't do this. Ideally we should also
    # close the file descriptor if we had to open one, but we can't easily
    # figure this out. The output handle is selected temporarily so that $|
    # applies to it, and both the selected handle and $| are restored.
    my $fh = $self->output_fh ();
    my $oldfh = select $fh;
    my $oldflush = $|;
    $| = 1;
    print $fh '';
    $| = $oldflush;
    select $oldfh;
    return $retval;
}

# Pod::Simple failed to provide this backward compatibility function, so
# implement it ourselves. File handles are one of the inputs that
# parse_from_file supports.
sub parse_from_filehandle {
    my $self = shift;
    $self->parse_from_file (@_);
}

# Pod::Simple's parse_file doesn't set output_fh. Wrap the call and do so
# ourself unless it was already set by the caller, since our documentation has
# always said that this should work.
sub parse_file {
    my ($self, $in) = @_;
    unless (defined $$self{output_fh}) {
        $self->output_fh (\*STDOUT);
    }
    return $self->SUPER::parse_file ($in);
}

# Do the same for parse_lines, just to be polite. Pod::Simple's man page
# implies that the caller is responsible for setting this, but I don't see any
# reason not to set a default.
sub parse_lines {
    my ($self, @lines) = @_;
    unless (defined $$self{output_fh}) {
        $self->output_fh (\*STDOUT);
    }
    return $self->SUPER::parse_lines (@lines);
}

# Likewise for parse_string_document.
# Format a POD document supplied as a single string. Output goes to STDOUT
# unless the caller has already chosen an output file handle; the actual
# parsing is delegated to the parent class.
sub parse_string_document {
    my ($self, $doc) = @_;
    $self->output_fh (\*STDOUT) if !defined $$self{output_fh};
    return $self->SUPER::parse_string_document ($doc);
}

##############################################################################
# Module return value and documentation
##############################################################################

1;
__END__

=for stopwords
alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
parsers EBCDIC autodetecting superset unrepresentable FH NNN

=head1 NAME

Pod::Text - Convert POD data to formatted text

=head1 SYNOPSIS

    use Pod::Text;
    my $parser = Pod::Text->new (sentence => 1, width => 78);

    # Read POD from STDIN and write to STDOUT.
    $parser->parse_from_filehandle;

    # Read POD from file.pod and write to file.txt.
    $parser->parse_from_file ('file.pod', 'file.txt');

=head1 DESCRIPTION

Pod::Text is a module that can convert documentation in the POD format (the
preferred language for documenting Perl) into formatted text. It uses no
special formatting controls or codes, and its output is therefore suitable for
nearly any device.
842b39c5158Smillert 843e0680481Safresh1=head2 Encoding 844b39c5158Smillert 845e0680481Safresh1Pod::Text uses the following logic to choose an output encoding, in order: 846e0680481Safresh1 847e0680481Safresh1=over 4 848e0680481Safresh1 849e0680481Safresh1=item 1. 850e0680481Safresh1 851e0680481Safresh1If a PerlIO encoding layer is set on the output file handle, do not do any 852e0680481Safresh1output encoding and instead rely on the PerlIO encoding layer. 853e0680481Safresh1 854e0680481Safresh1=item 2. 855e0680481Safresh1 856e0680481Safresh1If the C<encoding> or C<utf8> options are set, use the output encoding 857e0680481Safresh1specified by those options. 858e0680481Safresh1 859e0680481Safresh1=item 3. 860e0680481Safresh1 861e0680481Safresh1If the input encoding of the POD source file was explicitly specified (using 862e0680481Safresh1C<=encoding>) or automatically detected by Pod::Simple, use that as the output 863e0680481Safresh1encoding as well. 864e0680481Safresh1 865e0680481Safresh1=item 4. 866e0680481Safresh1 867e0680481Safresh1Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output 868e0680481Safresh1encoding. Since this is a superset of ASCII, this will result in ASCII output 869e0680481Safresh1unless the POD input contains non-ASCII characters without declaring or 870e0680481Safresh1autodetecting an encoding (usually via EZ<><> escapes). 871e0680481Safresh1 872e0680481Safresh1=item 5. 873e0680481Safresh1 874e0680481Safresh1Otherwise, for EBCDIC systems, output without doing any encoding and hope 875e0680481Safresh1this works. 876e0680481Safresh1 877e0680481Safresh1=back 878e0680481Safresh1 879e0680481Safresh1One caveat: Pod::Text has to commit to an output encoding the first time it 880e0680481Safresh1outputs a non-ASCII character, and then has to stick with it for consistency. 881e0680481Safresh1However, C<=encoding> commands don't have to be at the beginning of a POD 882e0680481Safresh1document. 
If someone uses a non-ASCII character early in a document with an 883e0680481Safresh1escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later, 884e0680481Safresh1ideally Pod::Text would follow rule 3 and output the entire document as ISO 885e0680481Safresh18859-1. Instead, it will commit to UTF-8 following rule 4 as soon as it sees 886e0680481Safresh1that escape, and then stick with that encoding for the rest of the document. 887e0680481Safresh1 888e0680481Safresh1Unfortunately, there's no universally good choice for an output encoding. 889e0680481Safresh1Each choice will be incorrect in some circumstances. This approach was chosen 890e0680481Safresh1primarily for backwards compatibility. Callers should consider forcing the 891e0680481Safresh1output encoding via C<encoding> if they have any knowledge about what encoding 892e0680481Safresh1the user may expect. 893e0680481Safresh1 894e0680481Safresh1In particular, consider importing the L<Encode::Locale> module, if available, 895e0680481Safresh1and setting C<encoding> to C<locale> to use an output encoding appropriate to 896e0680481Safresh1the user's locale. But be aware that if the user is not using locales or is 897e0680481Safresh1using a locale of C<C>, Encode::Locale will set the output encoding to 898e0680481Safresh1US-ASCII. This will cause all non-ASCII characters to be replaced with C<?> 899e0680481Safresh1and produce a flurry of warnings about unsupported characters, which may or 900e0680481Safresh1may not be what you want. 901e0680481Safresh1 902e0680481Safresh1=head1 CLASS METHODS 903e0680481Safresh1 904e0680481Safresh1=over 4 905e0680481Safresh1 906e0680481Safresh1=item new(ARGS) 907e0680481Safresh1 908e0680481Safresh1Create a new Pod::Text object. ARGS should be a list of key/value pairs, 909e0680481Safresh1where the keys are chosen from the following. 
Each option is annotated with 910e0680481Safresh1the version of Pod::Text in which that option was added with its current 911e0680481Safresh1meaning. 912b39c5158Smillert 913b39c5158Smillert=over 4 914b39c5158Smillert 915b39c5158Smillert=item alt 916b39c5158Smillert 917e0680481Safresh1[2.00] If set to a true value, selects an alternate output format that, among 918e0680481Safresh1other things, uses a different heading style and marks C<=item> entries with a 919b39c5158Smillertcolon in the left margin. Defaults to false. 920b39c5158Smillert 921b39c5158Smillert=item code 922b39c5158Smillert 923e0680481Safresh1[2.13] If set to a true value, the non-POD parts of the input file will be 924e0680481Safresh1included in the output. Useful for viewing code documented with POD blocks 925e0680481Safresh1with the POD rendered and the code left intact. 926e0680481Safresh1 927e0680481Safresh1=item encoding 928e0680481Safresh1 929e0680481Safresh1[5.00] Specifies the encoding of the output. The value must be an encoding 930e0680481Safresh1recognized by the L<Encode> module (see L<Encode::Supported>). If the output 931e0680481Safresh1contains characters that cannot be represented in this encoding, that is an 932e0680481Safresh1error that will be reported as configured by the C<errors> option. If error 933e0680481Safresh1handling is other than C<die>, the unrepresentable character will be replaced 934e0680481Safresh1with the Encode substitution character (normally C<?>). 935e0680481Safresh1 936e0680481Safresh1If the output file handle has a PerlIO encoding layer set, this parameter will 937e0680481Safresh1be ignored and no encoding will be done by Pod::Text. It will instead rely on 938e0680481Safresh1the encoding layer to make whatever output encoding transformations are 939e0680481Safresh1desired. 
940e0680481Safresh1 941e0680481Safresh1WARNING: The input encoding of the POD source is independent from the output 942e0680481Safresh1encoding, and setting this option does not affect the interpretation of the 943e0680481Safresh1POD input. Unless your POD source is US-ASCII, its encoding should be 944e0680481Safresh1declared with the C<=encoding> command in the source, as near to the top of 945e0680481Safresh1the file as possible. If this is not done, Pod::Simple will attempt to 946e0680481Safresh1guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will 947e0680481Safresh1produce warnings. See L<perlpod(1)> for more information. 948b39c5158Smillert 949e9ce3842Safresh1=item errors 950e9ce3842Safresh1 951e0680481Safresh1[3.17] How to report errors. C<die> says to throw an exception on any POD 952e0680481Safresh1formatting error. C<stderr> says to report errors on standard error, but not 953e0680481Safresh1to throw an exception. C<pod> says to include a POD ERRORS section in the 954e0680481Safresh1resulting documentation summarizing the errors. C<none> ignores POD errors 955e0680481Safresh1entirely, as much as possible. 956e9ce3842Safresh1 957e5157e49Safresh1The default is C<pod>. 958e9ce3842Safresh1 959e0680481Safresh1=item guesswork 960e0680481Safresh1 961e0680481Safresh1[5.01] By default, Pod::Text applies some default formatting rules based on 962e0680481Safresh1guesswork and regular expressions that are intended to make writing Perl 963e0680481Safresh1documentation easier and require less explicit markup. These rules may not 964e0680481Safresh1always be appropriate, particularly for documentation that isn't about Perl. 965e0680481Safresh1This option allows turning all or some of it off. 966e0680481Safresh1 967e0680481Safresh1The special value C<all> enables all guesswork. This is also the default for 968e0680481Safresh1backward compatibility reasons. The special value C<none> disables all 969e0680481Safresh1guesswork. 
Otherwise, the value of this option should be a comma-separated 970e0680481Safresh1list of one or more of the following keywords: 971e0680481Safresh1 972e0680481Safresh1=over 4 973e0680481Safresh1 974e0680481Safresh1=item quoting 975e0680481Safresh1 976e0680481Safresh1If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by 977e0680481Safresh1double quotes in nroff (terminal) output unless the contents are already 978e0680481Safresh1quoted. When this guesswork is enabled, quote marks will also be suppressed 979e0680481Safresh1for Perl variables, function names, function calls, numbers, and hex 980e0680481Safresh1constants. 981e0680481Safresh1 982e0680481Safresh1=back 983e0680481Safresh1 984e0680481Safresh1Any unknown guesswork name is silently ignored (for potential future 985e0680481Safresh1compatibility), so be careful about spelling. 986e0680481Safresh1 987b39c5158Smillert=item indent 988b39c5158Smillert 989e0680481Safresh1[2.00] The number of spaces to indent regular text, and the default 990e0680481Safresh1indentation for C<=over> blocks. Defaults to 4. 991b39c5158Smillert 992b39c5158Smillert=item loose 993b39c5158Smillert 994e0680481Safresh1[2.00] If set to a true value, a blank line is printed after a C<=head1> 995e0680481Safresh1heading. If set to false (the default), no blank line is printed after 996e0680481Safresh1C<=head1>, although one is still printed after C<=head2>. This is the default 997e0680481Safresh1because it's the expected formatting for manual pages; if you're formatting 998b39c5158Smillertarbitrary text documents, setting this to true may result in more pleasing 999b39c5158Smillertoutput. 1000b39c5158Smillert 1001b39c5158Smillert=item margin 1002b39c5158Smillert 1003e0680481Safresh1[2.21] The width of the left margin in spaces. Defaults to 0. This is the 1004e0680481Safresh1margin for all text, including headings, not the amount by which regular text 1005e0680481Safresh1is indented; for the latter, see the I<indent> option. 
To set the right 1006b39c5158Smillertmargin, see the I<width> option. 1007b39c5158Smillert 1008e9ce3842Safresh1=item nourls 1009e9ce3842Safresh1 1010e0680481Safresh1[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are 1011e0680481Safresh1formatted to show both the anchor text and the URL. In other words: 1012e9ce3842Safresh1 1013e9ce3842Safresh1 L<foo|http://example.com/> 1014e9ce3842Safresh1 1015e9ce3842Safresh1is formatted as: 1016e9ce3842Safresh1 1017e9ce3842Safresh1 foo <http://example.com/> 1018e9ce3842Safresh1 1019e0680481Safresh1This option, if set to a true value, suppresses the URL when anchor text is 1020e0680481Safresh1given, so this example would be formatted as just C<foo>. This can produce 1021e0680481Safresh1less cluttered output in cases where the URLs are not particularly important. 1022e9ce3842Safresh1 1023b39c5158Smillert=item quotes 1024b39c5158Smillert 1025e0680481Safresh1[4.00] Sets the quote marks used to surround CE<lt>> text. If the value is a 1026e0680481Safresh1single character, it is used as both the left and right quote. Otherwise, it 1027e0680481Safresh1is split in half, and the first half of the string is used as the left quote 1028e0680481Safresh1and the second is used as the right quote. 1029b39c5158Smillert 1030b39c5158SmillertThis may also be set to the special value C<none>, in which case no quote 1031b39c5158Smillertmarks are added around CE<lt>> text. 1032b39c5158Smillert 1033b39c5158Smillert=item sentence 1034b39c5158Smillert 1035e0680481Safresh1[3.00] If set to a true value, Pod::Text will assume that each sentence ends 1036e0680481Safresh1in two spaces, and will try to preserve that spacing. If set to false, all 1037e0680481Safresh1consecutive whitespace in non-verbatim paragraphs is compressed into a single 1038e0680481Safresh1space. Defaults to false. 
1039b39c5158Smillert 1040b39c5158Smillert=item stderr 1041b39c5158Smillert 1042e0680481Safresh1[3.10] Send error messages about invalid POD to standard error instead of 1043e0680481Safresh1appending a POD ERRORS section to the generated output. This is equivalent to 1044e0680481Safresh1setting C<errors> to C<stderr> if C<errors> is not already set. It is 1045e0680481Safresh1supported for backward compatibility. 1046b39c5158Smillert 1047b39c5158Smillert=item utf8 1048b39c5158Smillert 1049e0680481Safresh1[3.12] If this option is set to a true value, the output encoding is set to 1050e0680481Safresh1UTF-8. This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding> 1051e0680481Safresh1is not already set. It is supported for backward compatibility. 1052b39c5158Smillert 1053b39c5158Smillert=item width 1054b39c5158Smillert 1055e0680481Safresh1[2.00] The column at which to wrap text on the right-hand side. Defaults to 1056e0680481Safresh176. 1057b39c5158Smillert 1058b39c5158Smillert=back 1059b39c5158Smillert 1060e0680481Safresh1=back 1061e5157e49Safresh1 1062e0680481Safresh1=head1 INSTANCE METHODS 1063e5157e49Safresh1 1064e0680481Safresh1As a derived class from Pod::Simple, Pod::Text supports the same methods and 1065e0680481Safresh1interfaces. See L<Pod::Simple> for all the details. This section summarizes 1066e0680481Safresh1the most-frequently-used methods and the ones added by Pod::Text. 1067e5157e49Safresh1 1068e0680481Safresh1=over 4 1069e5157e49Safresh1 1070e0680481Safresh1=item output_fh(FH) 1071e0680481Safresh1 1072e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document() 1073e0680481Safresh1to the file handle FH instead of C<STDOUT>. 1074e0680481Safresh1 1075e0680481Safresh1=item output_string(REF) 1076e0680481Safresh1 1077e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document() 1078e0680481Safresh1to the scalar variable pointed to by REF, rather than C<STDOUT>. 
For example: 1079e0680481Safresh1 1080e0680481Safresh1 my $parser = Pod::Text->new(); 1081e0680481Safresh1 my $output; 1082e0680481Safresh1 $parser->output_string(\$output); 1083e0680481Safresh1 $parser->parse_file('/some/input/file'); 1084e0680481Safresh1 1085e0680481Safresh1Be aware that the output in that variable will already be encoded (see 1086e0680481Safresh1L</Encoding>). 1087e0680481Safresh1 1088e0680481Safresh1=item parse_file(PATH) 1089e0680481Safresh1 1090e0680481Safresh1Read the POD source from PATH and format it. By default, the output is sent 1091e0680481Safresh1to C<STDOUT>, but this can be changed with the output_fh() or output_string() 1092e0680481Safresh1methods. 1093e0680481Safresh1 1094e0680481Safresh1=item parse_from_file(INPUT, OUTPUT) 1095e0680481Safresh1 1096e0680481Safresh1=item parse_from_filehandle(FH, OUTPUT) 1097e0680481Safresh1 1098e0680481Safresh1Read the POD source from INPUT, format it, and output the results to OUTPUT. 1099e0680481Safresh1 1100e0680481Safresh1parse_from_filehandle() is provided for backward compatibility with older 1101e0680481Safresh1versions of Pod::Text. parse_from_file() should be used instead. 1102e0680481Safresh1 1103e0680481Safresh1=item parse_lines(LINES[, ...[, undef]]) 1104e0680481Safresh1 1105e0680481Safresh1Parse the provided lines as POD source, writing the output to either C<STDOUT> 1106e0680481Safresh1or the file handle set with the output_fh() or output_string() methods. This 1107e0680481Safresh1method can be called repeatedly to provide more input lines. An explicit 1108e0680481Safresh1C<undef> should be passed to indicate the end of input. 1109e0680481Safresh1 1110e0680481Safresh1This method expects raw bytes, not decoded characters. 
1111e0680481Safresh1 1112e0680481Safresh1=item parse_string_document(INPUT) 1113e0680481Safresh1 1114e0680481Safresh1Parse the provided scalar variable as POD source, writing the output to either 1115e0680481Safresh1C<STDOUT> or the file handle set with the output_fh() or output_string() 1116e0680481Safresh1methods. 1117e0680481Safresh1 1118e0680481Safresh1This method expects raw bytes, not decoded characters. 1119e0680481Safresh1 1120e0680481Safresh1=back 1121e0680481Safresh1 1122e0680481Safresh1=head1 FUNCTIONS 1123e0680481Safresh1 1124e0680481Safresh1Pod::Text exports one function for backward compatibility with older versions. 1125e0680481Safresh1This function is deprecated; instead, use the object-oriented interface 1126e0680481Safresh1described above. 1127e0680481Safresh1 1128e0680481Safresh1=over 4 1129e0680481Safresh1 1130e0680481Safresh1=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT]) 1131e0680481Safresh1 1132e0680481Safresh1Convert the POD source from INPUT to text and write it to OUTPUT. If OUTPUT 1133e0680481Safresh1is not given, defaults to C<STDOUT>. INPUT can be any expression supported as 1134e0680481Safresh1the second argument to two-argument open(). 1135e0680481Safresh1 1136e0680481Safresh1If C<-a> is given as an initial argument, pass the C<alt> option to the 1137e0680481Safresh1Pod::Text constructor. This enables alternative formatting. 1138e0680481Safresh1 1139e0680481Safresh1If C<-NNN> is given as an initial argument, pass the C<width> option to the 1140e0680481Safresh1Pod::Text constructor with the number C<NNN> as its argument. This sets the 1141e0680481Safresh1wrap line width to NNN. 
1142e0680481Safresh1 1143e0680481Safresh1=back 1144b39c5158Smillert 1145b39c5158Smillert=head1 DIAGNOSTICS 1146b39c5158Smillert 1147b39c5158Smillert=over 4 1148b39c5158Smillert 1149b39c5158Smillert=item Bizarre space in item 1150b39c5158Smillert 1151b39c5158Smillert=item Item called without tag 1152b39c5158Smillert 1153b39c5158Smillert(W) Something has gone wrong in internal C<=item> processing. These 1154b39c5158Smillertmessages indicate a bug in Pod::Text; you should never see them. 1155b39c5158Smillert 1156b39c5158Smillert=item Can't open %s for reading: %s 1157b39c5158Smillert 1158b39c5158Smillert(F) Pod::Text was invoked via the compatibility mode pod2text() interface 1159b39c5158Smillertand the input file it was given could not be opened. 1160b39c5158Smillert 1161e9ce3842Safresh1=item Invalid errors setting "%s" 1162e9ce3842Safresh1 1163e9ce3842Safresh1(F) The C<errors> parameter to the constructor was set to an unknown value. 1164e9ce3842Safresh1 1165b39c5158Smillert=item Invalid quote specification "%s" 1166b39c5158Smillert 1167e9ce3842Safresh1(F) The quote specification given (the C<quotes> option to the 1168b8851fccSafresh1constructor) was invalid. A quote specification must be either one 1169b8851fccSafresh1character long or an even number (greater than one) characters long. 1170e9ce3842Safresh1 1171e9ce3842Safresh1=item POD document had syntax errors 1172e9ce3842Safresh1 1173e9ce3842Safresh1(F) The POD document being formatted had syntax errors and the C<errors> 1174e9ce3842Safresh1option was set to C<die>. 1175b39c5158Smillert 1176b39c5158Smillert=back 1177b39c5158Smillert 1178e0680481Safresh1=head1 COMPATIBILITY 1179b39c5158Smillert 1180e0680481Safresh1Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module 1181e0680481Safresh1included with Perl, in Perl 5.6.0. Earlier versions of Perl had a different 1182e0680481Safresh1Pod::Text module, with a different API. 
1183b39c5158Smillert 1184e0680481Safresh1The current API based on L<Pod::Simple> was added in Pod::Text 3.00. 1185e0680481Safresh1Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to 1186e0680481Safresh1incorporate those changes. This is the first version that correctly supports 1187e0680481Safresh1all modern POD syntax. The parse_from_filehandle() method was re-added for 1188e0680481Safresh1backward compatibility in Pod::Text 3.07, included in Perl 5.9.4. 1189b39c5158Smillert 1190e0680481Safresh1Pod::Text 3.12, included in Perl 5.10.1, first implemented the current 1191e0680481Safresh1practice of attempting to match the default output encoding with the input 1192e0680481Safresh1encoding of the POD source, unless overridden by the C<utf8> option or (added 1193e0680481Safresh1later) the C<encoding> option. 1194b39c5158Smillert 1195e0680481Safresh1Support for anchor text in LZ<><> links of type URL was added in Pod::Text 1196e0680481Safresh13.14, included in Perl 5.11.5. 1197b39c5158Smillert 1198e0680481Safresh1parse_lines(), parse_string_document(), and parse_file() set a default output 1199e0680481Safresh1file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18, 1200e0680481Safresh1included in Perl 5.19.5. 1201b39c5158Smillert 1202e0680481Safresh1Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the 1203e0680481Safresh1version of the podlators distribution. All modules included in podlators, and 1204e0680481Safresh1the podlators distribution itself, share the same version number from this 1205e0680481Safresh1point forward. 1206b39c5158Smillert 1207e0680481Safresh1Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC 1208e0680481Safresh1systems, present in all versions back to 3.00, that would cause opening 1209e0680481Safresh1brackets to disappear. 
1210b39c5158Smillert 1211e0680481Safresh1Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it 1212e0680481Safresh1sees a non-ASCII character in the input and the input encoding is not 1213e0680481Safresh1specified. It also commits to an encoding with the first non-ASCII character 1214e0680481Safresh1and does not change the output encoding if the input encoding changes. The 1215e0680481Safresh1L<Encode> module is now used for all output encoding rather than PerlIO 1216e0680481Safresh1layers, which fixes earlier problems with output to scalars. 1217b39c5158Smillert 1218b39c5158Smillert=head1 AUTHOR 1219b39c5158Smillert 1220e0680481Safresh1Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text 1221e0680481Safresh1by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser 1222e0680481Safresh1by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial conversion of 1223e0680481Safresh1Pod::Man to use Pod::Simple provided much-needed guidance on how to use 1224e0680481Safresh1Pod::Simple. 1225b39c5158Smillert 1226b39c5158Smillert=head1 COPYRIGHT AND LICENSE 1227b39c5158Smillert 1228e0680481Safresh1Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ 1229e0680481Safresh1Allbery <rra@cpan.org> 1230b39c5158Smillert 1231b39c5158SmillertThis program is free software; you may redistribute it and/or modify it 1232b39c5158Smillertunder the same terms as Perl itself. 1233b39c5158Smillert 1234b46d8ef2Safresh1=head1 SEE ALSO 1235b46d8ef2Safresh1 1236e0680481Safresh1L<Encode::Locale>, L<Encode::Supported>, L<Pod::Simple>, 1237e0680481Safresh1L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)> 1238b46d8ef2Safresh1 1239b46d8ef2Safresh1The current version of this module is always available from its web site at 1240b46d8ef2Safresh1L<https://www.eyrie.org/~eagle/software/podlators/>. It is also part of the 1241b46d8ef2Safresh1Perl core distribution as of 5.6.0. 
1242b46d8ef2Safresh1 1243b39c5158Smillert=cut 1244b46d8ef2Safresh1 1245b46d8ef2Safresh1# Local Variables: 1246b46d8ef2Safresh1# copyright-at-end-flag: t 1247b46d8ef2Safresh1# End: 1248