# xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/Text.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
# Convert POD data to formatted text.
#
# This module converts POD to formatted text.  It replaces the old Pod::Text
# module that came with versions of Perl prior to 5.6.0 and attempts to match
# its output except for some specific circumstances where other decisions
# seemed to produce better output.  It uses Pod::Simple and is designed to be
# very easy to subclass.
#
# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl

##############################################################################
# Modules and declarations
##############################################################################

package Pod::Text;

use 5.010;
use strict;
use warnings;

use Carp qw(carp croak);
use Encode qw(encode);
use Exporter ();
use Pod::Simple ();

our @ISA = qw(Pod::Simple Exporter);
our $VERSION = '5.01_02';
$VERSION =~ tr/_//d;

# We have to export pod2text for backward compatibility.
our @EXPORT = qw(pod2text);

# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available.  Code
# taken from Pod::Simple 3.32, but was only added in 3.30.
#
# NOTE(review): "ge" is a string comparison and the bare literal 3.30
# stringifies as "3.3".  Left unchanged here to preserve behavior.
my ($NBSP, $SHY);
if ($Pod::Simple::VERSION ge 3.30) {
    $NBSP = $Pod::Simple::nbsp;
    $SHY  = $Pod::Simple::shy;
} else {
    $NBSP = chr utf8::unicode_to_native(0xA0);
    $SHY  = chr utf8::unicode_to_native(0xAD);
}

# Import the ASCII constant from Pod::Simple.  This is true iff we're in an
# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
# generally only false for EBCDIC.
BEGIN { *ASCII = \&Pod::Simple::ASCII }

##############################################################################
# Initialization
##############################################################################

# This function handles code blocks.  It's registered as a callback to
# Pod::Simple and therefore doesn't work as a regular method call, but all it
# does is call output_code with the line.
#
# $line   - Text of the line, without a trailing newline (one is appended)
# $number - Second callback argument; not used here
# $parser - Object on which output_code is invoked
sub handle_code {
    my ($line, $number, $parser) = @_;
    $parser->output_code ($line . "\n");
}

# Initialize the object and set various Pod::Simple options that we need.
# Here, we also process any additional options passed to the constructor or
# set up defaults if none were given.  Note that all internal object keys are
# in all-caps, reserving all lower-case object keys for Pod::Simple and user
# arguments.
#
# Arguments after the class name are key/value option pairs.  Each option is
# stored in the object under the key "opt_<name>".  The options acted on here
# are stderr, utf8, errors, alt, indent, margin, loose, sentence, width,
# quotes, guesswork, and code.
#
# Returns: the new object
#  Throws: text exception (via croak) on an invalid errors or quotes setting
sub new {
    my $class = shift;
    my $self = $class->SUPER::new;

    # Tell Pod::Simple to keep whitespace whenever possible.
    if ($self->can ('preserve_whitespace')) {
        $self->preserve_whitespace (1);
    } else {
        $self->fullstop_space_harden (1);
    }

    # The =for and =begin targets that we accept.
    $self->accept_targets (qw/text TEXT/);

    # Ensure that contiguous blocks of code are merged together.  Otherwise,
    # some of the guesswork heuristics don't work right.
    $self->merge_text (1);

    # Pod::Simple doesn't do anything useful with our arguments, but we want
    # to put them in our object as hash keys and values.  This could cause
    # problems if we ever clash with Pod::Simple's own internal class
    # variables.
    my %opts = @_;
    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
    %$self = (%$self, @opts);

    # Backwards-compatibility support for the stderr option.
    if ($$self{opt_stderr} and not $$self{opt_errors}) {
        $$self{opt_errors} = 'stderr';
    }
    delete $$self{opt_stderr};

    # Backwards-compatibility support for the utf8 option.
    if ($$self{opt_utf8} && !$$self{opt_encoding}) {
        $$self{opt_encoding} = 'UTF-8';
    }
    delete $$self{opt_utf8};

    # Validate the errors parameter and act on it.
    $$self{opt_errors} //= 'pod';
    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
        $self->no_errata_section (1);
        $self->complain_stderr (1);
        if ($$self{opt_errors} eq 'die') {
            $$self{complain_die} = 1;
        }
    } elsif ($$self{opt_errors} eq 'pod') {
        $self->no_errata_section (0);
        $self->complain_stderr (0);
    } elsif ($$self{opt_errors} eq 'none') {
        $self->no_errata_section (1);
        $self->no_whining (1);
    } else {
        # The option is stored under opt_errors, so report that value; the
        # plain "errors" key is never populated by this constructor.
        croak (qq(Invalid errors setting: "$$self{opt_errors}"));
    }

    # NOTE(review): this removes the key "errors", not "opt_errors".  Kept
    # as-is; confirm which key was intended before changing it.
    delete $$self{errors};

    # Initialize various things from our parameters.
    $$self{opt_alt}      //= 0;
    $$self{opt_indent}   //= 4;
    $$self{opt_margin}   //= 0;
    $$self{opt_loose}    //= 0;
    $$self{opt_sentence} //= 0;
    $$self{opt_width}    //= 76;

    # Figure out what quotes we'll be using for C<> text.  A single character
    # is used on both sides, an even-length string is split in half into the
    # left and right quotes, and "none" disables quoting.
    $$self{opt_quotes} ||= '"';
    if ($$self{opt_quotes} eq 'none') {
        $$self{LQUOTE} = $$self{RQUOTE} = '';
    } elsif (length ($$self{opt_quotes}) == 1) {
        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
        my $length = length ($$self{opt_quotes}) / 2;
        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
    } else {
        croak qq(Invalid quote specification "$$self{opt_quotes}");
    }

    # Configure guesswork based on options.  The option is a comma-separated
    # list; an empty list or "all" enables quoting guesswork, and "none"
    # disables everything.
    my $guesswork = $self->{opt_guesswork} || q{};
    my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork);
    if (!%guesswork || $guesswork{all}) {
        $$self{GUESSWORK} = {quoting => 1};
    } elsif ($guesswork{none}) {
        $$self{GUESSWORK} = {};
    } else {
        $$self{GUESSWORK} = {%guesswork};
    }

    # If requested, do something with the non-POD text.
    $self->code_handler (\&handle_code) if $$self{opt_code};

    # Return the created object.
    return $self;
}

##############################################################################
# Core parsing
##############################################################################

# This is the glue that connects the code below with Pod::Simple itself.  The
# goal is to convert the event stream coming from the POD parser into method
# calls to handlers once the complete content of a tag has been seen.  Each
# paragraph or POD command will have textual content associated with it, and
# as soon as all of a paragraph or POD command has been seen, that content
# will be passed in to the corresponding method for handling that type of
# object.  The exceptions are handlers for lists, which have opening tag
# handlers and closing tag handlers that will be called right away.
#
# The internal hash key PENDING is used to store the contents of a tag until
# all of it has been seen.  It holds a stack of open tags, each one
# represented by a tuple of the attributes hash for the tag and the contents
# of the tag.

# Add a block of text to the contents of the current node, formatting it
# according to the current formatting instructions as we do.
#
# $text - Text to append to the contents (second element) of the entry on top
#         of the PENDING stack
sub _handle_text {
    my ($self, $text) = @_;
    my $tag = $$self{PENDING}[-1];
    $$tag[1] .= $text;
}

# Given an element name, get the corresponding method name.  Dashes become
# underscores, upper-case ASCII letters are lowercased, and any character
# other than an underscore, lower-case ASCII letter, or digit is removed.
#
# $element - Element name
#
# Returns: the method-name fragment for that element
sub method_for_element {
    my ($self, $element) = @_;
    $element =~ tr/-/_/;
    $element =~ tr/A-Z/a-z/;
    $element =~ tr/_a-z0-9//cd;
    return $element;
}

# Handle the start of a new element.  If cmd_element is defined, assume that
# we need to collect the entire tree for this element before passing it to the
# element method, and create a new tree into which we'll collect blocks of
# text and nested elements.  Otherwise, if start_element is defined, call it.
#
# $element - Element name
# $attrs   - Attribute hash for the element
sub _handle_element_start {
    my ($self, $element, $attrs) = @_;
    my $method = $self->method_for_element ($element);

    # If we have a command handler, we need to accumulate the contents of the
    # tag before calling it.
    if ($self->can ("cmd_$method")) {
        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
    } elsif ($self->can ("start_$method")) {
        my $handler = 'start_' . $method;
        $self->$handler ($attrs, '');
    }
}

# Handle the end of an element.  If we had a cmd_ method for this element,
# this is where we pass along the text that we've accumulated.  Otherwise, if
# we have an end_ method for the element, call that.
#
# $element - Element name
sub _handle_element_end {
    my ($self, $element) = @_;
    my $method = $self->method_for_element ($element);

    # If we have a command handler, pull off the pending text and pass it to
    # the handler along with the saved attribute hash.
    if ($self->can ("cmd_$method")) {
        my $tag = pop @{ $$self{PENDING} };
        my $handler = 'cmd_' . $method;
        my $text = $self->$handler (@$tag);
        if (defined $text) {
            # With more than one entry left on the stack, the result is
            # appended to the entry now on top; otherwise it is output.
            if (@{ $$self{PENDING} } > 1) {
                $$self{PENDING}[-1][1] .= $text;
            } else {
                $self->output ($text);
            }
        }
    } elsif ($self->can ("end_$method")) {
        my $handler = 'end_' . $method;
        $self->$handler ();
    }
}

##############################################################################
# Output formatting
##############################################################################

# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
# because it plays games with tabs.  We can't use formline, even though we'd
# really like to, because it screws up non-printing characters.  So we have to
# do the wrapping ourselves.
#
# $text - Text to wrap
#
# Returns: the wrapped text, each line prefixed with MARGIN spaces, with any
#          trailing whitespace replaced by two newlines
sub wrap {
    my ($self, $text) = @_;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{opt_width} - $$self{MARGIN};
    while (length ($text) > $width) {
        # Prefer breaking at whitespace within the width; failing that, break
        # a run of $width non-newline characters mid-word.
        if (   $text =~ s/^([^\n]{0,$width})[ \t\n]+//
            || $text =~ s/^([^\n]{$width})//)
        {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $text;
    $output =~ s/\s+$/\n\n/;
    return $output;
}

# Reformat a paragraph of text for the current margin.  Takes the text to
# reformat and returns the formatted text.
#
# $text - Paragraph text
#
# Returns: the result of passing the whitespace-normalized text to wrap
sub reformat {
    my ($self, $text) = @_;

    # If we're trying to preserve two spaces after sentences, do some munging
    # to support that.  Otherwise, smash all repeated whitespace.  Be careful
    # not to use \s here, which in Unicode input may match non-breaking spaces
    # that we don't want to smash.
    if ($$self{opt_sentence}) {
        $text =~ s/ +$//mg;
        $text =~ s/\.\n/. \n/g;
        $text =~ s/\n/ /g;
        $text =~ s/   +/  /g;
    } else {
        $text =~ s/[ \t\n]+/ /g;
    }
    return $self->wrap ($text);
}

# Output text to the output device.  Replace non-breaking spaces with spaces
# and soft hyphens with nothing, and then determine the output encoding.
#
# @text - Strings to output; they are concatenated before processing
sub output {
    my ($self, @text) = @_;
    my $text = join ('', @text);
    if ($NBSP) {
        $text =~ s/$NBSP/ /g;
    }
    if ($SHY) {
        $text =~ s/$SHY//g;
    }

    # The logic used here is described in the POD documentation.  Prefer the
    # configured encoding, then the pass-through option of using the same
    # encoding as the input, and then UTF-8, but commit to an encoding for the
    # document.
    #
    # ENCODE says whether to encode or not and is turned off if there is a
    # PerlIO encoding layer (in start_document).  ENCODING is the encoding
    # that we previously committed to and is cleared at the start of each
    # document.
    if ($$self{ENCODE}) {
        my $encoding = $$self{ENCODING};
        if (!$encoding) {
            $encoding = $self->encoding();
            if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) {
                $encoding = 'UTF-8';
            }
            if ($encoding) {
                $$self{ENCODING} = $encoding;
            }
        }
        if ($encoding) {
            # Fallback passed to encode as its CHECK argument.  It reports
            # the character through whine and returns the encoder's default
            # replacement for it.
            my $check = sub {
                my ($char) = @_;

                # $char is a numeric ordinal, so format it as hexadecimal.
                # hex() would instead parse its decimal digits as hex and
                # report the wrong code point.
                my $display = sprintf ('"\x{%04x}"', $char);
                my $error = "$display does not map to $$self{ENCODING}";
                $self->whine ($self->line_count(), $error);
                return Encode::encode ($$self{ENCODING}, chr($char));
            };
            print { $$self{output_fh} } encode ($encoding, $text, $check);
        } else {
            print { $$self{output_fh} } $text;
        }
    } else {
        print { $$self{output_fh} } $text;
    }
}

# Output a block of code (something that isn't part of the POD text).  Called
# by handle_code, which is only registered when the code option was given.
# Exists here only so that it can be overridden by subclasses.
sub output_code { $_[0]->output ($_[1]) }

##############################################################################
# Document initialization
##############################################################################

# Set up various things that have to be initialized on a per-document basis.
#
# $attrs - Attribute hash for the document; only "contentless" is read
#
# Returns: the empty string
sub start_document {
    my ($self, $attrs) = @_;
    if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
        $$self{CONTENTLESS} = 1;
    } else {
        delete $$self{CONTENTLESS};
    }
    my $margin = $$self{opt_indent} + $$self{opt_margin};

    # Initialize a few per-document variables.
    $$self{INDENTS} = [];       # Stack of indentations.
    $$self{MARGIN}  = $margin;  # Default left margin.
    $$self{PENDING} = [[]];     # Pending output.

    # We have to redo encoding handling for each document.  Check whether the
    # output file handle already has a PerlIO encoding layer set and, if so,
    # disable encoding.  Any failure inside the eval is deliberately ignored,
    # which leaves encoding enabled.
    $$self{ENCODE} = 1;
    eval {
        require PerlIO;
        my @options = (output => 1, details => 1);
        my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
        if ($flag && ($flag & PerlIO::F_UTF8 ())) {
            $$self{ENCODE} = 0;
        }
    };
    $$self{ENCODING} = $$self{opt_encoding};

    return '';
}

# Handle the end of the document.  The only thing we do is handle dying on POD
# errors, since Pod::Parser currently doesn't.
#
# Throws: text exception (via croak) if complain_die is set and errors_seen
#         returns true
sub end_document {
    my ($self) = @_;
    if ($$self{complain_die} && $self->errors_seen) {
        croak ("POD document had syntax errors");
    }
}

##############################################################################
# Text blocks
##############################################################################

# Intended for subclasses to override, this method returns text with any
# non-printing formatting codes stripped out so that length() correctly
# returns the length of the text.  For basic Pod::Text, it does nothing.
#
# $string - Text to strip
#
# Returns: $string unchanged
sub strip_format {
    my ($self, $string) = @_;
    return $string;
}

# This method is called whenever an =item command is complete (in other words,
# we've seen its associated paragraph or know for certain that it doesn't have
# one).  It gets the paragraph associated with the item as an argument.  If
# that argument is empty, just output the item tag; if it contains a newline,
# output the item tag followed by the newline.  Otherwise, see if there's
# enough room for us to output the item tag in the margin of the text or if we
# have to put it on a separate line.
#
# $text - Paragraph associated with the item; may be omitted
#
# The pending tag is read from, and then cleared in, the ITEM key.  If no tag
# is pending, a warning is issued with carp and nothing is output.
sub item {
    my ($self, $text) = @_;
    my $tag = $$self{ITEM};
    unless (defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $$self{ITEM};

    # Calculate the indentation and margin.  $fits is set to true if the tag
    # will fit into the margin of the paragraph given our indentation level.
    my $indent = $$self{INDENTS}[-1] // $$self{opt_indent};
    my $margin = ' ' x $$self{opt_margin};
    my $tag_length = length ($self->strip_format ($tag));
    my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);

    # If the tag doesn't fit, or if we have no associated text, print out the
    # tag separately.  Otherwise, put the tag in the margin of the paragraph.
    if (!$text || $text =~ /^\s+$/ || !$fits) {
        # Temporarily move the margin out to the item's indentation so that
        # the tag is formatted there, then restore it for the body text.
        my $realindent = $$self{MARGIN};
        $$self{MARGIN} = $indent;
        my $output = $self->reformat ($tag);
        $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
        $output =~ s/\n*$/\n/;

        # If the text is just whitespace, we have an empty item paragraph;
        # this can result from =over/=item/=back without any intermixed
        # paragraphs.  Insert some whitespace to keep the =item from merging
        # into the next paragraph.
        $output .= "\n" if $text && $text =~ /^\s*$/;

        $self->output ($output);
        $$self{MARGIN} = $realindent;
        $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
    } else {
        # Format the body and then overwrite the leading blanks just past the
        # item's indentation with the tag.
        my $space = ' ' x $indent;
        $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
        $text = $self->reformat ($text);
        $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
        my $tagspace = ' ' x $tag_length;
        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($text);
    }
}

# Handle a basic block of text.  The only tricky thing here is that if there
# is a pending item tag, we need to format this as an item paragraph.
#
# $attrs - Attribute hash for the paragraph (unused)
# $text  - Text of the paragraph
#
# Returns: the empty string; the formatted paragraph is output directly
sub cmd_para {
    my ($self, $attrs, $text) = @_;
    $text =~ s/\s+$/\n/;
    if (defined $$self{ITEM}) {
        $self->item ($text . "\n");
    } else {
        $self->output ($self->reformat ($text . "\n"));
    }
    return '';
}

# Handle a verbatim paragraph.  Just print it out, but indent it according to
# our margin.
#
# $attrs - Attribute hash for the paragraph (unused)
# $text  - Text of the paragraph
#
# Returns: the empty string, or nothing if the text is only whitespace
sub cmd_verbatim {
    my ($self, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};
    return if $text =~ /^\s*$/;

    # Prefix every line that contains non-whitespace with MARGIN spaces.
    $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
    $text =~ s/\s*$/\n\n/;
    $self->output ($text);
    return '';
}

# Handle literal text (produced by =for and similar constructs).  Just output
# it with the minimum of changes.
#
# $attrs - Attribute hash for the block (unused)
# $text  - Literal text
#
# Returns: the empty string; the text is output directly
sub cmd_data {
    my ($self, $attrs, $text) = @_;
    $text =~ s/^\n+//;
    $text =~ s/\n{0,2}$/\n/;
    $self->output ($text);
    return '';
}

##############################################################################
# Headings
##############################################################################

# The common code for handling all headers.  Takes the header text, the
# indentation, and the surrounding marker for the alt formatting method.
#
# $text   - Text of the heading
# $indent - Indentation, used only when the alt option is off
# $marker - Marker placed before the heading when the alt option is on; its
#           reversal is placed after the heading
#
# Returns: the empty string; the heading is output directly
sub heading {
    my ($self, $text, $indent, $marker) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $text =~ s/\s+$//;
    if ($$self{opt_alt}) {
        my $closemark = scalar reverse ($marker);
        my $margin = ' ' x $$self{opt_margin};
        $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
    } else {
        $text .= "\n" if $$self{opt_loose};
        my $margin = ' ' x ($$self{opt_margin} + $indent);
        $self->output ($margin . $text . "\n");
    }
    return '';
}

# First level heading.  Uses no extra indentation and the alt marker "====".
sub cmd_head1 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, 0, '====');
}

# Second level heading.  Indented by half of the indent option, with the alt
# marker "==  ".
sub cmd_head2 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} / 2, '==  ');
}

# Third level heading.  Indented by two thirds of the indent option plus 0.5,
# with the alt marker "=   ".
sub cmd_head3 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '=   ');
}

# Fourth level heading.  Indented by three quarters of the indent option plus
# 0.5, with the alt marker "-   ".
sub cmd_head4 {
    my ($self, $attrs, $text) = @_;
    $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '-   ');
}

##############################################################################
# List handling
##############################################################################

# Handle the beginning of an =over block.  Takes the attr hash.  This is
# called by the handlers for the four different types of lists (bullet,
# number, text, and block).
#
# $attrs - Attribute hash for the block; only "indent" is read
#
# Returns: the empty string
sub over_common_start {
    my ($self, $attrs) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    # Find the indentation level.  Fall back to the indent option unless the
    # attribute is an optionally signed integer of at most four digits.
    my $indent = $$attrs{indent};
    unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
        $indent = $$self{opt_indent};
    }

    # Add this to our stack of indents and increase our current margin.
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($indent + 0);
    return '';
}

# End an =over block.  Takes no options other than the class pointer.  Output
# any pending items and then pop one level of indentation.
#
# Returns: the empty string
sub over_common_end {
    my ($self) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};
    $$self{MARGIN} = pop @{ $$self{INDENTS} };
    return '';
}

# Dispatch the start and end calls as appropriate.  All four list types share
# the same start and end handling.
sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
sub start_over_number { $_[0]->over_common_start ($_[1]) }
sub start_over_text   { $_[0]->over_common_start ($_[1]) }
sub start_over_block  { $_[0]->over_common_start ($_[1]) }
sub end_over_bullet { $_[0]->over_common_end }
sub end_over_number { $_[0]->over_common_end }
sub end_over_text   { $_[0]->over_common_end }
sub end_over_block  { $_[0]->over_common_end }

# The common handler for all item commands.  Takes the type of the item, the
# attributes, and then the text of the item.
#
# $type  - Type of the item; "bullet" and "number" are treated specially and
#          any other value uses the item text as the tag
# $attrs - Attribute hash for the item
# $text  - Text of the item
#
# Returns: the empty string
sub item_common {
    my ($self, $type, $attrs, $text) = @_;
    $self->item if defined $$self{ITEM};

    # Clean up the text.  We want to end up with two variables, one ($text)
    # which contains any body text after taking out the item portion, and
    # another ($item) which contains the actual item text.  Note the use of
    # the internal Pod::Simple attribute here; that's a potential land mine.
    $text =~ s/\s+$//;
    my $item;
    if ($type eq 'bullet') {
        $item = '*';
    } elsif ($type eq 'number') {
        $item = $$attrs{'~orig_content'};
    } else {
        $item = $text;
        $item =~ s/\s*\n\s*/ /g;
        $text = '';
    }
    $$self{ITEM} = $item;

    # If body text for this item was included, go ahead and output that now.
    if ($text) {
        $text =~ s/\s*$/\n/;
        $self->item ($text);
    }
    return '';
}

# Per-type item handlers.  Each one simply tags the call with its item type
# and hands every remaining argument to item_common.
sub cmd_item_bullet {
    my ($self, @rest) = @_;
    return $self->item_common ('bullet', @rest);
}

sub cmd_item_number {
    my ($self, @rest) = @_;
    return $self->item_common ('number', @rest);
}

sub cmd_item_text {
    my ($self, @rest) = @_;
    return $self->item_common ('text', @rest);
}

sub cmd_item_block {
    my ($self, @rest) = @_;
    return $self->item_common ('block', @rest);
}
611b39c5158Smillert
612b39c5158Smillert##############################################################################
613b39c5158Smillert# Formatting codes
614b39c5158Smillert##############################################################################
615b39c5158Smillert
# The simple ones.  Each handler receives the formatter object, the
# attributes of the formatting code (unused here), and the text to format,
# and returns the formatted text.
#
# cmd_b and cmd_f only decorate their text when the alternate output style
# is selected.  The other handlers in this module read options under an opt_
# prefix (for example opt_alt in cmd_c), so check opt_alt here as well; the
# unprefixed key is still honored in case any caller sets it directly.
sub cmd_b {
    my ($self, $attrs, $text) = @_;
    return ($$self{opt_alt} || $$self{alt}) ? "``$text''" : $text;
}

sub cmd_f {
    my ($self, $attrs, $text) = @_;
    return ($$self{opt_alt} || $$self{alt}) ? "\"$text\"" : $text;
}

# Surround the text with asterisks.
sub cmd_i { return '*' . $_[2] . '*' }

# Produce no output at all for the text.
sub cmd_x { return '' }
621b39c5158Smillert
# Replace every whitespace character in the text with $NBSP and return the
# result.  The attributes argument is not used.
sub cmd_s {
    my ($self, $attrs, $text) = @_;
    (my $unbreakable = $text) =~ s{ \s }{$NBSP}xmsg;
    return $unbreakable;
}
628e0680481Safresh1
# Decide whether code text needs quote marks added around it.  Text that is
# already quoted is always returned untouched.  When the quoting guesswork is
# enabled, so is text that looks like a Perl variable, function name, simple
# function call, number, or hex constant.  Anything else is wrapped in quote
# marks.  These messy heuristics originally come from Barrie Slaymaker and
# largely duplicate code in Pod::Man.
sub cmd_c {
    my ($self, $attrs, $text) = @_;

    # Matches the optional array or hash subscript of a variable reference.
    # Kept as a plain string because it is interpolated more than once into
    # the regex below.
    my $index = '(?: \[[^]]+\] | \{[^}]+\} )?';

    # Traditionally, Pod::Text has not quoted Perl variables, functions,
    # numbers, or hex constants, but this is not always desirable, so those
    # patterns are only used when the quoting guesswork flag is set.
    # Otherwise, substitute a pattern that can never match.
    my $extra
      = $$self{GUESSWORK}{quoting}
      ? qr{
             \$+ [\#^]? \S $index            # special ($^F, $")
           | [\$\@%&*]+ \#? [:\'\w]+ $index  # plain var or func
           | [\$\@%&*]* [:\'\w]+
             (?: -> )? \(\s*[^\s,\)]*\s*\)   # 0/1-arg func call
           | [+-]? ( \d[\d.]* | \.\d+ )
             (?: [eE][+-]?\d+ )?             # a number
           | 0x [a-fA-F\d]+                  # a hex constant
         }xms
      : qr{(?!)}xms;    # never matches

    # If the whole text (ignoring surrounding whitespace) is something we
    # don't want to quote, return it exactly as given.
    my $leave_alone = $text =~ m{
      ^\s*
      (?:
         ( [\'\`\"] ) .* \1                  # already quoted
       | \` .* \'                            # `quoted'
       | $extra
      )
      \s*\z
     }xms;
    return $text if $leave_alone;

    # Otherwise, quote the text, using the fixed alt-style quotes if the alt
    # option is set and the configured quote marks if not.
    if ($$self{opt_alt}) {
        return "``$text''";
    }
    return "$$self{LQUOTE}$text$$self{RQUOTE}";
}
673b39c5158Smillert
# Format a link.  Links that are not URLs reduce to the text we're given.  A
# URL link with no separate target, or whose target is the same as its text,
# becomes the text wrapped in angle brackets.  Otherwise, the text is
# followed by the target URL in angle brackets, unless the nourls option is
# set, in which case only the text is returned.
sub cmd_l {
    my ($self, $attrs, $text) = @_;
    return $text if $$attrs{type} ne 'url';

    my $target = $$attrs{to};
    if (!defined ($target) || $target eq $text) {
        return "<$text>";
    }
    return $$self{opt_nourls} ? $text : "$text <$target>";
}
690b39c5158Smillert
691b39c5158Smillert##############################################################################
692b39c5158Smillert# Backwards compatibility
693b39c5158Smillert##############################################################################
694b39c5158Smillert
# The old Pod::Text module did everything in a pod2text() function.  This
# tries to provide the same interface for legacy applications.
#
# The arguments are any number of leading flags (-a to select the alt
# format, -<number> to set the width), then the input, and then optionally
# an output file handle.  Returns whatever parse_file() returns.  If an
# output file handle is given and the input cannot be opened, croaks.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module.  But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>.  Check that the argument
    # is defined first so that calling this function with no arguments
    # doesn't produce an uninitialized value warning.
    while (defined ($_[0]) && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1)    }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            # Not a flag we recognize, so treat it as the input.
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle.  That means we need to turn the first argument into a file
    # handle ourselves.  The two-argument form of open is used on purpose:
    # magic open will handle the <&STDIN case automagically, which callers
    # of this legacy interface may rely on.
    if (defined $_[1]) {
        my ($input, $output) = @_;
        open (my $in, $input)
            or croak ("Can't open $input for reading: $!\n");
        $parser->output_fh ($output);
        my $retval = $parser->parse_file ($in);
        my $fh = $parser->output_fh ();
        close $fh;
        return $retval;
    } else {
        $parser->output_fh (\*STDOUT);
        return $parser->parse_file (@_);
    }
}
738b39c5158Smillert
# Convert one document.  The underlying Pod::Simple object is reset first so
# that the same object can be reused to convert multiple pages.  An optional
# leading hash reference of options is accepted; the remaining arguments are
# passed to Pod::Simple's parse_from_file, whose return value is returned.
sub parse_from_file {
    my $self = shift;
    $self->reinit;

    # Fake the old cutting option to Pod::Parser: if -cutting was given with
    # a false value, start out as if already inside POD.  This fiddles with
    # internal Pod::Simple state and is quite ugly; we need a better
    # approach.
    if (ref ($_[0]) eq 'HASH') {
        my $options = shift @_;
        my $cutting = $options->{-cutting};
        if (defined ($cutting) && !$cutting) {
            $self->{in_pod}         = 1;
            $self->{last_was_blank} = 1;
        }
    }

    # Do the work.
    my $result = $self->Pod::Simple::parse_from_file (@_);

    # Flush output, since Pod::Simple doesn't do this: temporarily select
    # the output file handle, turn on autoflush, print nothing, and then put
    # everything back the way it was.  Ideally we should also close the file
    # descriptor if we had to open one, but we can't easily figure this out.
    my $out       = $self->output_fh ();
    my $previous  = select $out;
    my $autoflush = $|;
    $| = 1;
    print $out '';
    $| = $autoflush;
    select $previous;
    return $result;
}
770b39c5158Smillert
# Backward compatibility method that Pod::Simple does not provide.  Since
# file handles are one of the inputs that parse_from_file supports, simply
# pass all of the arguments through to it and return its result.
sub parse_from_filehandle {
    my ($self, @args) = @_;
    return $self->parse_from_file (@args);
}
778b39c5158Smillert
# Pod::Simple's parse_file doesn't set output_fh, but our documentation has
# always said that output goes to STDOUT by default.  Wrap the call and set
# that default ourselves unless the caller already chose an output file
# handle.
sub parse_file {
    my ($self, $in) = @_;
    $self->output_fh (\*STDOUT) if !defined $self->{output_fh};
    return $self->SUPER::parse_file ($in);
}
789e9ce3842Safresh1
# Default the output file handle to STDOUT for parse_lines as well, just to
# be polite.  Pod::Simple's man page implies that the caller is responsible
# for setting this, but I don't see any reason not to set a default.
sub parse_lines {
    my ($self, @lines) = @_;
    $self->output_fh (\*STDOUT) if !defined $self->{output_fh};
    return $self->SUPER::parse_lines (@lines);
}
800e5157e49Safresh1
# Default the output file handle to STDOUT for parse_string_document too,
# unless the caller has already set one.
sub parse_string_document {
    my ($self, $doc) = @_;
    $self->output_fh (\*STDOUT) if !defined $self->{output_fh};
    return $self->SUPER::parse_string_document ($doc);
}
809e5157e49Safresh1
810b39c5158Smillert##############################################################################
811b39c5158Smillert# Module return value and documentation
812b39c5158Smillert##############################################################################
813b39c5158Smillert
814b39c5158Smillert1;
815b39c5158Smillert__END__
816b39c5158Smillert
817e9ce3842Safresh1=for stopwords
818e9ce3842Safresh1alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
819e0680481Safresh1parsers EBCDIC autodetecting superset unrepresentable FH NNN
820e9ce3842Safresh1
821b39c5158Smillert=head1 NAME
822b39c5158Smillert
823b8851fccSafresh1Pod::Text - Convert POD data to formatted text
824b39c5158Smillert
825b39c5158Smillert=head1 SYNOPSIS
826b39c5158Smillert
827b39c5158Smillert    use Pod::Text;
828b46d8ef2Safresh1    my $parser = Pod::Text->new (sentence => 1, width => 78);
829b39c5158Smillert
830b39c5158Smillert    # Read POD from STDIN and write to STDOUT.
831b39c5158Smillert    $parser->parse_from_filehandle;
832b39c5158Smillert
833b39c5158Smillert    # Read POD from file.pod and write to file.txt.
834b39c5158Smillert    $parser->parse_from_file ('file.pod', 'file.txt');
835b39c5158Smillert
836b39c5158Smillert=head1 DESCRIPTION
837b39c5158Smillert
838e0680481Safresh1Pod::Text is a module that can convert documentation in the POD format (the
839e0680481Safresh1preferred language for documenting Perl) into formatted text.  It uses no
840e0680481Safresh1special formatting controls or codes, and its output is therefore suitable for
841e0680481Safresh1nearly any device.
842b39c5158Smillert
843e0680481Safresh1=head2 Encoding
844b39c5158Smillert
845e0680481Safresh1Pod::Text uses the following logic to choose an output encoding, in order:
846e0680481Safresh1
847e0680481Safresh1=over 4
848e0680481Safresh1
849e0680481Safresh1=item 1.
850e0680481Safresh1
If a PerlIO encoding layer is set on the output file handle, do not do any
output encoding and instead rely on the PerlIO encoding layer.
853e0680481Safresh1
854e0680481Safresh1=item 2.
855e0680481Safresh1
856e0680481Safresh1If the C<encoding> or C<utf8> options are set, use the output encoding
857e0680481Safresh1specified by those options.
858e0680481Safresh1
859e0680481Safresh1=item 3.
860e0680481Safresh1
861e0680481Safresh1If the input encoding of the POD source file was explicitly specified (using
862e0680481Safresh1C<=encoding>) or automatically detected by Pod::Simple, use that as the output
863e0680481Safresh1encoding as well.
864e0680481Safresh1
865e0680481Safresh1=item 4.
866e0680481Safresh1
867e0680481Safresh1Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output
868e0680481Safresh1encoding.  Since this is a superset of ASCII, this will result in ASCII output
869e0680481Safresh1unless the POD input contains non-ASCII characters without declaring or
870e0680481Safresh1autodetecting an encoding (usually via EZ<><> escapes).
871e0680481Safresh1
872e0680481Safresh1=item 5.
873e0680481Safresh1
874e0680481Safresh1Otherwise, for EBCDIC systems, output without doing any encoding and hope
875e0680481Safresh1this works.
876e0680481Safresh1
877e0680481Safresh1=back
878e0680481Safresh1
879e0680481Safresh1One caveat: Pod::Text has to commit to an output encoding the first time it
880e0680481Safresh1outputs a non-ASCII character, and then has to stick with it for consistency.
881e0680481Safresh1However, C<=encoding> commands don't have to be at the beginning of a POD
882e0680481Safresh1document.  If someone uses a non-ASCII character early in a document with an
883e0680481Safresh1escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later,
884e0680481Safresh1ideally Pod::Text would follow rule 3 and output the entire document as ISO
885e0680481Safresh18859-1.  Instead, it will commit to UTF-8 following rule 4 as soon as it sees
886e0680481Safresh1that escape, and then stick with that encoding for the rest of the document.
887e0680481Safresh1
888e0680481Safresh1Unfortunately, there's no universally good choice for an output encoding.
889e0680481Safresh1Each choice will be incorrect in some circumstances.  This approach was chosen
890e0680481Safresh1primarily for backwards compatibility.  Callers should consider forcing the
891e0680481Safresh1output encoding via C<encoding> if they have any knowledge about what encoding
892e0680481Safresh1the user may expect.
893e0680481Safresh1
894e0680481Safresh1In particular, consider importing the L<Encode::Locale> module, if available,
895e0680481Safresh1and setting C<encoding> to C<locale> to use an output encoding appropriate to
896e0680481Safresh1the user's locale.  But be aware that if the user is not using locales or is
897e0680481Safresh1using a locale of C<C>, Encode::Locale will set the output encoding to
US-ASCII.  This will cause all non-ASCII characters to be replaced with C<?>
899e0680481Safresh1and produce a flurry of warnings about unsupported characters, which may or
900e0680481Safresh1may not be what you want.
901e0680481Safresh1
902e0680481Safresh1=head1 CLASS METHODS
903e0680481Safresh1
904e0680481Safresh1=over 4
905e0680481Safresh1
906e0680481Safresh1=item new(ARGS)
907e0680481Safresh1
908e0680481Safresh1Create a new Pod::Text object.  ARGS should be a list of key/value pairs,
909e0680481Safresh1where the keys are chosen from the following.  Each option is annotated with
910e0680481Safresh1the version of Pod::Text in which that option was added with its current
911e0680481Safresh1meaning.
912b39c5158Smillert
913b39c5158Smillert=over 4
914b39c5158Smillert
915b39c5158Smillert=item alt
916b39c5158Smillert
917e0680481Safresh1[2.00] If set to a true value, selects an alternate output format that, among
918e0680481Safresh1other things, uses a different heading style and marks C<=item> entries with a
919b39c5158Smillertcolon in the left margin.  Defaults to false.
920b39c5158Smillert
921b39c5158Smillert=item code
922b39c5158Smillert
923e0680481Safresh1[2.13] If set to a true value, the non-POD parts of the input file will be
924e0680481Safresh1included in the output.  Useful for viewing code documented with POD blocks
925e0680481Safresh1with the POD rendered and the code left intact.
926e0680481Safresh1
927e0680481Safresh1=item encoding
928e0680481Safresh1
929e0680481Safresh1[5.00] Specifies the encoding of the output.  The value must be an encoding
930e0680481Safresh1recognized by the L<Encode> module (see L<Encode::Supported>).  If the output
931e0680481Safresh1contains characters that cannot be represented in this encoding, that is an
932e0680481Safresh1error that will be reported as configured by the C<errors> option.  If error
933e0680481Safresh1handling is other than C<die>, the unrepresentable character will be replaced
934e0680481Safresh1with the Encode substitution character (normally C<?>).
935e0680481Safresh1
936e0680481Safresh1If the output file handle has a PerlIO encoding layer set, this parameter will
be ignored and no encoding will be done by Pod::Text.  It will instead rely on
938e0680481Safresh1the encoding layer to make whatever output encoding transformations are
939e0680481Safresh1desired.
940e0680481Safresh1
941e0680481Safresh1WARNING: The input encoding of the POD source is independent from the output
942e0680481Safresh1encoding, and setting this option does not affect the interpretation of the
943e0680481Safresh1POD input.  Unless your POD source is US-ASCII, its encoding should be
944e0680481Safresh1declared with the C<=encoding> command in the source, as near to the top of
the file as possible.  If this is not done, Pod::Simple will attempt to
946e0680481Safresh1guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will
947e0680481Safresh1produce warnings.  See L<perlpod(1)> for more information.
948b39c5158Smillert
949e9ce3842Safresh1=item errors
950e9ce3842Safresh1
951e0680481Safresh1[3.17] How to report errors.  C<die> says to throw an exception on any POD
952e0680481Safresh1formatting error.  C<stderr> says to report errors on standard error, but not
953e0680481Safresh1to throw an exception.  C<pod> says to include a POD ERRORS section in the
954e0680481Safresh1resulting documentation summarizing the errors.  C<none> ignores POD errors
955e0680481Safresh1entirely, as much as possible.
956e9ce3842Safresh1
957e5157e49Safresh1The default is C<pod>.
958e9ce3842Safresh1
959e0680481Safresh1=item guesswork
960e0680481Safresh1
961e0680481Safresh1[5.01] By default, Pod::Text applies some default formatting rules based on
962e0680481Safresh1guesswork and regular expressions that are intended to make writing Perl
963e0680481Safresh1documentation easier and require less explicit markup.  These rules may not
964e0680481Safresh1always be appropriate, particularly for documentation that isn't about Perl.
965e0680481Safresh1This option allows turning all or some of it off.
966e0680481Safresh1
967e0680481Safresh1The special value C<all> enables all guesswork.  This is also the default for
968e0680481Safresh1backward compatibility reasons.  The special value C<none> disables all
969e0680481Safresh1guesswork.  Otherwise, the value of this option should be a comma-separated
970e0680481Safresh1list of one or more of the following keywords:
971e0680481Safresh1
972e0680481Safresh1=over 4
973e0680481Safresh1
974e0680481Safresh1=item quoting
975e0680481Safresh1
If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by
double quotes in the output unless the contents are already
978e0680481Safresh1quoted.  When this guesswork is enabled, quote marks will also be suppressed
979e0680481Safresh1for Perl variables, function names, function calls, numbers, and hex
980e0680481Safresh1constants.
981e0680481Safresh1
982e0680481Safresh1=back
983e0680481Safresh1
984e0680481Safresh1Any unknown guesswork name is silently ignored (for potential future
985e0680481Safresh1compatibility), so be careful about spelling.
986e0680481Safresh1
987b39c5158Smillert=item indent
988b39c5158Smillert
989e0680481Safresh1[2.00] The number of spaces to indent regular text, and the default
990e0680481Safresh1indentation for C<=over> blocks.  Defaults to 4.
991b39c5158Smillert
992b39c5158Smillert=item loose
993b39c5158Smillert
994e0680481Safresh1[2.00] If set to a true value, a blank line is printed after a C<=head1>
995e0680481Safresh1heading.  If set to false (the default), no blank line is printed after
996e0680481Safresh1C<=head1>, although one is still printed after C<=head2>.  This is the default
997e0680481Safresh1because it's the expected formatting for manual pages; if you're formatting
998b39c5158Smillertarbitrary text documents, setting this to true may result in more pleasing
999b39c5158Smillertoutput.
1000b39c5158Smillert
1001b39c5158Smillert=item margin
1002b39c5158Smillert
1003e0680481Safresh1[2.21] The width of the left margin in spaces.  Defaults to 0.  This is the
1004e0680481Safresh1margin for all text, including headings, not the amount by which regular text
1005e0680481Safresh1is indented; for the latter, see the I<indent> option.  To set the right
1006b39c5158Smillertmargin, see the I<width> option.
1007b39c5158Smillert
1008e9ce3842Safresh1=item nourls
1009e9ce3842Safresh1
[3.17] Normally, LZ<><> formatting codes with both a URL and anchor text are
1011e0680481Safresh1formatted to show both the anchor text and the URL.  In other words:
1012e9ce3842Safresh1
1013e9ce3842Safresh1    L<foo|http://example.com/>
1014e9ce3842Safresh1
1015e9ce3842Safresh1is formatted as:
1016e9ce3842Safresh1
1017e9ce3842Safresh1    foo <http://example.com/>
1018e9ce3842Safresh1
1019e0680481Safresh1This option, if set to a true value, suppresses the URL when anchor text is
1020e0680481Safresh1given, so this example would be formatted as just C<foo>.  This can produce
1021e0680481Safresh1less cluttered output in cases where the URLs are not particularly important.
1022e9ce3842Safresh1
1023b39c5158Smillert=item quotes
1024b39c5158Smillert
1025e0680481Safresh1[4.00] Sets the quote marks used to surround CE<lt>> text.  If the value is a
1026e0680481Safresh1single character, it is used as both the left and right quote.  Otherwise, it
1027e0680481Safresh1is split in half, and the first half of the string is used as the left quote
1028e0680481Safresh1and the second is used as the right quote.
1029b39c5158Smillert
1030b39c5158SmillertThis may also be set to the special value C<none>, in which case no quote
1031b39c5158Smillertmarks are added around CE<lt>> text.
1032b39c5158Smillert
1033b39c5158Smillert=item sentence
1034b39c5158Smillert
1035e0680481Safresh1[3.00] If set to a true value, Pod::Text will assume that each sentence ends
1036e0680481Safresh1in two spaces, and will try to preserve that spacing.  If set to false, all
1037e0680481Safresh1consecutive whitespace in non-verbatim paragraphs is compressed into a single
1038e0680481Safresh1space.  Defaults to false.
1039b39c5158Smillert
1040b39c5158Smillert=item stderr
1041b39c5158Smillert
1042e0680481Safresh1[3.10] Send error messages about invalid POD to standard error instead of
1043e0680481Safresh1appending a POD ERRORS section to the generated output.  This is equivalent to
1044e0680481Safresh1setting C<errors> to C<stderr> if C<errors> is not already set.  It is
1045e0680481Safresh1supported for backward compatibility.
1046b39c5158Smillert
1047b39c5158Smillert=item utf8
1048b39c5158Smillert
1049e0680481Safresh1[3.12] If this option is set to a true value, the output encoding is set to
1050e0680481Safresh1UTF-8.  This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding>
1051e0680481Safresh1is not already set.  It is supported for backward compatibility.
1052b39c5158Smillert
1053b39c5158Smillert=item width
1054b39c5158Smillert
1055e0680481Safresh1[2.00] The column at which to wrap text on the right-hand side.  Defaults to
1056e0680481Safresh176.
1057b39c5158Smillert
1058b39c5158Smillert=back
1059b39c5158Smillert
1060e0680481Safresh1=back
1061e5157e49Safresh1
1062e0680481Safresh1=head1 INSTANCE METHODS
1063e5157e49Safresh1
1064e0680481Safresh1As a derived class from Pod::Simple, Pod::Text supports the same methods and
1065e0680481Safresh1interfaces.  See L<Pod::Simple> for all the details.  This section summarizes
1066e0680481Safresh1the most-frequently-used methods and the ones added by Pod::Text.
1067e5157e49Safresh1
1068e0680481Safresh1=over 4
1069e5157e49Safresh1
1070e0680481Safresh1=item output_fh(FH)
1071e0680481Safresh1
1072e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document()
1073e0680481Safresh1to the file handle FH instead of C<STDOUT>.
1074e0680481Safresh1
1075e0680481Safresh1=item output_string(REF)
1076e0680481Safresh1
1077e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document()
1078e0680481Safresh1to the scalar variable pointed to by REF, rather than C<STDOUT>.  For example:
1079e0680481Safresh1
    my $parser = Pod::Text->new();
    my $output;
    $parser->output_string(\$output);
    $parser->parse_file('/some/input/file');
1084e0680481Safresh1
1085e0680481Safresh1Be aware that the output in that variable will already be encoded (see
1086e0680481Safresh1L</Encoding>).
1087e0680481Safresh1
1088e0680481Safresh1=item parse_file(PATH)
1089e0680481Safresh1
1090e0680481Safresh1Read the POD source from PATH and format it.  By default, the output is sent
1091e0680481Safresh1to C<STDOUT>, but this can be changed with the output_fh() or output_string()
1092e0680481Safresh1methods.
1093e0680481Safresh1
1094e0680481Safresh1=item parse_from_file(INPUT, OUTPUT)
1095e0680481Safresh1
1096e0680481Safresh1=item parse_from_filehandle(FH, OUTPUT)
1097e0680481Safresh1
1098e0680481Safresh1Read the POD source from INPUT, format it, and output the results to OUTPUT.
1099e0680481Safresh1
parse_from_filehandle() is provided for backward compatibility with older
versions of Pod::Text.  parse_from_file() should be used instead.
1102e0680481Safresh1
1103e0680481Safresh1=item parse_lines(LINES[, ...[, undef]])
1104e0680481Safresh1
1105e0680481Safresh1Parse the provided lines as POD source, writing the output to either C<STDOUT>
1106e0680481Safresh1or the file handle set with the output_fh() or output_string() methods.  This
1107e0680481Safresh1method can be called repeatedly to provide more input lines.  An explicit
1108e0680481Safresh1C<undef> should be passed to indicate the end of input.
1109e0680481Safresh1
1110e0680481Safresh1This method expects raw bytes, not decoded characters.
1111e0680481Safresh1
1112e0680481Safresh1=item parse_string_document(INPUT)
1113e0680481Safresh1
1114e0680481Safresh1Parse the provided scalar variable as POD source, writing the output to either
1115e0680481Safresh1C<STDOUT> or the file handle set with the output_fh() or output_string()
1116e0680481Safresh1methods.
1117e0680481Safresh1
1118e0680481Safresh1This method expects raw bytes, not decoded characters.
1119e0680481Safresh1
1120e0680481Safresh1=back
1121e0680481Safresh1
1122e0680481Safresh1=head1 FUNCTIONS
1123e0680481Safresh1
1124e0680481Safresh1Pod::Text exports one function for backward compatibility with older versions.
1125e0680481Safresh1This function is deprecated; instead, use the object-oriented interface
1126e0680481Safresh1described above.
1127e0680481Safresh1
1128e0680481Safresh1=over 4
1129e0680481Safresh1
1130e0680481Safresh1=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT])
1131e0680481Safresh1
1132e0680481Safresh1Convert the POD source from INPUT to text and write it to OUTPUT.  If OUTPUT
1133e0680481Safresh1is not given, defaults to C<STDOUT>.  INPUT can be any expression supported as
1134e0680481Safresh1the second argument to two-argument open().
1135e0680481Safresh1
1136e0680481Safresh1If C<-a> is given as an initial argument, pass the C<alt> option to the
1137e0680481Safresh1Pod::Text constructor.  This enables alternative formatting.
1138e0680481Safresh1
1139e0680481Safresh1If C<-NNN> is given as an initial argument, pass the C<width> option to the
1140e0680481Safresh1Pod::Text constructor with the number C<NNN> as its argument.  This sets the
1141e0680481Safresh1wrap line width to NNN.
1142e0680481Safresh1
1143e0680481Safresh1=back
1144b39c5158Smillert
1145b39c5158Smillert=head1 DIAGNOSTICS
1146b39c5158Smillert
1147b39c5158Smillert=over 4
1148b39c5158Smillert
1149b39c5158Smillert=item Bizarre space in item
1150b39c5158Smillert
1151b39c5158Smillert=item Item called without tag
1152b39c5158Smillert
1153b39c5158Smillert(W) Something has gone wrong in internal C<=item> processing.  These
1154b39c5158Smillertmessages indicate a bug in Pod::Text; you should never see them.
1155b39c5158Smillert
1156b39c5158Smillert=item Can't open %s for reading: %s
1157b39c5158Smillert
1158b39c5158Smillert(F) Pod::Text was invoked via the compatibility mode pod2text() interface
1159b39c5158Smillertand the input file it was given could not be opened.
1160b39c5158Smillert
1161e9ce3842Safresh1=item Invalid errors setting "%s"
1162e9ce3842Safresh1
1163e9ce3842Safresh1(F) The C<errors> parameter to the constructor was set to an unknown value.
1164e9ce3842Safresh1
1165b39c5158Smillert=item Invalid quote specification "%s"
1166b39c5158Smillert
1167e9ce3842Safresh1(F) The quote specification given (the C<quotes> option to the
constructor) was invalid.  A quote specification must be either one
character long or an even number (greater than one) of characters long.
1170e9ce3842Safresh1
1171e9ce3842Safresh1=item POD document had syntax errors
1172e9ce3842Safresh1
1173e9ce3842Safresh1(F) The POD document being formatted had syntax errors and the C<errors>
1174e9ce3842Safresh1option was set to C<die>.
1175b39c5158Smillert
1176b39c5158Smillert=back
1177b39c5158Smillert
1178e0680481Safresh1=head1 COMPATIBILITY
1179b39c5158Smillert
1180e0680481Safresh1Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module
1181e0680481Safresh1included with Perl, in Perl 5.6.0.  Earlier versions of Perl had a different
1182e0680481Safresh1Pod::Text module, with a different API.
1183b39c5158Smillert
1184e0680481Safresh1The current API based on L<Pod::Simple> was added in Pod::Text 3.00.
1185e0680481Safresh1Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to
1186e0680481Safresh1incorporate those changes.  This is the first version that correctly supports
1187e0680481Safresh1all modern POD syntax.  The parse_from_filehandle() method was re-added for
1188e0680481Safresh1backward compatibility in Pod::Text 3.07, included in Perl 5.9.4.
1189b39c5158Smillert
1190e0680481Safresh1Pod::Text 3.12, included in Perl 5.10.1, first implemented the current
1191e0680481Safresh1practice of attempting to match the default output encoding with the input
1192e0680481Safresh1encoding of the POD source, unless overridden by the C<utf8> option or (added
1193e0680481Safresh1later) the C<encoding> option.
1194b39c5158Smillert
1195e0680481Safresh1Support for anchor text in LZ<><> links of type URL was added in Pod::Text
1196e0680481Safresh13.14, included in Perl 5.11.5.
1197b39c5158Smillert
1198e0680481Safresh1parse_lines(), parse_string_document(), and parse_file() set a default output
1199e0680481Safresh1file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18,
1200e0680481Safresh1included in Perl 5.19.5.
1201b39c5158Smillert
1202e0680481Safresh1Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the
1203e0680481Safresh1version of the podlators distribution.  All modules included in podlators, and
1204e0680481Safresh1the podlators distribution itself, share the same version number from this
1205e0680481Safresh1point forward.
1206b39c5158Smillert
1207e0680481Safresh1Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC
1208e0680481Safresh1systems, present in all versions back to 3.00, that would cause opening
1209e0680481Safresh1brackets to disappear.
1210b39c5158Smillert
1211e0680481Safresh1Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it
1212e0680481Safresh1sees a non-ASCII character in the input and the input encoding is not
1213e0680481Safresh1specified.  It also commits to an encoding with the first non-ASCII character
1214e0680481Safresh1and does not change the output encoding if the input encoding changes.  The
1215e0680481Safresh1L<Encode> module is now used for all output encoding rather than PerlIO
1216e0680481Safresh1layers, which fixes earlier problems with output to scalars.
1217b39c5158Smillert
1218b39c5158Smillert=head1 AUTHOR
1219b39c5158Smillert
1220e0680481Safresh1Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text
1221e0680481Safresh1by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser
1222e0680481Safresh1by Brad Appleton <bradapp@enteract.com>.  Sean Burke's initial conversion of
1223e0680481Safresh1Pod::Man to use Pod::Simple provided much-needed guidance on how to use
1224e0680481Safresh1Pod::Simple.
1225b39c5158Smillert
1226b39c5158Smillert=head1 COPYRIGHT AND LICENSE
1227b39c5158Smillert
1228e0680481Safresh1Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ
1229e0680481Safresh1Allbery <rra@cpan.org>
1230b39c5158Smillert
1231b39c5158SmillertThis program is free software; you may redistribute it and/or modify it
1232b39c5158Smillertunder the same terms as Perl itself.
1233b39c5158Smillert
1234b46d8ef2Safresh1=head1 SEE ALSO
1235b46d8ef2Safresh1
1236e0680481Safresh1L<Encode::Locale>, L<Encode::Supported>, L<Pod::Simple>,
1237e0680481Safresh1L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
1238b46d8ef2Safresh1
1239b46d8ef2Safresh1The current version of this module is always available from its web site at
1240b46d8ef2Safresh1L<https://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
1241b46d8ef2Safresh1Perl core distribution as of 5.6.0.
1242b46d8ef2Safresh1
1243b39c5158Smillert=cut
1244b46d8ef2Safresh1
1245b46d8ef2Safresh1# Local Variables:
1246b46d8ef2Safresh1# copyright-at-end-flag: t
1247b46d8ef2Safresh1# End:
1248