xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/Text.pm (revision 7c0ec4b8992567abb1e1536622dc789a9a39d9f1)
1# Convert POD data to formatted text.
2#
3# This module converts POD to formatted text.  It replaces the old Pod::Text
4# module that came with versions of Perl prior to 5.6.0 and attempts to match
5# its output except for some specific circumstances where other decisions
6# seemed to produce better output.  It uses Pod::Parser and is designed to be
7# very easy to subclass.
8#
9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
10
11##############################################################################
12# Modules and declarations
13##############################################################################
14
15package Pod::Text;
16
17use 5.010;
18use strict;
19use warnings;
20
21use Carp qw(carp croak);
22use Encode qw(encode);
23use Exporter ();
24use Pod::Simple ();
25
26our @ISA = qw(Pod::Simple Exporter);
27our $VERSION = '5.01';
28
29# We have to export pod2text for backward compatibility.
30our @EXPORT = qw(pod2text);
31
32# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available.  Code
33# taken from Pod::Simple 3.32, but was only added in 3.30.
34my ($NBSP, $SHY);
35if ($Pod::Simple::VERSION ge 3.30) {
36    $NBSP = $Pod::Simple::nbsp;
37    $SHY  = $Pod::Simple::shy;
38} else {
39    $NBSP = chr utf8::unicode_to_native(0xA0);
40    $SHY  = chr utf8::unicode_to_native(0xAD);
41}
42
43# Import the ASCII constant from Pod::Simple.  This is true iff we're in an
44# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
45# generally only false for EBCDIC.
46BEGIN { *ASCII = \&Pod::Simple::ASCII }
47
48##############################################################################
49# Initialization
50##############################################################################
51
52# This function handles code blocks.  It's registered as a callback to
53# Pod::Simple and therefore doesn't work as a regular method call, but all it
54# does is call output_code with the line.
55sub handle_code {
56    my ($line, $number, $parser) = @_;
57    $parser->output_code ($line . "\n");
58}
59
60# Initialize the object and set various Pod::Simple options that we need.
61# Here, we also process any additional options passed to the constructor or
62# set up defaults if none were given.  Note that all internal object keys are
63# in all-caps, reserving all lower-case object keys for Pod::Simple and user
64# arguments.
65sub new {
66    my $class = shift;
67    my $self = $class->SUPER::new;
68
69    # Tell Pod::Simple to keep whitespace whenever possible.
70    if ($self->can ('preserve_whitespace')) {
71        $self->preserve_whitespace (1);
72    } else {
73        $self->fullstop_space_harden (1);
74    }
75
76    # The =for and =begin targets that we accept.
77    $self->accept_targets (qw/text TEXT/);
78
79    # Ensure that contiguous blocks of code are merged together.  Otherwise,
80    # some of the guesswork heuristics don't work right.
81    $self->merge_text (1);
82
83    # Pod::Simple doesn't do anything useful with our arguments, but we want
84    # to put them in our object as hash keys and values.  This could cause
85    # problems if we ever clash with Pod::Simple's own internal class
86    # variables.
87    my %opts = @_;
88    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
89    %$self = (%$self, @opts);
90
91    # Backwards-compatibility support for the stderr option.
92    if ($$self{opt_stderr} and not $$self{opt_errors}) {
93        $$self{opt_errors} = 'stderr';
94    }
95    delete $$self{opt_stderr};
96
97    # Backwards-compatibility support for the utf8 option.
98    if ($$self{opt_utf8} && !$$self{opt_encoding}) {
99        $$self{opt_encoding} = 'UTF-8';
100    }
101    delete $$self{opt_utf8};
102
103    # Validate the errors parameter and act on it.
104    $$self{opt_errors} //= 'pod';
105    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
106        $self->no_errata_section (1);
107        $self->complain_stderr (1);
108        if ($$self{opt_errors} eq 'die') {
109            $$self{complain_die} = 1;
110        }
111    } elsif ($$self{opt_errors} eq 'pod') {
112        $self->no_errata_section (0);
113        $self->complain_stderr (0);
114    } elsif ($$self{opt_errors} eq 'none') {
115        $self->no_errata_section (1);
116        $self->no_whining (1);
117    } else {
118        croak (qq(Invalid errors setting: "$$self{errors}"));
119    }
120    delete $$self{errors};
121
122    # Initialize various things from our parameters.
123    $$self{opt_alt}      //= 0;
124    $$self{opt_indent}   //= 4;
125    $$self{opt_margin}   //= 0;
126    $$self{opt_loose}    //= 0;
127    $$self{opt_sentence} //= 0;
128    $$self{opt_width}    //= 76;
129
130    # Figure out what quotes we'll be using for C<> text.
131    $$self{opt_quotes} ||= '"';
132    if ($$self{opt_quotes} eq 'none') {
133        $$self{LQUOTE} = $$self{RQUOTE} = '';
134    } elsif (length ($$self{opt_quotes}) == 1) {
135        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
136    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
137        my $length = length ($$self{opt_quotes}) / 2;
138        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
139        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
140    } else {
141        croak qq(Invalid quote specification "$$self{opt_quotes}");
142    }
143
144    # Configure guesswork based on options.
145    my $guesswork = $self->{opt_guesswork} || q{};
146    my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork);
147    if (!%guesswork || $guesswork{all}) {
148        $$self{GUESSWORK} = {quoting => 1};
149    } elsif ($guesswork{none}) {
150        $$self{GUESSWORK} = {};
151    } else {
152        $$self{GUESSWORK} = {%guesswork};
153    }
154
155    # If requested, do something with the non-POD text.
156    $self->code_handler (\&handle_code) if $$self{opt_code};
157
158    # Return the created object.
159    return $self;
160}
161
162##############################################################################
163# Core parsing
164##############################################################################
165
166# This is the glue that connects the code below with Pod::Simple itself.  The
167# goal is to convert the event stream coming from the POD parser into method
168# calls to handlers once the complete content of a tag has been seen.  Each
169# paragraph or POD command will have textual content associated with it, and
170# as soon as all of a paragraph or POD command has been seen, that content
171# will be passed in to the corresponding method for handling that type of
172# object.  The exceptions are handlers for lists, which have opening tag
173# handlers and closing tag handlers that will be called right away.
174#
175# The internal hash key PENDING is used to store the contents of a tag until
176# all of it has been seen.  It holds a stack of open tags, each one
177# represented by a tuple of the attributes hash for the tag and the contents
178# of the tag.
179
180# Add a block of text to the contents of the current node, formatting it
181# according to the current formatting instructions as we do.
182sub _handle_text {
183    my ($self, $text) = @_;
184    my $tag = $$self{PENDING}[-1];
185    $$tag[1] .= $text;
186}
187
188# Given an element name, get the corresponding method name.
189sub method_for_element {
190    my ($self, $element) = @_;
191    $element =~ tr/-/_/;
192    $element =~ tr/A-Z/a-z/;
193    $element =~ tr/_a-z0-9//cd;
194    return $element;
195}
196
197# Handle the start of a new element.  If cmd_element is defined, assume that
198# we need to collect the entire tree for this element before passing it to the
199# element method, and create a new tree into which we'll collect blocks of
200# text and nested elements.  Otherwise, if start_element is defined, call it.
201sub _handle_element_start {
202    my ($self, $element, $attrs) = @_;
203    my $method = $self->method_for_element ($element);
204
205    # If we have a command handler, we need to accumulate the contents of the
206    # tag before calling it.
207    if ($self->can ("cmd_$method")) {
208        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
209    } elsif ($self->can ("start_$method")) {
210        my $method = 'start_' . $method;
211        $self->$method ($attrs, '');
212    }
213}
214
215# Handle the end of an element.  If we had a cmd_ method for this element,
216# this is where we pass along the text that we've accumulated.  Otherwise, if
217# we have an end_ method for the element, call that.
218sub _handle_element_end {
219    my ($self, $element) = @_;
220    my $method = $self->method_for_element ($element);
221
222    # If we have a command handler, pull off the pending text and pass it to
223    # the handler along with the saved attribute hash.
224    if ($self->can ("cmd_$method")) {
225        my $tag = pop @{ $$self{PENDING} };
226        my $method = 'cmd_' . $method;
227        my $text = $self->$method (@$tag);
228        if (defined $text) {
229            if (@{ $$self{PENDING} } > 1) {
230                $$self{PENDING}[-1][1] .= $text;
231            } else {
232                $self->output ($text);
233            }
234        }
235    } elsif ($self->can ("end_$method")) {
236        my $method = 'end_' . $method;
237        $self->$method ();
238    }
239}
240
241##############################################################################
242# Output formatting
243##############################################################################
244
245# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
246# because it plays games with tabs.  We can't use formline, even though we'd
247# really like to, because it screws up non-printing characters.  So we have to
248# do the wrapping ourselves.
249sub wrap {
250    my $self = shift;
251    local $_ = shift;
252    my $output = '';
253    my $spaces = ' ' x $$self{MARGIN};
254    my $width = $$self{opt_width} - $$self{MARGIN};
255    while (length > $width) {
256        if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) {
257            $output .= $spaces . $1 . "\n";
258        } else {
259            last;
260        }
261    }
262    $output .= $spaces . $_;
263    $output =~ s/\s+$/\n\n/;
264    return $output;
265}
266
267# Reformat a paragraph of text for the current margin.  Takes the text to
268# reformat and returns the formatted text.
269sub reformat {
270    my $self = shift;
271    local $_ = shift;
272
273    # If we're trying to preserve two spaces after sentences, do some munging
274    # to support that.  Otherwise, smash all repeated whitespace.  Be careful
275    # not to use \s here, which in Unicode input may match non-breaking spaces
276    # that we don't want to smash.
277    if ($$self{opt_sentence}) {
278        s/ +$//mg;
279        s/\.\n/. \n/g;
280        s/\n/ /g;
281        s/   +/  /g;
282    } else {
283        s/[ \t\n]+/ /g;
284    }
285    return $self->wrap ($_);
286}
287
288# Output text to the output device.  Replace non-breaking spaces with spaces
289# and soft hyphens with nothing, and then determine the output encoding.
290sub output {
291    my ($self, @text) = @_;
292    my $text = join ('', @text);
293    if ($NBSP) {
294        $text =~ s/$NBSP/ /g;
295    }
296    if ($SHY) {
297        $text =~ s/$SHY//g;
298    }
299
300    # The logic used here is described in the POD documentation.  Prefer the
301    # configured encoding, then the pass-through option of using the same
302    # encoding as the input, and then UTF-8, but commit to an encoding for the
303    # document.
304    #
305    # ENCODE says whether to encode or not and is turned off if there is a
306    # PerlIO encoding layer (in start_document).  ENCODING is the encoding
307    # that we previously committed to and is cleared at the start of each
308    # document.
309    if ($$self{ENCODE}) {
310        my $encoding = $$self{ENCODING};
311        if (!$encoding) {
312            $encoding = $self->encoding();
313            if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) {
314                $encoding = 'UTF-8';
315            }
316            if ($encoding) {
317                $$self{ENCODING} = $encoding;
318            }
319        }
320        if ($encoding) {
321            my $check = sub {
322                my ($char) = @_;
323                my $display = '"\x{' . hex($char) . '}"';
324                my $error = "$display does not map to $$self{ENCODING}";
325                $self->whine ($self->line_count(), $error);
326                return Encode::encode ($$self{ENCODING}, chr($char));
327            };
328            print { $$self{output_fh} } encode ($encoding, $text, $check);
329        } else {
330            print { $$self{output_fh} } $text;
331        }
332    } else {
333        print { $$self{output_fh} } $text;
334    }
335}
336
337# Output a block of code (something that isn't part of the POD text).  Called
338# by preprocess_paragraph only if we were given the code option.  Exists here
339# only so that it can be overridden by subclasses.
340sub output_code { $_[0]->output ($_[1]) }
341
342##############################################################################
343# Document initialization
344##############################################################################
345
346# Set up various things that have to be initialized on a per-document basis.
347sub start_document {
348    my ($self, $attrs) = @_;
349    if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
350        $$self{CONTENTLESS} = 1;
351    } else {
352        delete $$self{CONTENTLESS};
353    }
354    my $margin = $$self{opt_indent} + $$self{opt_margin};
355
356    # Initialize a few per-document variables.
357    $$self{INDENTS} = [];       # Stack of indentations.
358    $$self{MARGIN}  = $margin;  # Default left margin.
359    $$self{PENDING} = [[]];     # Pending output.
360
361    # We have to redo encoding handling for each document.  Check whether the
362    # output file handle already has a PerlIO encoding layer set and, if so,
363    # disable encoding.
364    $$self{ENCODE} = 1;
365    eval {
366        my @options = (output => 1, details => 1);
367        my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
368        if ($flag && ($flag & PerlIO::F_UTF8 ())) {
369            $$self{ENCODE} = 0;
370        }
371    };
372    $$self{ENCODING} = $$self{opt_encoding};
373
374    return '';
375}
376
377# Handle the end of the document.  The only thing we do is handle dying on POD
378# errors, since Pod::Parser currently doesn't.
379sub end_document {
380    my ($self) = @_;
381    if ($$self{complain_die} && $self->errors_seen) {
382        croak ("POD document had syntax errors");
383    }
384}
385
386##############################################################################
387# Text blocks
388##############################################################################
389
390# Intended for subclasses to override, this method returns text with any
391# non-printing formatting codes stripped out so that length() correctly
392# returns the length of the text.  For basic Pod::Text, it does nothing.
393sub strip_format {
394    my ($self, $string) = @_;
395    return $string;
396}
397
398# This method is called whenever an =item command is complete (in other words,
399# we've seen its associated paragraph or know for certain that it doesn't have
400# one).  It gets the paragraph associated with the item as an argument.  If
401# that argument is empty, just output the item tag; if it contains a newline,
402# output the item tag followed by the newline.  Otherwise, see if there's
403# enough room for us to output the item tag in the margin of the text or if we
404# have to put it on a separate line.
405sub item {
406    my ($self, $text) = @_;
407    my $tag = $$self{ITEM};
408    unless (defined $tag) {
409        carp "Item called without tag";
410        return;
411    }
412    undef $$self{ITEM};
413
414    # Calculate the indentation and margin.  $fits is set to true if the tag
415    # will fit into the margin of the paragraph given our indentation level.
416    my $indent = $$self{INDENTS}[-1] // $$self{opt_indent};
417    my $margin = ' ' x $$self{opt_margin};
418    my $tag_length = length ($self->strip_format ($tag));
419    my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);
420
421    # If the tag doesn't fit, or if we have no associated text, print out the
422    # tag separately.  Otherwise, put the tag in the margin of the paragraph.
423    if (!$text || $text =~ /^\s+$/ || !$fits) {
424        my $realindent = $$self{MARGIN};
425        $$self{MARGIN} = $indent;
426        my $output = $self->reformat ($tag);
427        $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
428        $output =~ s/\n*$/\n/;
429
430        # If the text is just whitespace, we have an empty item paragraph;
431        # this can result from =over/=item/=back without any intermixed
432        # paragraphs.  Insert some whitespace to keep the =item from merging
433        # into the next paragraph.
434        $output .= "\n" if $text && $text =~ /^\s*$/;
435
436        $self->output ($output);
437        $$self{MARGIN} = $realindent;
438        $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
439    } else {
440        my $space = ' ' x $indent;
441        $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
442        $text = $self->reformat ($text);
443        $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
444        my $tagspace = ' ' x $tag_length;
445        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
446        $self->output ($text);
447    }
448}
449
450# Handle a basic block of text.  The only tricky thing here is that if there
451# is a pending item tag, we need to format this as an item paragraph.
452sub cmd_para {
453    my ($self, $attrs, $text) = @_;
454    $text =~ s/\s+$/\n/;
455    if (defined $$self{ITEM}) {
456        $self->item ($text . "\n");
457    } else {
458        $self->output ($self->reformat ($text . "\n"));
459    }
460    return '';
461}
462
463# Handle a verbatim paragraph.  Just print it out, but indent it according to
464# our margin.
465sub cmd_verbatim {
466    my ($self, $attrs, $text) = @_;
467    $self->item if defined $$self{ITEM};
468    return if $text =~ /^\s*$/;
469    $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
470    $text =~ s/\s*$/\n\n/;
471    $self->output ($text);
472    return '';
473}
474
475# Handle literal text (produced by =for and similar constructs).  Just output
476# it with the minimum of changes.
477sub cmd_data {
478    my ($self, $attrs, $text) = @_;
479    $text =~ s/^\n+//;
480    $text =~ s/\n{0,2}$/\n/;
481    $self->output ($text);
482    return '';
483}
484
485##############################################################################
486# Headings
487##############################################################################
488
489# The common code for handling all headers.  Takes the header text, the
490# indentation, and the surrounding marker for the alt formatting method.
491sub heading {
492    my ($self, $text, $indent, $marker) = @_;
493    $self->item ("\n\n") if defined $$self{ITEM};
494    $text =~ s/\s+$//;
495    if ($$self{opt_alt}) {
496        my $closemark = reverse (split (//, $marker));
497        my $margin = ' ' x $$self{opt_margin};
498        $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
499    } else {
500        $text .= "\n" if $$self{opt_loose};
501        my $margin = ' ' x ($$self{opt_margin} + $indent);
502        $self->output ($margin . $text . "\n");
503    }
504    return '';
505}
506
507# First level heading.
508sub cmd_head1 {
509    my ($self, $attrs, $text) = @_;
510    $self->heading ($text, 0, '====');
511}
512
513# Second level heading.
514sub cmd_head2 {
515    my ($self, $attrs, $text) = @_;
516    $self->heading ($text, $$self{opt_indent} / 2, '==  ');
517}
518
519# Third level heading.
520sub cmd_head3 {
521    my ($self, $attrs, $text) = @_;
522    $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '=   ');
523}
524
525# Fourth level heading.
526sub cmd_head4 {
527    my ($self, $attrs, $text) = @_;
528    $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '-   ');
529}
530
531##############################################################################
532# List handling
533##############################################################################
534
535# Handle the beginning of an =over block.  Takes the type of the block as the
536# first argument, and then the attr hash.  This is called by the handlers for
537# the four different types of lists (bullet, number, text, and block).
538sub over_common_start {
539    my ($self, $attrs) = @_;
540    $self->item ("\n\n") if defined $$self{ITEM};
541
542    # Find the indentation level.
543    my $indent = $$attrs{indent};
544    unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
545        $indent = $$self{opt_indent};
546    }
547
548    # Add this to our stack of indents and increase our current margin.
549    push (@{ $$self{INDENTS} }, $$self{MARGIN});
550    $$self{MARGIN} += ($indent + 0);
551    return '';
552}
553
554# End an =over block.  Takes no options other than the class pointer.  Output
555# any pending items and then pop one level of indentation.
556sub over_common_end {
557    my ($self) = @_;
558    $self->item ("\n\n") if defined $$self{ITEM};
559    $$self{MARGIN} = pop @{ $$self{INDENTS} };
560    return '';
561}
562
563# Dispatch the start and end calls as appropriate.
564sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
565sub start_over_number { $_[0]->over_common_start ($_[1]) }
566sub start_over_text   { $_[0]->over_common_start ($_[1]) }
567sub start_over_block  { $_[0]->over_common_start ($_[1]) }
568sub end_over_bullet { $_[0]->over_common_end }
569sub end_over_number { $_[0]->over_common_end }
570sub end_over_text   { $_[0]->over_common_end }
571sub end_over_block  { $_[0]->over_common_end }
572
573# The common handler for all item commands.  Takes the type of the item, the
574# attributes, and then the text of the item.
575sub item_common {
576    my ($self, $type, $attrs, $text) = @_;
577    $self->item if defined $$self{ITEM};
578
579    # Clean up the text.  We want to end up with two variables, one ($text)
580    # which contains any body text after taking out the item portion, and
581    # another ($item) which contains the actual item text.  Note the use of
582    # the internal Pod::Simple attribute here; that's a potential land mine.
583    $text =~ s/\s+$//;
584    my ($item, $index);
585    if ($type eq 'bullet') {
586        $item = '*';
587    } elsif ($type eq 'number') {
588        $item = $$attrs{'~orig_content'};
589    } else {
590        $item = $text;
591        $item =~ s/\s*\n\s*/ /g;
592        $text = '';
593    }
594    $$self{ITEM} = $item;
595
596    # If body text for this item was included, go ahead and output that now.
597    if ($text) {
598        $text =~ s/\s*$/\n/;
599        $self->item ($text);
600    }
601    return '';
602}
603
604# Dispatch the item commands to the appropriate place.
605sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
606sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
607sub cmd_item_text   { my $self = shift; $self->item_common ('text',   @_) }
608sub cmd_item_block  { my $self = shift; $self->item_common ('block',  @_) }
609
610##############################################################################
611# Formatting codes
612##############################################################################
613
614# The simple ones.
615sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] }
616sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] }
617sub cmd_i { return '*' . $_[2] . '*' }
618sub cmd_x { return '' }
619
620# Convert all internal whitespace to $NBSP.
621sub cmd_s {
622    my ($self, $attrs, $text) = @_;
623    $text =~ s{ \s }{$NBSP}xmsg;
624    return $text;
625}
626
627# Apply a whole bunch of messy heuristics to not quote things that don't
628# benefit from being quoted.  These originally come from Barrie Slaymaker and
629# largely duplicate code in Pod::Man.
630sub cmd_c {
631    my ($self, $attrs, $text) = @_;
632
633    # A regex that matches the portion of a variable reference that's the
634    # array or hash index, separated out just because we want to use it in
635    # several places in the following regex.
636    my $index = '(?: \[[^]]+\] | \{[^}]+\} )?';
637
638    # Check for things that we don't want to quote, and if we find any of
639    # them, return the string with just a font change and no quoting.
640    #
641    # Traditionally, Pod::Text has not quoted Perl variables, functions,
642    # numbers, or hex constants, but this is not always desirable.  Make this
643    # optional on the quoting guesswork flag.
644    my $extra = qr{(?!)}xms;    # never matches
645    if ($$self{GUESSWORK}{quoting}) {
646        $extra = qr{
647             \$+ [\#^]? \S $index            # special ($^F, $")
648           | [\$\@%&*]+ \#? [:\'\w]+ $index  # plain var or func
649           | [\$\@%&*]* [:\'\w]+
650             (?: -> )? \(\s*[^\s,\)]*\s*\)   # 0/1-arg func call
651           | [+-]? ( \d[\d.]* | \.\d+ )
652             (?: [eE][+-]?\d+ )?             # a number
653           | 0x [a-fA-F\d]+                  # a hex constant
654         }xms;
655    }
656    $text =~ m{
657      ^\s*
658      (?:
659         ( [\'\`\"] ) .* \1                  # already quoted
660       | \` .* \'                            # `quoted'
661       | $extra
662      )
663      \s*\z
664     }xms and return $text;
665
666    # If we didn't return, go ahead and quote the text.
667    return $$self{opt_alt}
668        ? "``$text''"
669        : "$$self{LQUOTE}$text$$self{RQUOTE}";
670}
671
672# Links reduce to the text that we're given, wrapped in angle brackets if it's
673# a URL.
674sub cmd_l {
675    my ($self, $attrs, $text) = @_;
676    if ($$attrs{type} eq 'url') {
677        if (not defined($$attrs{to}) or $$attrs{to} eq $text) {
678            return "<$text>";
679        } elsif ($$self{opt_nourls}) {
680            return $text;
681        } else {
682            return "$text <$$attrs{to}>";
683        }
684    } else {
685        return $text;
686    }
687}
688
689##############################################################################
690# Backwards compatibility
691##############################################################################
692
693# The old Pod::Text module did everything in a pod2text() function.  This
694# tries to provide the same interface for legacy applications.
695sub pod2text {
696    my @args;
697
698    # This is really ugly; I hate doing option parsing in the middle of a
699    # module.  But the old Pod::Text module supported passing flags to its
700    # entry function, so handle -a and -<number>.
701    while ($_[0] =~ /^-/) {
702        my $flag = shift;
703        if    ($flag eq '-a')       { push (@args, alt => 1)    }
704        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
705        else {
706            unshift (@_, $flag);
707            last;
708        }
709    }
710
711    # Now that we know what arguments we're using, create the parser.
712    my $parser = Pod::Text->new (@args);
713
714    # If two arguments were given, the second argument is going to be a file
715    # handle.  That means we want to call parse_from_filehandle(), which means
716    # we need to turn the first argument into a file handle.  Magic open will
717    # handle the <&STDIN case automagically.
718    if (defined $_[1]) {
719        my @fhs = @_;
720        local *IN;
721        unless (open (IN, $fhs[0])) {
722            croak ("Can't open $fhs[0] for reading: $!\n");
723            return;
724        }
725        $fhs[0] = \*IN;
726        $parser->output_fh ($fhs[1]);
727        my $retval = $parser->parse_file ($fhs[0]);
728        my $fh = $parser->output_fh ();
729        close $fh;
730        return $retval;
731    } else {
732        $parser->output_fh (\*STDOUT);
733        return $parser->parse_file (@_);
734    }
735}
736
737# Reset the underlying Pod::Simple object between calls to parse_from_file so
738# that the same object can be reused to convert multiple pages.
739sub parse_from_file {
740    my $self = shift;
741    $self->reinit;
742
743    # Fake the old cutting option to Pod::Parser.  This fiddles with internal
744    # Pod::Simple state and is quite ugly; we need a better approach.
745    if (ref ($_[0]) eq 'HASH') {
746        my $opts = shift @_;
747        if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
748            $$self{in_pod} = 1;
749            $$self{last_was_blank} = 1;
750        }
751    }
752
753    # Do the work.
754    my $retval = $self->Pod::Simple::parse_from_file (@_);
755
756    # Flush output, since Pod::Simple doesn't do this.  Ideally we should also
757    # close the file descriptor if we had to open one, but we can't easily
758    # figure this out.
759    my $fh = $self->output_fh ();
760    my $oldfh = select $fh;
761    my $oldflush = $|;
762    $| = 1;
763    print $fh '';
764    $| = $oldflush;
765    select $oldfh;
766    return $retval;
767}
768
769# Pod::Simple failed to provide this backward compatibility function, so
770# implement it ourselves.  File handles are one of the inputs that
771# parse_from_file supports.
772sub parse_from_filehandle {
773    my $self = shift;
774    $self->parse_from_file (@_);
775}
776
777# Pod::Simple's parse_file doesn't set output_fh.  Wrap the call and do so
778# ourself unless it was already set by the caller, since our documentation has
779# always said that this should work.
780sub parse_file {
781    my ($self, $in) = @_;
782    unless (defined $$self{output_fh}) {
783        $self->output_fh (\*STDOUT);
784    }
785    return $self->SUPER::parse_file ($in);
786}
787
788# Do the same for parse_lines, just to be polite.  Pod::Simple's man page
789# implies that the caller is responsible for setting this, but I don't see any
790# reason not to set a default.
791sub parse_lines {
792    my ($self, @lines) = @_;
793    unless (defined $$self{output_fh}) {
794        $self->output_fh (\*STDOUT);
795    }
796    return $self->SUPER::parse_lines (@lines);
797}
798
799# Likewise for parse_string_document.
800sub parse_string_document {
801    my ($self, $doc) = @_;
802    unless (defined $$self{output_fh}) {
803        $self->output_fh (\*STDOUT);
804    }
805    return $self->SUPER::parse_string_document ($doc);
806}
807
808##############################################################################
809# Module return value and documentation
810##############################################################################
811
8121;
813__END__
814
815=for stopwords
816alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
817parsers EBCDIC autodetecting superset unrepresentable FH NNN
818
819=head1 NAME
820
821Pod::Text - Convert POD data to formatted text
822
823=head1 SYNOPSIS
824
825    use Pod::Text;
826    my $parser = Pod::Text->new (sentence => 1, width => 78);
827
828    # Read POD from STDIN and write to STDOUT.
829    $parser->parse_from_filehandle;
830
831    # Read POD from file.pod and write to file.txt.
832    $parser->parse_from_file ('file.pod', 'file.txt');
833
834=head1 DESCRIPTION
835
836Pod::Text is a module that can convert documentation in the POD format (the
837preferred language for documenting Perl) into formatted text.  It uses no
838special formatting controls or codes, and its output is therefore suitable for
839nearly any device.
840
841=head2 Encoding
842
843Pod::Text uses the following logic to choose an output encoding, in order:
844
845=over 4
846
847=item 1.
848
849If a PerlIO encoding layer is set on the output file handle, do not do any
850output encoding and will instead rely on the PerlIO encoding layer.
851
852=item 2.
853
854If the C<encoding> or C<utf8> options are set, use the output encoding
855specified by those options.
856
857=item 3.
858
859If the input encoding of the POD source file was explicitly specified (using
860C<=encoding>) or automatically detected by Pod::Simple, use that as the output
861encoding as well.
862
863=item 4.
864
865Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output
866encoding.  Since this is a superset of ASCII, this will result in ASCII output
867unless the POD input contains non-ASCII characters without declaring or
868autodetecting an encoding (usually via EZ<><> escapes).
869
870=item 5.
871
872Otherwise, for EBCDIC systems, output without doing any encoding and hope
873this works.
874
875=back
876
877One caveat: Pod::Text has to commit to an output encoding the first time it
878outputs a non-ASCII character, and then has to stick with it for consistency.
879However, C<=encoding> commands don't have to be at the beginning of a POD
880document.  If someone uses a non-ASCII character early in a document with an
881escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later,
882ideally Pod::Text would follow rule 3 and output the entire document as ISO
8838859-1.  Instead, it will commit to UTF-8 following rule 4 as soon as it sees
884that escape, and then stick with that encoding for the rest of the document.
885
886Unfortunately, there's no universally good choice for an output encoding.
887Each choice will be incorrect in some circumstances.  This approach was chosen
888primarily for backwards compatibility.  Callers should consider forcing the
889output encoding via C<encoding> if they have any knowledge about what encoding
890the user may expect.
891
892In particular, consider importing the L<Encode::Locale> module, if available,
893and setting C<encoding> to C<locale> to use an output encoding appropriate to
894the user's locale.  But be aware that if the user is not using locales or is
895using a locale of C<C>, Encode::Locale will set the output encoding to
896US-ASCII.  This will cause all non-ASCII characters will be replaced with C<?>
897and produce a flurry of warnings about unsupported characters, which may or
898may not be what you want.
899
900=head1 CLASS METHODS
901
902=over 4
903
904=item new(ARGS)
905
906Create a new Pod::Text object.  ARGS should be a list of key/value pairs,
907where the keys are chosen from the following.  Each option is annotated with
908the version of Pod::Text in which that option was added with its current
909meaning.
910
911=over 4
912
913=item alt
914
915[2.00] If set to a true value, selects an alternate output format that, among
916other things, uses a different heading style and marks C<=item> entries with a
917colon in the left margin.  Defaults to false.
918
919=item code
920
921[2.13] If set to a true value, the non-POD parts of the input file will be
922included in the output.  Useful for viewing code documented with POD blocks
923with the POD rendered and the code left intact.
924
925=item encoding
926
927[5.00] Specifies the encoding of the output.  The value must be an encoding
928recognized by the L<Encode> module (see L<Encode::Supported>).  If the output
929contains characters that cannot be represented in this encoding, that is an
930error that will be reported as configured by the C<errors> option.  If error
931handling is other than C<die>, the unrepresentable character will be replaced
932with the Encode substitution character (normally C<?>).
933
934If the output file handle has a PerlIO encoding layer set, this parameter will
935be ignored and no encoding will be done by Pod::Man.  It will instead rely on
936the encoding layer to make whatever output encoding transformations are
937desired.
938
939WARNING: The input encoding of the POD source is independent from the output
940encoding, and setting this option does not affect the interpretation of the
941POD input.  Unless your POD source is US-ASCII, its encoding should be
942declared with the C<=encoding> command in the source, as near to the top of
943the file as possible.  If this is not done, Pod::Simple will will attempt to
944guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will
945produce warnings.  See L<perlpod(1)> for more information.
946
947=item errors
948
949[3.17] How to report errors.  C<die> says to throw an exception on any POD
950formatting error.  C<stderr> says to report errors on standard error, but not
951to throw an exception.  C<pod> says to include a POD ERRORS section in the
952resulting documentation summarizing the errors.  C<none> ignores POD errors
953entirely, as much as possible.
954
955The default is C<pod>.
956
957=item guesswork
958
959[5.01] By default, Pod::Text applies some default formatting rules based on
960guesswork and regular expressions that are intended to make writing Perl
961documentation easier and require less explicit markup.  These rules may not
962always be appropriate, particularly for documentation that isn't about Perl.
963This option allows turning all or some of it off.
964
965The special value C<all> enables all guesswork.  This is also the default for
966backward compatibility reasons.  The special value C<none> disables all
967guesswork.  Otherwise, the value of this option should be a comma-separated
968list of one or more of the following keywords:
969
970=over 4
971
972=item quoting
973
974If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by
975double quotes in nroff (terminal) output unless the contents are already
976quoted.  When this guesswork is enabled, quote marks will also be suppressed
977for Perl variables, function names, function calls, numbers, and hex
978constants.
979
980=back
981
982Any unknown guesswork name is silently ignored (for potential future
983compatibility), so be careful about spelling.
984
985=item indent
986
987[2.00] The number of spaces to indent regular text, and the default
988indentation for C<=over> blocks.  Defaults to 4.
989
990=item loose
991
992[2.00] If set to a true value, a blank line is printed after a C<=head1>
993heading.  If set to false (the default), no blank line is printed after
994C<=head1>, although one is still printed after C<=head2>.  This is the default
995because it's the expected formatting for manual pages; if you're formatting
996arbitrary text documents, setting this to true may result in more pleasing
997output.
998
999=item margin
1000
1001[2.21] The width of the left margin in spaces.  Defaults to 0.  This is the
1002margin for all text, including headings, not the amount by which regular text
1003is indented; for the latter, see the I<indent> option.  To set the right
1004margin, see the I<width> option.
1005
1006=item nourls
1007
1008[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are
1009formatted to show both the anchor text and the URL.  In other words:
1010
1011    L<foo|http://example.com/>
1012
1013is formatted as:
1014
1015    foo <http://example.com/>
1016
1017This option, if set to a true value, suppresses the URL when anchor text is
1018given, so this example would be formatted as just C<foo>.  This can produce
1019less cluttered output in cases where the URLs are not particularly important.
1020
1021=item quotes
1022
1023[4.00] Sets the quote marks used to surround CE<lt>> text.  If the value is a
1024single character, it is used as both the left and right quote.  Otherwise, it
1025is split in half, and the first half of the string is used as the left quote
1026and the second is used as the right quote.
1027
1028This may also be set to the special value C<none>, in which case no quote
1029marks are added around CE<lt>> text.
1030
1031=item sentence
1032
1033[3.00] If set to a true value, Pod::Text will assume that each sentence ends
1034in two spaces, and will try to preserve that spacing.  If set to false, all
1035consecutive whitespace in non-verbatim paragraphs is compressed into a single
1036space.  Defaults to false.
1037
1038=item stderr
1039
1040[3.10] Send error messages about invalid POD to standard error instead of
1041appending a POD ERRORS section to the generated output.  This is equivalent to
1042setting C<errors> to C<stderr> if C<errors> is not already set.  It is
1043supported for backward compatibility.
1044
1045=item utf8
1046
1047[3.12] If this option is set to a true value, the output encoding is set to
1048UTF-8.  This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding>
1049is not already set.  It is supported for backward compatibility.
1050
1051=item width
1052
1053[2.00] The column at which to wrap text on the right-hand side.  Defaults to
105476.
1055
1056=back
1057
1058=back
1059
1060=head1 INSTANCE METHODS
1061
1062As a derived class from Pod::Simple, Pod::Text supports the same methods and
1063interfaces.  See L<Pod::Simple> for all the details.  This section summarizes
1064the most-frequently-used methods and the ones added by Pod::Text.
1065
1066=over 4
1067
1068=item output_fh(FH)
1069
1070Direct the output from parse_file(), parse_lines(), or parse_string_document()
1071to the file handle FH instead of C<STDOUT>.
1072
1073=item output_string(REF)
1074
1075Direct the output from parse_file(), parse_lines(), or parse_string_document()
1076to the scalar variable pointed to by REF, rather than C<STDOUT>.  For example:
1077
1078    my $man = Pod::Man->new();
1079    my $output;
1080    $man->output_string(\$output);
1081    $man->parse_file('/some/input/file');
1082
1083Be aware that the output in that variable will already be encoded (see
1084L</Encoding>).
1085
1086=item parse_file(PATH)
1087
1088Read the POD source from PATH and format it.  By default, the output is sent
1089to C<STDOUT>, but this can be changed with the output_fh() or output_string()
1090methods.
1091
1092=item parse_from_file(INPUT, OUTPUT)
1093
1094=item parse_from_filehandle(FH, OUTPUT)
1095
1096Read the POD source from INPUT, format it, and output the results to OUTPUT.
1097
1098parse_from_filehandle() is provided for backward compatibility with older
1099versions of Pod::Man.  parse_from_file() should be used instead.
1100
1101=item parse_lines(LINES[, ...[, undef]])
1102
1103Parse the provided lines as POD source, writing the output to either C<STDOUT>
1104or the file handle set with the output_fh() or output_string() methods.  This
1105method can be called repeatedly to provide more input lines.  An explicit
1106C<undef> should be passed to indicate the end of input.
1107
1108This method expects raw bytes, not decoded characters.
1109
1110=item parse_string_document(INPUT)
1111
1112Parse the provided scalar variable as POD source, writing the output to either
1113C<STDOUT> or the file handle set with the output_fh() or output_string()
1114methods.
1115
1116This method expects raw bytes, not decoded characters.
1117
1118=back
1119
1120=head1 FUNCTIONS
1121
1122Pod::Text exports one function for backward compatibility with older versions.
1123This function is deprecated; instead, use the object-oriented interface
1124described above.
1125
1126=over 4
1127
1128=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT])
1129
1130Convert the POD source from INPUT to text and write it to OUTPUT.  If OUTPUT
1131is not given, defaults to C<STDOUT>.  INPUT can be any expression supported as
1132the second argument to two-argument open().
1133
1134If C<-a> is given as an initial argument, pass the C<alt> option to the
1135Pod::Text constructor.  This enables alternative formatting.
1136
1137If C<-NNN> is given as an initial argument, pass the C<width> option to the
1138Pod::Text constructor with the number C<NNN> as its argument.  This sets the
1139wrap line width to NNN.
1140
1141=back
1142
1143=head1 DIAGNOSTICS
1144
1145=over 4
1146
1147=item Bizarre space in item
1148
1149=item Item called without tag
1150
1151(W) Something has gone wrong in internal C<=item> processing.  These
1152messages indicate a bug in Pod::Text; you should never see them.
1153
1154=item Can't open %s for reading: %s
1155
1156(F) Pod::Text was invoked via the compatibility mode pod2text() interface
1157and the input file it was given could not be opened.
1158
1159=item Invalid errors setting "%s"
1160
1161(F) The C<errors> parameter to the constructor was set to an unknown value.
1162
1163=item Invalid quote specification "%s"
1164
1165(F) The quote specification given (the C<quotes> option to the
1166constructor) was invalid.  A quote specification must be either one
1167character long or an even number (greater than one) characters long.
1168
1169=item POD document had syntax errors
1170
1171(F) The POD document being formatted had syntax errors and the C<errors>
1172option was set to C<die>.
1173
1174=back
1175
1176=head1 COMPATIBILITY
1177
1178Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module
1179included with Perl, in Perl 5.6.0.  Earlier versions of Perl had a different
1180Pod::Text module, with a different API.
1181
1182The current API based on L<Pod::Simple> was added in Pod::Text 3.00.
1183Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to
1184incorporate those changes.  This is the first version that correctly supports
1185all modern POD syntax.  The parse_from_filehandle() method was re-added for
1186backward compatibility in Pod::Text 3.07, included in Perl 5.9.4.
1187
1188Pod::Text 3.12, included in Perl 5.10.1, first implemented the current
1189practice of attempting to match the default output encoding with the input
1190encoding of the POD source, unless overridden by the C<utf8> option or (added
1191later) the C<encoding> option.
1192
1193Support for anchor text in LZ<><> links of type URL was added in Pod::Text
11943.14, included in Perl 5.11.5.
1195
1196parse_lines(), parse_string_document(), and parse_file() set a default output
1197file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18,
1198included in Perl 5.19.5.
1199
1200Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the
1201version of the podlators distribution.  All modules included in podlators, and
1202the podlators distribution itself, share the same version number from this
1203point forward.
1204
1205Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC
1206systems, present in all versions back to 3.00, that would cause opening
1207brackets to disappear.
1208
1209Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it
1210sees a non-ASCII character in the input and the input encoding is not
1211specified.  It also commits to an encoding with the first non-ASCII character
1212and does not change the output encoding if the input encoding changes.  The
1213L<Encode> module is now used for all output encoding rather than PerlIO
1214layers, which fixes earlier problems with output to scalars.
1215
1216=head1 AUTHOR
1217
1218Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text
1219by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser
1220by Brad Appleton <bradapp@enteract.com>.  Sean Burke's initial conversion of
1221Pod::Man to use Pod::Simple provided much-needed guidance on how to use
1222Pod::Simple.
1223
1224=head1 COPYRIGHT AND LICENSE
1225
1226Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ
1227Allbery <rra@cpan.org>
1228
1229This program is free software; you may redistribute it and/or modify it
1230under the same terms as Perl itself.
1231
1232=head1 SEE ALSO
1233
1234L<Encode::Locale>, L<Encode::Supproted>, L<Pod::Simple>,
1235L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
1236
1237The current version of this module is always available from its web site at
1238L<https://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
1239Perl core distribution as of 5.6.0.
1240
1241=cut
1242
1243# Local Variables:
1244# copyright-at-end-flag: t
1245# End:
1246