xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/Text.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1# Convert POD data to formatted text.
2#
3# This module converts POD to formatted text.  It replaces the old Pod::Text
4# module that came with versions of Perl prior to 5.6.0 and attempts to match
5# its output except for some specific circumstances where other decisions
6# seemed to produce better output.  It uses Pod::Parser and is designed to be
7# very easy to subclass.
8#
9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
10
11##############################################################################
12# Modules and declarations
13##############################################################################
14
15package Pod::Text;
16
17use 5.010;
18use strict;
19use warnings;
20
21use Carp qw(carp croak);
22use Encode qw(encode);
23use Exporter ();
24use Pod::Simple ();
25
26our @ISA = qw(Pod::Simple Exporter);
27our $VERSION = '5.01_02';
28$VERSION =~ tr/_//d;
29
30# We have to export pod2text for backward compatibility.
31our @EXPORT = qw(pod2text);
32
33# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available.  Code
34# taken from Pod::Simple 3.32, but was only added in 3.30.
35my ($NBSP, $SHY);
36if ($Pod::Simple::VERSION ge 3.30) {
37    $NBSP = $Pod::Simple::nbsp;
38    $SHY  = $Pod::Simple::shy;
39} else {
40    $NBSP = chr utf8::unicode_to_native(0xA0);
41    $SHY  = chr utf8::unicode_to_native(0xAD);
42}
43
44# Import the ASCII constant from Pod::Simple.  This is true iff we're in an
45# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
46# generally only false for EBCDIC.
47BEGIN { *ASCII = \&Pod::Simple::ASCII }
48
49##############################################################################
50# Initialization
51##############################################################################
52
53# This function handles code blocks.  It's registered as a callback to
54# Pod::Simple and therefore doesn't work as a regular method call, but all it
55# does is call output_code with the line.
56sub handle_code {
57    my ($line, $number, $parser) = @_;
58    $parser->output_code ($line . "\n");
59}
60
61# Initialize the object and set various Pod::Simple options that we need.
62# Here, we also process any additional options passed to the constructor or
63# set up defaults if none were given.  Note that all internal object keys are
64# in all-caps, reserving all lower-case object keys for Pod::Simple and user
65# arguments.
66sub new {
67    my $class = shift;
68    my $self = $class->SUPER::new;
69
70    # Tell Pod::Simple to keep whitespace whenever possible.
71    if ($self->can ('preserve_whitespace')) {
72        $self->preserve_whitespace (1);
73    } else {
74        $self->fullstop_space_harden (1);
75    }
76
77    # The =for and =begin targets that we accept.
78    $self->accept_targets (qw/text TEXT/);
79
80    # Ensure that contiguous blocks of code are merged together.  Otherwise,
81    # some of the guesswork heuristics don't work right.
82    $self->merge_text (1);
83
84    # Pod::Simple doesn't do anything useful with our arguments, but we want
85    # to put them in our object as hash keys and values.  This could cause
86    # problems if we ever clash with Pod::Simple's own internal class
87    # variables.
88    my %opts = @_;
89    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
90    %$self = (%$self, @opts);
91
92    # Backwards-compatibility support for the stderr option.
93    if ($$self{opt_stderr} and not $$self{opt_errors}) {
94        $$self{opt_errors} = 'stderr';
95    }
96    delete $$self{opt_stderr};
97
98    # Backwards-compatibility support for the utf8 option.
99    if ($$self{opt_utf8} && !$$self{opt_encoding}) {
100        $$self{opt_encoding} = 'UTF-8';
101    }
102    delete $$self{opt_utf8};
103
104    # Validate the errors parameter and act on it.
105    $$self{opt_errors} //= 'pod';
106    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
107        $self->no_errata_section (1);
108        $self->complain_stderr (1);
109        if ($$self{opt_errors} eq 'die') {
110            $$self{complain_die} = 1;
111        }
112    } elsif ($$self{opt_errors} eq 'pod') {
113        $self->no_errata_section (0);
114        $self->complain_stderr (0);
115    } elsif ($$self{opt_errors} eq 'none') {
116        $self->no_errata_section (1);
117        $self->no_whining (1);
118    } else {
119        croak (qq(Invalid errors setting: "$$self{errors}"));
120    }
121    delete $$self{errors};
122
123    # Initialize various things from our parameters.
124    $$self{opt_alt}      //= 0;
125    $$self{opt_indent}   //= 4;
126    $$self{opt_margin}   //= 0;
127    $$self{opt_loose}    //= 0;
128    $$self{opt_sentence} //= 0;
129    $$self{opt_width}    //= 76;
130
131    # Figure out what quotes we'll be using for C<> text.
132    $$self{opt_quotes} ||= '"';
133    if ($$self{opt_quotes} eq 'none') {
134        $$self{LQUOTE} = $$self{RQUOTE} = '';
135    } elsif (length ($$self{opt_quotes}) == 1) {
136        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
137    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
138        my $length = length ($$self{opt_quotes}) / 2;
139        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
140        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
141    } else {
142        croak qq(Invalid quote specification "$$self{opt_quotes}");
143    }
144
145    # Configure guesswork based on options.
146    my $guesswork = $self->{opt_guesswork} || q{};
147    my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork);
148    if (!%guesswork || $guesswork{all}) {
149        $$self{GUESSWORK} = {quoting => 1};
150    } elsif ($guesswork{none}) {
151        $$self{GUESSWORK} = {};
152    } else {
153        $$self{GUESSWORK} = {%guesswork};
154    }
155
156    # If requested, do something with the non-POD text.
157    $self->code_handler (\&handle_code) if $$self{opt_code};
158
159    # Return the created object.
160    return $self;
161}
162
163##############################################################################
164# Core parsing
165##############################################################################
166
167# This is the glue that connects the code below with Pod::Simple itself.  The
168# goal is to convert the event stream coming from the POD parser into method
169# calls to handlers once the complete content of a tag has been seen.  Each
170# paragraph or POD command will have textual content associated with it, and
171# as soon as all of a paragraph or POD command has been seen, that content
172# will be passed in to the corresponding method for handling that type of
173# object.  The exceptions are handlers for lists, which have opening tag
174# handlers and closing tag handlers that will be called right away.
175#
176# The internal hash key PENDING is used to store the contents of a tag until
177# all of it has been seen.  It holds a stack of open tags, each one
178# represented by a tuple of the attributes hash for the tag and the contents
179# of the tag.
180
181# Add a block of text to the contents of the current node, formatting it
182# according to the current formatting instructions as we do.
183sub _handle_text {
184    my ($self, $text) = @_;
185    my $tag = $$self{PENDING}[-1];
186    $$tag[1] .= $text;
187}
188
189# Given an element name, get the corresponding method name.
190sub method_for_element {
191    my ($self, $element) = @_;
192    $element =~ tr/-/_/;
193    $element =~ tr/A-Z/a-z/;
194    $element =~ tr/_a-z0-9//cd;
195    return $element;
196}
197
198# Handle the start of a new element.  If cmd_element is defined, assume that
199# we need to collect the entire tree for this element before passing it to the
200# element method, and create a new tree into which we'll collect blocks of
201# text and nested elements.  Otherwise, if start_element is defined, call it.
202sub _handle_element_start {
203    my ($self, $element, $attrs) = @_;
204    my $method = $self->method_for_element ($element);
205
206    # If we have a command handler, we need to accumulate the contents of the
207    # tag before calling it.
208    if ($self->can ("cmd_$method")) {
209        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
210    } elsif ($self->can ("start_$method")) {
211        my $method = 'start_' . $method;
212        $self->$method ($attrs, '');
213    }
214}
215
216# Handle the end of an element.  If we had a cmd_ method for this element,
217# this is where we pass along the text that we've accumulated.  Otherwise, if
218# we have an end_ method for the element, call that.
219sub _handle_element_end {
220    my ($self, $element) = @_;
221    my $method = $self->method_for_element ($element);
222
223    # If we have a command handler, pull off the pending text and pass it to
224    # the handler along with the saved attribute hash.
225    if ($self->can ("cmd_$method")) {
226        my $tag = pop @{ $$self{PENDING} };
227        my $method = 'cmd_' . $method;
228        my $text = $self->$method (@$tag);
229        if (defined $text) {
230            if (@{ $$self{PENDING} } > 1) {
231                $$self{PENDING}[-1][1] .= $text;
232            } else {
233                $self->output ($text);
234            }
235        }
236    } elsif ($self->can ("end_$method")) {
237        my $method = 'end_' . $method;
238        $self->$method ();
239    }
240}
241
242##############################################################################
243# Output formatting
244##############################################################################
245
246# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
247# because it plays games with tabs.  We can't use formline, even though we'd
248# really like to, because it screws up non-printing characters.  So we have to
249# do the wrapping ourselves.
250sub wrap {
251    my $self = shift;
252    local $_ = shift;
253    my $output = '';
254    my $spaces = ' ' x $$self{MARGIN};
255    my $width = $$self{opt_width} - $$self{MARGIN};
256    while (length > $width) {
257        if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) {
258            $output .= $spaces . $1 . "\n";
259        } else {
260            last;
261        }
262    }
263    $output .= $spaces . $_;
264    $output =~ s/\s+$/\n\n/;
265    return $output;
266}
267
268# Reformat a paragraph of text for the current margin.  Takes the text to
269# reformat and returns the formatted text.
270sub reformat {
271    my $self = shift;
272    local $_ = shift;
273
274    # If we're trying to preserve two spaces after sentences, do some munging
275    # to support that.  Otherwise, smash all repeated whitespace.  Be careful
276    # not to use \s here, which in Unicode input may match non-breaking spaces
277    # that we don't want to smash.
278    if ($$self{opt_sentence}) {
279        s/ +$//mg;
280        s/\.\n/. \n/g;
281        s/\n/ /g;
282        s/   +/  /g;
283    } else {
284        s/[ \t\n]+/ /g;
285    }
286    return $self->wrap ($_);
287}
288
289# Output text to the output device.  Replace non-breaking spaces with spaces
290# and soft hyphens with nothing, and then determine the output encoding.
291sub output {
292    my ($self, @text) = @_;
293    my $text = join ('', @text);
294    if ($NBSP) {
295        $text =~ s/$NBSP/ /g;
296    }
297    if ($SHY) {
298        $text =~ s/$SHY//g;
299    }
300
301    # The logic used here is described in the POD documentation.  Prefer the
302    # configured encoding, then the pass-through option of using the same
303    # encoding as the input, and then UTF-8, but commit to an encoding for the
304    # document.
305    #
306    # ENCODE says whether to encode or not and is turned off if there is a
307    # PerlIO encoding layer (in start_document).  ENCODING is the encoding
308    # that we previously committed to and is cleared at the start of each
309    # document.
310    if ($$self{ENCODE}) {
311        my $encoding = $$self{ENCODING};
312        if (!$encoding) {
313            $encoding = $self->encoding();
314            if (!$encoding && ASCII && $text =~ /[^\x00-\x7F]/) {
315                $encoding = 'UTF-8';
316            }
317            if ($encoding) {
318                $$self{ENCODING} = $encoding;
319            }
320        }
321        if ($encoding) {
322            my $check = sub {
323                my ($char) = @_;
324                my $display = '"\x{' . hex($char) . '}"';
325                my $error = "$display does not map to $$self{ENCODING}";
326                $self->whine ($self->line_count(), $error);
327                return Encode::encode ($$self{ENCODING}, chr($char));
328            };
329            print { $$self{output_fh} } encode ($encoding, $text, $check);
330        } else {
331            print { $$self{output_fh} } $text;
332        }
333    } else {
334        print { $$self{output_fh} } $text;
335    }
336}
337
338# Output a block of code (something that isn't part of the POD text).  Called
339# by preprocess_paragraph only if we were given the code option.  Exists here
340# only so that it can be overridden by subclasses.
341sub output_code { $_[0]->output ($_[1]) }
342
343##############################################################################
344# Document initialization
345##############################################################################
346
347# Set up various things that have to be initialized on a per-document basis.
348sub start_document {
349    my ($self, $attrs) = @_;
350    if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
351        $$self{CONTENTLESS} = 1;
352    } else {
353        delete $$self{CONTENTLESS};
354    }
355    my $margin = $$self{opt_indent} + $$self{opt_margin};
356
357    # Initialize a few per-document variables.
358    $$self{INDENTS} = [];       # Stack of indentations.
359    $$self{MARGIN}  = $margin;  # Default left margin.
360    $$self{PENDING} = [[]];     # Pending output.
361
362    # We have to redo encoding handling for each document.  Check whether the
363    # output file handle already has a PerlIO encoding layer set and, if so,
364    # disable encoding.
365    $$self{ENCODE} = 1;
366    eval {
367        require PerlIO;
368        my @options = (output => 1, details => 1);
369        my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
370        if ($flag && ($flag & PerlIO::F_UTF8 ())) {
371            $$self{ENCODE} = 0;
372        }
373    };
374    $$self{ENCODING} = $$self{opt_encoding};
375
376    return '';
377}
378
379# Handle the end of the document.  The only thing we do is handle dying on POD
380# errors, since Pod::Parser currently doesn't.
381sub end_document {
382    my ($self) = @_;
383    if ($$self{complain_die} && $self->errors_seen) {
384        croak ("POD document had syntax errors");
385    }
386}
387
388##############################################################################
389# Text blocks
390##############################################################################
391
392# Intended for subclasses to override, this method returns text with any
393# non-printing formatting codes stripped out so that length() correctly
394# returns the length of the text.  For basic Pod::Text, it does nothing.
395sub strip_format {
396    my ($self, $string) = @_;
397    return $string;
398}
399
400# This method is called whenever an =item command is complete (in other words,
401# we've seen its associated paragraph or know for certain that it doesn't have
402# one).  It gets the paragraph associated with the item as an argument.  If
403# that argument is empty, just output the item tag; if it contains a newline,
404# output the item tag followed by the newline.  Otherwise, see if there's
405# enough room for us to output the item tag in the margin of the text or if we
406# have to put it on a separate line.
407sub item {
408    my ($self, $text) = @_;
409    my $tag = $$self{ITEM};
410    unless (defined $tag) {
411        carp "Item called without tag";
412        return;
413    }
414    undef $$self{ITEM};
415
416    # Calculate the indentation and margin.  $fits is set to true if the tag
417    # will fit into the margin of the paragraph given our indentation level.
418    my $indent = $$self{INDENTS}[-1] // $$self{opt_indent};
419    my $margin = ' ' x $$self{opt_margin};
420    my $tag_length = length ($self->strip_format ($tag));
421    my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);
422
423    # If the tag doesn't fit, or if we have no associated text, print out the
424    # tag separately.  Otherwise, put the tag in the margin of the paragraph.
425    if (!$text || $text =~ /^\s+$/ || !$fits) {
426        my $realindent = $$self{MARGIN};
427        $$self{MARGIN} = $indent;
428        my $output = $self->reformat ($tag);
429        $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
430        $output =~ s/\n*$/\n/;
431
432        # If the text is just whitespace, we have an empty item paragraph;
433        # this can result from =over/=item/=back without any intermixed
434        # paragraphs.  Insert some whitespace to keep the =item from merging
435        # into the next paragraph.
436        $output .= "\n" if $text && $text =~ /^\s*$/;
437
438        $self->output ($output);
439        $$self{MARGIN} = $realindent;
440        $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
441    } else {
442        my $space = ' ' x $indent;
443        $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
444        $text = $self->reformat ($text);
445        $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
446        my $tagspace = ' ' x $tag_length;
447        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
448        $self->output ($text);
449    }
450}
451
452# Handle a basic block of text.  The only tricky thing here is that if there
453# is a pending item tag, we need to format this as an item paragraph.
454sub cmd_para {
455    my ($self, $attrs, $text) = @_;
456    $text =~ s/\s+$/\n/;
457    if (defined $$self{ITEM}) {
458        $self->item ($text . "\n");
459    } else {
460        $self->output ($self->reformat ($text . "\n"));
461    }
462    return '';
463}
464
465# Handle a verbatim paragraph.  Just print it out, but indent it according to
466# our margin.
467sub cmd_verbatim {
468    my ($self, $attrs, $text) = @_;
469    $self->item if defined $$self{ITEM};
470    return if $text =~ /^\s*$/;
471    $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
472    $text =~ s/\s*$/\n\n/;
473    $self->output ($text);
474    return '';
475}
476
477# Handle literal text (produced by =for and similar constructs).  Just output
478# it with the minimum of changes.
479sub cmd_data {
480    my ($self, $attrs, $text) = @_;
481    $text =~ s/^\n+//;
482    $text =~ s/\n{0,2}$/\n/;
483    $self->output ($text);
484    return '';
485}
486
487##############################################################################
488# Headings
489##############################################################################
490
491# The common code for handling all headers.  Takes the header text, the
492# indentation, and the surrounding marker for the alt formatting method.
493sub heading {
494    my ($self, $text, $indent, $marker) = @_;
495    $self->item ("\n\n") if defined $$self{ITEM};
496    $text =~ s/\s+$//;
497    if ($$self{opt_alt}) {
498        my $closemark = reverse (split (//, $marker));
499        my $margin = ' ' x $$self{opt_margin};
500        $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
501    } else {
502        $text .= "\n" if $$self{opt_loose};
503        my $margin = ' ' x ($$self{opt_margin} + $indent);
504        $self->output ($margin . $text . "\n");
505    }
506    return '';
507}
508
509# First level heading.
510sub cmd_head1 {
511    my ($self, $attrs, $text) = @_;
512    $self->heading ($text, 0, '====');
513}
514
515# Second level heading.
516sub cmd_head2 {
517    my ($self, $attrs, $text) = @_;
518    $self->heading ($text, $$self{opt_indent} / 2, '==  ');
519}
520
521# Third level heading.
522sub cmd_head3 {
523    my ($self, $attrs, $text) = @_;
524    $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '=   ');
525}
526
527# Fourth level heading.
528sub cmd_head4 {
529    my ($self, $attrs, $text) = @_;
530    $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '-   ');
531}
532
533##############################################################################
534# List handling
535##############################################################################
536
537# Handle the beginning of an =over block.  Takes the type of the block as the
538# first argument, and then the attr hash.  This is called by the handlers for
539# the four different types of lists (bullet, number, text, and block).
540sub over_common_start {
541    my ($self, $attrs) = @_;
542    $self->item ("\n\n") if defined $$self{ITEM};
543
544    # Find the indentation level.
545    my $indent = $$attrs{indent};
546    unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
547        $indent = $$self{opt_indent};
548    }
549
550    # Add this to our stack of indents and increase our current margin.
551    push (@{ $$self{INDENTS} }, $$self{MARGIN});
552    $$self{MARGIN} += ($indent + 0);
553    return '';
554}
555
556# End an =over block.  Takes no options other than the class pointer.  Output
557# any pending items and then pop one level of indentation.
558sub over_common_end {
559    my ($self) = @_;
560    $self->item ("\n\n") if defined $$self{ITEM};
561    $$self{MARGIN} = pop @{ $$self{INDENTS} };
562    return '';
563}
564
565# Dispatch the start and end calls as appropriate.
566sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
567sub start_over_number { $_[0]->over_common_start ($_[1]) }
568sub start_over_text   { $_[0]->over_common_start ($_[1]) }
569sub start_over_block  { $_[0]->over_common_start ($_[1]) }
570sub end_over_bullet { $_[0]->over_common_end }
571sub end_over_number { $_[0]->over_common_end }
572sub end_over_text   { $_[0]->over_common_end }
573sub end_over_block  { $_[0]->over_common_end }
574
575# The common handler for all item commands.  Takes the type of the item, the
576# attributes, and then the text of the item.
577sub item_common {
578    my ($self, $type, $attrs, $text) = @_;
579    $self->item if defined $$self{ITEM};
580
581    # Clean up the text.  We want to end up with two variables, one ($text)
582    # which contains any body text after taking out the item portion, and
583    # another ($item) which contains the actual item text.  Note the use of
584    # the internal Pod::Simple attribute here; that's a potential land mine.
585    $text =~ s/\s+$//;
586    my ($item, $index);
587    if ($type eq 'bullet') {
588        $item = '*';
589    } elsif ($type eq 'number') {
590        $item = $$attrs{'~orig_content'};
591    } else {
592        $item = $text;
593        $item =~ s/\s*\n\s*/ /g;
594        $text = '';
595    }
596    $$self{ITEM} = $item;
597
598    # If body text for this item was included, go ahead and output that now.
599    if ($text) {
600        $text =~ s/\s*$/\n/;
601        $self->item ($text);
602    }
603    return '';
604}
605
606# Dispatch the item commands to the appropriate place.
607sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
608sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
609sub cmd_item_text   { my $self = shift; $self->item_common ('text',   @_) }
610sub cmd_item_block  { my $self = shift; $self->item_common ('block',  @_) }
611
612##############################################################################
613# Formatting codes
614##############################################################################
615
616# The simple ones.
617sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] }
618sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] }
619sub cmd_i { return '*' . $_[2] . '*' }
620sub cmd_x { return '' }
621
622# Convert all internal whitespace to $NBSP.
623sub cmd_s {
624    my ($self, $attrs, $text) = @_;
625    $text =~ s{ \s }{$NBSP}xmsg;
626    return $text;
627}
628
629# Apply a whole bunch of messy heuristics to not quote things that don't
630# benefit from being quoted.  These originally come from Barrie Slaymaker and
631# largely duplicate code in Pod::Man.
632sub cmd_c {
633    my ($self, $attrs, $text) = @_;
634
635    # A regex that matches the portion of a variable reference that's the
636    # array or hash index, separated out just because we want to use it in
637    # several places in the following regex.
638    my $index = '(?: \[[^]]+\] | \{[^}]+\} )?';
639
640    # Check for things that we don't want to quote, and if we find any of
641    # them, return the string with just a font change and no quoting.
642    #
643    # Traditionally, Pod::Text has not quoted Perl variables, functions,
644    # numbers, or hex constants, but this is not always desirable.  Make this
645    # optional on the quoting guesswork flag.
646    my $extra = qr{(?!)}xms;    # never matches
647    if ($$self{GUESSWORK}{quoting}) {
648        $extra = qr{
649             \$+ [\#^]? \S $index            # special ($^F, $")
650           | [\$\@%&*]+ \#? [:\'\w]+ $index  # plain var or func
651           | [\$\@%&*]* [:\'\w]+
652             (?: -> )? \(\s*[^\s,\)]*\s*\)   # 0/1-arg func call
653           | [+-]? ( \d[\d.]* | \.\d+ )
654             (?: [eE][+-]?\d+ )?             # a number
655           | 0x [a-fA-F\d]+                  # a hex constant
656         }xms;
657    }
658    $text =~ m{
659      ^\s*
660      (?:
661         ( [\'\`\"] ) .* \1                  # already quoted
662       | \` .* \'                            # `quoted'
663       | $extra
664      )
665      \s*\z
666     }xms and return $text;
667
668    # If we didn't return, go ahead and quote the text.
669    return $$self{opt_alt}
670        ? "``$text''"
671        : "$$self{LQUOTE}$text$$self{RQUOTE}";
672}
673
674# Links reduce to the text that we're given, wrapped in angle brackets if it's
675# a URL.
676sub cmd_l {
677    my ($self, $attrs, $text) = @_;
678    if ($$attrs{type} eq 'url') {
679        if (not defined($$attrs{to}) or $$attrs{to} eq $text) {
680            return "<$text>";
681        } elsif ($$self{opt_nourls}) {
682            return $text;
683        } else {
684            return "$text <$$attrs{to}>";
685        }
686    } else {
687        return $text;
688    }
689}
690
691##############################################################################
692# Backwards compatibility
693##############################################################################
694
695# The old Pod::Text module did everything in a pod2text() function.  This
696# tries to provide the same interface for legacy applications.
697sub pod2text {
698    my @args;
699
700    # This is really ugly; I hate doing option parsing in the middle of a
701    # module.  But the old Pod::Text module supported passing flags to its
702    # entry function, so handle -a and -<number>.
703    while ($_[0] =~ /^-/) {
704        my $flag = shift;
705        if    ($flag eq '-a')       { push (@args, alt => 1)    }
706        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
707        else {
708            unshift (@_, $flag);
709            last;
710        }
711    }
712
713    # Now that we know what arguments we're using, create the parser.
714    my $parser = Pod::Text->new (@args);
715
716    # If two arguments were given, the second argument is going to be a file
717    # handle.  That means we want to call parse_from_filehandle(), which means
718    # we need to turn the first argument into a file handle.  Magic open will
719    # handle the <&STDIN case automagically.
720    if (defined $_[1]) {
721        my @fhs = @_;
722        local *IN;
723        unless (open (IN, $fhs[0])) {
724            croak ("Can't open $fhs[0] for reading: $!\n");
725            return;
726        }
727        $fhs[0] = \*IN;
728        $parser->output_fh ($fhs[1]);
729        my $retval = $parser->parse_file ($fhs[0]);
730        my $fh = $parser->output_fh ();
731        close $fh;
732        return $retval;
733    } else {
734        $parser->output_fh (\*STDOUT);
735        return $parser->parse_file (@_);
736    }
737}
738
739# Reset the underlying Pod::Simple object between calls to parse_from_file so
740# that the same object can be reused to convert multiple pages.
741sub parse_from_file {
742    my $self = shift;
743    $self->reinit;
744
745    # Fake the old cutting option to Pod::Parser.  This fiddles with internal
746    # Pod::Simple state and is quite ugly; we need a better approach.
747    if (ref ($_[0]) eq 'HASH') {
748        my $opts = shift @_;
749        if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
750            $$self{in_pod} = 1;
751            $$self{last_was_blank} = 1;
752        }
753    }
754
755    # Do the work.
756    my $retval = $self->Pod::Simple::parse_from_file (@_);
757
758    # Flush output, since Pod::Simple doesn't do this.  Ideally we should also
759    # close the file descriptor if we had to open one, but we can't easily
760    # figure this out.
761    my $fh = $self->output_fh ();
762    my $oldfh = select $fh;
763    my $oldflush = $|;
764    $| = 1;
765    print $fh '';
766    $| = $oldflush;
767    select $oldfh;
768    return $retval;
769}
770
771# Pod::Simple failed to provide this backward compatibility function, so
772# implement it ourselves.  File handles are one of the inputs that
773# parse_from_file supports.
774sub parse_from_filehandle {
775    my $self = shift;
776    $self->parse_from_file (@_);
777}
778
779# Pod::Simple's parse_file doesn't set output_fh.  Wrap the call and do so
780# ourself unless it was already set by the caller, since our documentation has
781# always said that this should work.
782sub parse_file {
783    my ($self, $in) = @_;
784    unless (defined $$self{output_fh}) {
785        $self->output_fh (\*STDOUT);
786    }
787    return $self->SUPER::parse_file ($in);
788}
789
790# Do the same for parse_lines, just to be polite.  Pod::Simple's man page
791# implies that the caller is responsible for setting this, but I don't see any
792# reason not to set a default.
793sub parse_lines {
794    my ($self, @lines) = @_;
795    unless (defined $$self{output_fh}) {
796        $self->output_fh (\*STDOUT);
797    }
798    return $self->SUPER::parse_lines (@lines);
799}
800
801# Likewise for parse_string_document.
802sub parse_string_document {
803    my ($self, $doc) = @_;
804    unless (defined $$self{output_fh}) {
805        $self->output_fh (\*STDOUT);
806    }
807    return $self->SUPER::parse_string_document ($doc);
808}
809
810##############################################################################
811# Module return value and documentation
812##############################################################################
813
8141;
815__END__
816
817=for stopwords
818alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
819parsers EBCDIC autodetecting superset unrepresentable FH NNN
820
821=head1 NAME
822
823Pod::Text - Convert POD data to formatted text
824
825=head1 SYNOPSIS
826
827    use Pod::Text;
828    my $parser = Pod::Text->new (sentence => 1, width => 78);
829
830    # Read POD from STDIN and write to STDOUT.
831    $parser->parse_from_filehandle;
832
833    # Read POD from file.pod and write to file.txt.
834    $parser->parse_from_file ('file.pod', 'file.txt');
835
836=head1 DESCRIPTION
837
838Pod::Text is a module that can convert documentation in the POD format (the
839preferred language for documenting Perl) into formatted text.  It uses no
840special formatting controls or codes, and its output is therefore suitable for
841nearly any device.
842
843=head2 Encoding
844
845Pod::Text uses the following logic to choose an output encoding, in order:
846
847=over 4
848
849=item 1.
850
851If a PerlIO encoding layer is set on the output file handle, do not do any
852output encoding and will instead rely on the PerlIO encoding layer.
853
854=item 2.
855
856If the C<encoding> or C<utf8> options are set, use the output encoding
857specified by those options.
858
859=item 3.
860
861If the input encoding of the POD source file was explicitly specified (using
862C<=encoding>) or automatically detected by Pod::Simple, use that as the output
863encoding as well.
864
865=item 4.
866
867Otherwise, if running on a non-EBCDIC system, use UTF-8 as the output
868encoding.  Since this is a superset of ASCII, this will result in ASCII output
869unless the POD input contains non-ASCII characters without declaring or
870autodetecting an encoding (usually via EZ<><> escapes).
871
872=item 5.
873
874Otherwise, for EBCDIC systems, output without doing any encoding and hope
875this works.
876
877=back
878
879One caveat: Pod::Text has to commit to an output encoding the first time it
880outputs a non-ASCII character, and then has to stick with it for consistency.
881However, C<=encoding> commands don't have to be at the beginning of a POD
882document.  If someone uses a non-ASCII character early in a document with an
883escape, such as EZ<><0xEF>, and then puts C<=encoding iso-8859-1> later,
884ideally Pod::Text would follow rule 3 and output the entire document as ISO
8858859-1.  Instead, it will commit to UTF-8 following rule 4 as soon as it sees
886that escape, and then stick with that encoding for the rest of the document.
887
888Unfortunately, there's no universally good choice for an output encoding.
889Each choice will be incorrect in some circumstances.  This approach was chosen
890primarily for backwards compatibility.  Callers should consider forcing the
891output encoding via C<encoding> if they have any knowledge about what encoding
892the user may expect.
893
894In particular, consider importing the L<Encode::Locale> module, if available,
895and setting C<encoding> to C<locale> to use an output encoding appropriate to
896the user's locale.  But be aware that if the user is not using locales or is
897using a locale of C<C>, Encode::Locale will set the output encoding to
898US-ASCII.  This will cause all non-ASCII characters will be replaced with C<?>
899and produce a flurry of warnings about unsupported characters, which may or
900may not be what you want.
901
902=head1 CLASS METHODS
903
904=over 4
905
906=item new(ARGS)
907
908Create a new Pod::Text object.  ARGS should be a list of key/value pairs,
909where the keys are chosen from the following.  Each option is annotated with
910the version of Pod::Text in which that option was added with its current
911meaning.
912
913=over 4
914
915=item alt
916
917[2.00] If set to a true value, selects an alternate output format that, among
918other things, uses a different heading style and marks C<=item> entries with a
919colon in the left margin.  Defaults to false.
920
921=item code
922
923[2.13] If set to a true value, the non-POD parts of the input file will be
924included in the output.  Useful for viewing code documented with POD blocks
925with the POD rendered and the code left intact.
926
927=item encoding
928
929[5.00] Specifies the encoding of the output.  The value must be an encoding
930recognized by the L<Encode> module (see L<Encode::Supported>).  If the output
931contains characters that cannot be represented in this encoding, that is an
932error that will be reported as configured by the C<errors> option.  If error
933handling is other than C<die>, the unrepresentable character will be replaced
934with the Encode substitution character (normally C<?>).
935
936If the output file handle has a PerlIO encoding layer set, this parameter will
937be ignored and no encoding will be done by Pod::Man.  It will instead rely on
938the encoding layer to make whatever output encoding transformations are
939desired.
940
941WARNING: The input encoding of the POD source is independent from the output
942encoding, and setting this option does not affect the interpretation of the
943POD input.  Unless your POD source is US-ASCII, its encoding should be
944declared with the C<=encoding> command in the source, as near to the top of
945the file as possible.  If this is not done, Pod::Simple will will attempt to
946guess the encoding and may be successful if it's Latin-1 or UTF-8, but it will
947produce warnings.  See L<perlpod(1)> for more information.
948
949=item errors
950
951[3.17] How to report errors.  C<die> says to throw an exception on any POD
952formatting error.  C<stderr> says to report errors on standard error, but not
953to throw an exception.  C<pod> says to include a POD ERRORS section in the
954resulting documentation summarizing the errors.  C<none> ignores POD errors
955entirely, as much as possible.
956
957The default is C<pod>.
958
959=item guesswork
960
961[5.01] By default, Pod::Text applies some default formatting rules based on
962guesswork and regular expressions that are intended to make writing Perl
963documentation easier and require less explicit markup.  These rules may not
964always be appropriate, particularly for documentation that isn't about Perl.
965This option allows turning all or some of it off.
966
967The special value C<all> enables all guesswork.  This is also the default for
968backward compatibility reasons.  The special value C<none> disables all
969guesswork.  Otherwise, the value of this option should be a comma-separated
970list of one or more of the following keywords:
971
972=over 4
973
974=item quoting
975
976If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by
977double quotes in nroff (terminal) output unless the contents are already
978quoted.  When this guesswork is enabled, quote marks will also be suppressed
979for Perl variables, function names, function calls, numbers, and hex
980constants.
981
982=back
983
984Any unknown guesswork name is silently ignored (for potential future
985compatibility), so be careful about spelling.
986
987=item indent
988
989[2.00] The number of spaces to indent regular text, and the default
990indentation for C<=over> blocks.  Defaults to 4.
991
992=item loose
993
994[2.00] If set to a true value, a blank line is printed after a C<=head1>
995heading.  If set to false (the default), no blank line is printed after
996C<=head1>, although one is still printed after C<=head2>.  This is the default
997because it's the expected formatting for manual pages; if you're formatting
998arbitrary text documents, setting this to true may result in more pleasing
999output.
1000
1001=item margin
1002
1003[2.21] The width of the left margin in spaces.  Defaults to 0.  This is the
1004margin for all text, including headings, not the amount by which regular text
1005is indented; for the latter, see the I<indent> option.  To set the right
1006margin, see the I<width> option.
1007
1008=item nourls
1009
1010[3.17] Normally, LZ<><> formatting codes with a URL but anchor text are
1011formatted to show both the anchor text and the URL.  In other words:
1012
1013    L<foo|http://example.com/>
1014
1015is formatted as:
1016
1017    foo <http://example.com/>
1018
1019This option, if set to a true value, suppresses the URL when anchor text is
1020given, so this example would be formatted as just C<foo>.  This can produce
1021less cluttered output in cases where the URLs are not particularly important.
1022
1023=item quotes
1024
1025[4.00] Sets the quote marks used to surround CE<lt>> text.  If the value is a
1026single character, it is used as both the left and right quote.  Otherwise, it
1027is split in half, and the first half of the string is used as the left quote
1028and the second is used as the right quote.
1029
1030This may also be set to the special value C<none>, in which case no quote
1031marks are added around CE<lt>> text.
1032
1033=item sentence
1034
1035[3.00] If set to a true value, Pod::Text will assume that each sentence ends
1036in two spaces, and will try to preserve that spacing.  If set to false, all
1037consecutive whitespace in non-verbatim paragraphs is compressed into a single
1038space.  Defaults to false.
1039
1040=item stderr
1041
1042[3.10] Send error messages about invalid POD to standard error instead of
1043appending a POD ERRORS section to the generated output.  This is equivalent to
1044setting C<errors> to C<stderr> if C<errors> is not already set.  It is
1045supported for backward compatibility.
1046
1047=item utf8
1048
1049[3.12] If this option is set to a true value, the output encoding is set to
1050UTF-8.  This is equivalent to setting C<encoding> to C<UTF-8> if C<encoding>
1051is not already set.  It is supported for backward compatibility.
1052
1053=item width
1054
1055[2.00] The column at which to wrap text on the right-hand side.  Defaults to
105676.
1057
1058=back
1059
1060=back
1061
1062=head1 INSTANCE METHODS
1063
1064As a derived class from Pod::Simple, Pod::Text supports the same methods and
1065interfaces.  See L<Pod::Simple> for all the details.  This section summarizes
1066the most-frequently-used methods and the ones added by Pod::Text.
1067
1068=over 4
1069
1070=item output_fh(FH)
1071
1072Direct the output from parse_file(), parse_lines(), or parse_string_document()
1073to the file handle FH instead of C<STDOUT>.
1074
1075=item output_string(REF)
1076
1077Direct the output from parse_file(), parse_lines(), or parse_string_document()
1078to the scalar variable pointed to by REF, rather than C<STDOUT>.  For example:
1079
1080    my $man = Pod::Man->new();
1081    my $output;
1082    $man->output_string(\$output);
1083    $man->parse_file('/some/input/file');
1084
1085Be aware that the output in that variable will already be encoded (see
1086L</Encoding>).
1087
1088=item parse_file(PATH)
1089
1090Read the POD source from PATH and format it.  By default, the output is sent
1091to C<STDOUT>, but this can be changed with the output_fh() or output_string()
1092methods.
1093
1094=item parse_from_file(INPUT, OUTPUT)
1095
1096=item parse_from_filehandle(FH, OUTPUT)
1097
1098Read the POD source from INPUT, format it, and output the results to OUTPUT.
1099
1100parse_from_filehandle() is provided for backward compatibility with older
1101versions of Pod::Man.  parse_from_file() should be used instead.
1102
1103=item parse_lines(LINES[, ...[, undef]])
1104
1105Parse the provided lines as POD source, writing the output to either C<STDOUT>
1106or the file handle set with the output_fh() or output_string() methods.  This
1107method can be called repeatedly to provide more input lines.  An explicit
1108C<undef> should be passed to indicate the end of input.
1109
1110This method expects raw bytes, not decoded characters.
1111
1112=item parse_string_document(INPUT)
1113
1114Parse the provided scalar variable as POD source, writing the output to either
1115C<STDOUT> or the file handle set with the output_fh() or output_string()
1116methods.
1117
1118This method expects raw bytes, not decoded characters.
1119
1120=back
1121
1122=head1 FUNCTIONS
1123
1124Pod::Text exports one function for backward compatibility with older versions.
1125This function is deprecated; instead, use the object-oriented interface
1126described above.
1127
1128=over 4
1129
1130=item pod2text([[-a,] [-NNN,]] INPUT[, OUTPUT])
1131
1132Convert the POD source from INPUT to text and write it to OUTPUT.  If OUTPUT
1133is not given, defaults to C<STDOUT>.  INPUT can be any expression supported as
1134the second argument to two-argument open().
1135
1136If C<-a> is given as an initial argument, pass the C<alt> option to the
1137Pod::Text constructor.  This enables alternative formatting.
1138
1139If C<-NNN> is given as an initial argument, pass the C<width> option to the
1140Pod::Text constructor with the number C<NNN> as its argument.  This sets the
1141wrap line width to NNN.
1142
1143=back
1144
1145=head1 DIAGNOSTICS
1146
1147=over 4
1148
1149=item Bizarre space in item
1150
1151=item Item called without tag
1152
1153(W) Something has gone wrong in internal C<=item> processing.  These
1154messages indicate a bug in Pod::Text; you should never see them.
1155
1156=item Can't open %s for reading: %s
1157
1158(F) Pod::Text was invoked via the compatibility mode pod2text() interface
1159and the input file it was given could not be opened.
1160
1161=item Invalid errors setting "%s"
1162
1163(F) The C<errors> parameter to the constructor was set to an unknown value.
1164
1165=item Invalid quote specification "%s"
1166
1167(F) The quote specification given (the C<quotes> option to the
1168constructor) was invalid.  A quote specification must be either one
1169character long or an even number (greater than one) characters long.
1170
1171=item POD document had syntax errors
1172
1173(F) The POD document being formatted had syntax errors and the C<errors>
1174option was set to C<die>.
1175
1176=back
1177
1178=head1 COMPATIBILITY
1179
1180Pod::Text 2.03 (based on L<Pod::Parser>) was the first version of this module
1181included with Perl, in Perl 5.6.0.  Earlier versions of Perl had a different
1182Pod::Text module, with a different API.
1183
1184The current API based on L<Pod::Simple> was added in Pod::Text 3.00.
1185Pod::Text 3.01 was included in Perl 5.9.3, the first version of Perl to
1186incorporate those changes.  This is the first version that correctly supports
1187all modern POD syntax.  The parse_from_filehandle() method was re-added for
1188backward compatibility in Pod::Text 3.07, included in Perl 5.9.4.
1189
1190Pod::Text 3.12, included in Perl 5.10.1, first implemented the current
1191practice of attempting to match the default output encoding with the input
1192encoding of the POD source, unless overridden by the C<utf8> option or (added
1193later) the C<encoding> option.
1194
1195Support for anchor text in LZ<><> links of type URL was added in Pod::Text
11963.14, included in Perl 5.11.5.
1197
1198parse_lines(), parse_string_document(), and parse_file() set a default output
1199file handle of C<STDOUT> if one was not already set as of Pod::Text 3.18,
1200included in Perl 5.19.5.
1201
1202Pod::Text 4.00, included in Perl 5.23.7, aligned the module version and the
1203version of the podlators distribution.  All modules included in podlators, and
1204the podlators distribution itself, share the same version number from this
1205point forward.
1206
1207Pod::Text 4.09, included in Perl 5.25.7, fixed a serious bug on EBCDIC
1208systems, present in all versions back to 3.00, that would cause opening
1209brackets to disappear.
1210
1211Pod::Text 5.00 now defaults, on non-EBCDIC systems, to UTF-8 encoding if it
1212sees a non-ASCII character in the input and the input encoding is not
1213specified.  It also commits to an encoding with the first non-ASCII character
1214and does not change the output encoding if the input encoding changes.  The
1215L<Encode> module is now used for all output encoding rather than PerlIO
1216layers, which fixes earlier problems with output to scalars.
1217
1218=head1 AUTHOR
1219
1220Russ Allbery <rra@cpan.org>, based I<very> heavily on the original Pod::Text
1221by Tom Christiansen <tchrist@mox.perl.com> and its conversion to Pod::Parser
1222by Brad Appleton <bradapp@enteract.com>.  Sean Burke's initial conversion of
1223Pod::Man to use Pod::Simple provided much-needed guidance on how to use
1224Pod::Simple.
1225
1226=head1 COPYRIGHT AND LICENSE
1227
1228Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019, 2022 Russ
1229Allbery <rra@cpan.org>
1230
1231This program is free software; you may redistribute it and/or modify it
1232under the same terms as Perl itself.
1233
1234=head1 SEE ALSO
1235
1236L<Encode::Locale>, L<Encode::Supproted>, L<Pod::Simple>,
1237L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
1238
1239The current version of this module is always available from its web site at
1240L<https://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
1241Perl core distribution as of 5.6.0.
1242
1243=cut
1244
1245# Local Variables:
1246# copyright-at-end-flag: t
1247# End:
1248