xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/Text.pm (revision a0747c9f67a4ae71ccb71e62a28d1ea19e06a63c)
1# Convert POD data to formatted text.
2#
3# This module converts POD to formatted text.  It replaces the old Pod::Text
4# module that came with versions of Perl prior to 5.6.0 and attempts to match
5# its output except for some specific circumstances where other decisions
6# seemed to produce better output.  It uses Pod::Parser and is designed to be
7# very easy to subclass.
8#
9# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
10
11##############################################################################
12# Modules and declarations
13##############################################################################
14
15package Pod::Text;
16
17use 5.008;
18use strict;
19use warnings;
20
21use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
22
23use Carp qw(carp croak);
24use Encode qw(encode);
25use Exporter ();
26use Pod::Simple ();
27
28@ISA = qw(Pod::Simple Exporter);
29
30# We have to export pod2text for backward compatibility.
31@EXPORT = qw(pod2text);
32
33$VERSION = '4.14';
34
35# Ensure that $Pod::Simple::nbsp and $Pod::Simple::shy are available.  Code
36# taken from Pod::Simple 3.32, but was only added in 3.30.
37my ($NBSP, $SHY);
38if ($Pod::Simple::VERSION ge 3.30) {
39    $NBSP = $Pod::Simple::nbsp;
40    $SHY  = $Pod::Simple::shy;
41} else {
42    $NBSP = chr utf8::unicode_to_native(0xA0);
43    $SHY  = chr utf8::unicode_to_native(0xAD);
44}
45
46##############################################################################
47# Initialization
48##############################################################################
49
50# This function handles code blocks.  It's registered as a callback to
51# Pod::Simple and therefore doesn't work as a regular method call, but all it
52# does is call output_code with the line.
53sub handle_code {
54    my ($line, $number, $parser) = @_;
55    $parser->output_code ($line . "\n");
56}
57
58# Initialize the object and set various Pod::Simple options that we need.
59# Here, we also process any additional options passed to the constructor or
60# set up defaults if none were given.  Note that all internal object keys are
61# in all-caps, reserving all lower-case object keys for Pod::Simple and user
62# arguments.
63sub new {
64    my $class = shift;
65    my $self = $class->SUPER::new;
66
67    # Tell Pod::Simple to handle S<> by automatically inserting &nbsp;.
68    $self->nbsp_for_S (1);
69
70    # Tell Pod::Simple to keep whitespace whenever possible.
71    if ($self->can ('preserve_whitespace')) {
72        $self->preserve_whitespace (1);
73    } else {
74        $self->fullstop_space_harden (1);
75    }
76
77    # The =for and =begin targets that we accept.
78    $self->accept_targets (qw/text TEXT/);
79
80    # Ensure that contiguous blocks of code are merged together.  Otherwise,
81    # some of the guesswork heuristics don't work right.
82    $self->merge_text (1);
83
84    # Pod::Simple doesn't do anything useful with our arguments, but we want
85    # to put them in our object as hash keys and values.  This could cause
86    # problems if we ever clash with Pod::Simple's own internal class
87    # variables.
88    my %opts = @_;
89    my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
90    %$self = (%$self, @opts);
91
92    # Send errors to stderr if requested.
93    if ($$self{opt_stderr} and not $$self{opt_errors}) {
94        $$self{opt_errors} = 'stderr';
95    }
96    delete $$self{opt_stderr};
97
98    # Validate the errors parameter and act on it.
99    if (not defined $$self{opt_errors}) {
100        $$self{opt_errors} = 'pod';
101    }
102    if ($$self{opt_errors} eq 'stderr' || $$self{opt_errors} eq 'die') {
103        $self->no_errata_section (1);
104        $self->complain_stderr (1);
105        if ($$self{opt_errors} eq 'die') {
106            $$self{complain_die} = 1;
107        }
108    } elsif ($$self{opt_errors} eq 'pod') {
109        $self->no_errata_section (0);
110        $self->complain_stderr (0);
111    } elsif ($$self{opt_errors} eq 'none') {
112        $self->no_errata_section (1);
113        $self->no_whining (1);
114    } else {
115        croak (qq(Invalid errors setting: "$$self{errors}"));
116    }
117    delete $$self{errors};
118
119    # Initialize various things from our parameters.
120    $$self{opt_alt}      = 0  unless defined $$self{opt_alt};
121    $$self{opt_indent}   = 4  unless defined $$self{opt_indent};
122    $$self{opt_margin}   = 0  unless defined $$self{opt_margin};
123    $$self{opt_loose}    = 0  unless defined $$self{opt_loose};
124    $$self{opt_sentence} = 0  unless defined $$self{opt_sentence};
125    $$self{opt_width}    = 76 unless defined $$self{opt_width};
126
127    # Figure out what quotes we'll be using for C<> text.
128    $$self{opt_quotes} ||= '"';
129    if ($$self{opt_quotes} eq 'none') {
130        $$self{LQUOTE} = $$self{RQUOTE} = '';
131    } elsif (length ($$self{opt_quotes}) == 1) {
132        $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
133    } elsif (length ($$self{opt_quotes}) % 2 == 0) {
134        my $length = length ($$self{opt_quotes}) / 2;
135        $$self{LQUOTE} = substr ($$self{opt_quotes}, 0, $length);
136        $$self{RQUOTE} = substr ($$self{opt_quotes}, $length);
137    } else {
138        croak qq(Invalid quote specification "$$self{opt_quotes}");
139    }
140
141    # If requested, do something with the non-POD text.
142    $self->code_handler (\&handle_code) if $$self{opt_code};
143
144    # Return the created object.
145    return $self;
146}
147
148##############################################################################
149# Core parsing
150##############################################################################
151
152# This is the glue that connects the code below with Pod::Simple itself.  The
153# goal is to convert the event stream coming from the POD parser into method
154# calls to handlers once the complete content of a tag has been seen.  Each
155# paragraph or POD command will have textual content associated with it, and
156# as soon as all of a paragraph or POD command has been seen, that content
157# will be passed in to the corresponding method for handling that type of
158# object.  The exceptions are handlers for lists, which have opening tag
159# handlers and closing tag handlers that will be called right away.
160#
161# The internal hash key PENDING is used to store the contents of a tag until
162# all of it has been seen.  It holds a stack of open tags, each one
163# represented by a tuple of the attributes hash for the tag and the contents
164# of the tag.
165
166# Add a block of text to the contents of the current node, formatting it
167# according to the current formatting instructions as we do.
168sub _handle_text {
169    my ($self, $text) = @_;
170    my $tag = $$self{PENDING}[-1];
171    $$tag[1] .= $text;
172}
173
174# Given an element name, get the corresponding method name.
175sub method_for_element {
176    my ($self, $element) = @_;
177    $element =~ tr/-/_/;
178    $element =~ tr/A-Z/a-z/;
179    $element =~ tr/_a-z0-9//cd;
180    return $element;
181}
182
183# Handle the start of a new element.  If cmd_element is defined, assume that
184# we need to collect the entire tree for this element before passing it to the
185# element method, and create a new tree into which we'll collect blocks of
186# text and nested elements.  Otherwise, if start_element is defined, call it.
187sub _handle_element_start {
188    my ($self, $element, $attrs) = @_;
189    my $method = $self->method_for_element ($element);
190
191    # If we have a command handler, we need to accumulate the contents of the
192    # tag before calling it.
193    if ($self->can ("cmd_$method")) {
194        push (@{ $$self{PENDING} }, [ $attrs, '' ]);
195    } elsif ($self->can ("start_$method")) {
196        my $method = 'start_' . $method;
197        $self->$method ($attrs, '');
198    }
199}
200
201# Handle the end of an element.  If we had a cmd_ method for this element,
202# this is where we pass along the text that we've accumulated.  Otherwise, if
203# we have an end_ method for the element, call that.
204sub _handle_element_end {
205    my ($self, $element) = @_;
206    my $method = $self->method_for_element ($element);
207
208    # If we have a command handler, pull off the pending text and pass it to
209    # the handler along with the saved attribute hash.
210    if ($self->can ("cmd_$method")) {
211        my $tag = pop @{ $$self{PENDING} };
212        my $method = 'cmd_' . $method;
213        my $text = $self->$method (@$tag);
214        if (defined $text) {
215            if (@{ $$self{PENDING} } > 1) {
216                $$self{PENDING}[-1][1] .= $text;
217            } else {
218                $self->output ($text);
219            }
220        }
221    } elsif ($self->can ("end_$method")) {
222        my $method = 'end_' . $method;
223        $self->$method ();
224    }
225}
226
227##############################################################################
228# Output formatting
229##############################################################################
230
231# Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
232# because it plays games with tabs.  We can't use formline, even though we'd
233# really like to, because it screws up non-printing characters.  So we have to
234# do the wrapping ourselves.
235sub wrap {
236    my $self = shift;
237    local $_ = shift;
238    my $output = '';
239    my $spaces = ' ' x $$self{MARGIN};
240    my $width = $$self{opt_width} - $$self{MARGIN};
241    while (length > $width) {
242        if (s/^([^\n]{0,$width})[ \t\n]+// || s/^([^\n]{$width})//) {
243            $output .= $spaces . $1 . "\n";
244        } else {
245            last;
246        }
247    }
248    $output .= $spaces . $_;
249    $output =~ s/\s+$/\n\n/;
250    return $output;
251}
252
253# Reformat a paragraph of text for the current margin.  Takes the text to
254# reformat and returns the formatted text.
255sub reformat {
256    my $self = shift;
257    local $_ = shift;
258
259    # If we're trying to preserve two spaces after sentences, do some munging
260    # to support that.  Otherwise, smash all repeated whitespace.  Be careful
261    # not to use \s here, which in Unicode input may match non-breaking spaces
262    # that we don't want to smash.
263    if ($$self{opt_sentence}) {
264        s/ +$//mg;
265        s/\.\n/. \n/g;
266        s/\n/ /g;
267        s/   +/  /g;
268    } else {
269        s/[ \t\n]+/ /g;
270    }
271    return $self->wrap ($_);
272}
273
274# Output text to the output device.  Replace non-breaking spaces with spaces
275# and soft hyphens with nothing, and then try to fix the output encoding if
276# necessary to match the input encoding unless UTF-8 output is forced.  This
277# preserves the traditional pass-through behavior of Pod::Text.
278sub output {
279    my ($self, @text) = @_;
280    my $text = join ('', @text);
281    if ($NBSP) {
282        $text =~ s/$NBSP/ /g;
283    }
284    if ($SHY) {
285        $text =~ s/$SHY//g;
286    }
287    unless ($$self{opt_utf8}) {
288        my $encoding = $$self{encoding} || '';
289        if ($encoding && $encoding ne $$self{ENCODING}) {
290            $$self{ENCODING} = $encoding;
291            eval { binmode ($$self{output_fh}, ":encoding($encoding)") };
292        }
293    }
294    if ($$self{ENCODE}) {
295        print { $$self{output_fh} } encode ('UTF-8', $text);
296    } else {
297        print { $$self{output_fh} } $text;
298    }
299}
300
301# Output a block of code (something that isn't part of the POD text).  Called
302# by preprocess_paragraph only if we were given the code option.  Exists here
303# only so that it can be overridden by subclasses.
304sub output_code { $_[0]->output ($_[1]) }
305
306##############################################################################
307# Document initialization
308##############################################################################
309
310# Set up various things that have to be initialized on a per-document basis.
311sub start_document {
312    my ($self, $attrs) = @_;
313    if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
314        $$self{CONTENTLESS} = 1;
315    } else {
316        delete $$self{CONTENTLESS};
317    }
318    my $margin = $$self{opt_indent} + $$self{opt_margin};
319
320    # Initialize a few per-document variables.
321    $$self{INDENTS} = [];       # Stack of indentations.
322    $$self{MARGIN}  = $margin;  # Default left margin.
323    $$self{PENDING} = [[]];     # Pending output.
324
325    # We have to redo encoding handling for each document.
326    $$self{ENCODING} = '';
327
328    # When UTF-8 output is set, check whether our output file handle already
329    # has a PerlIO encoding layer set.  If it does not, we'll need to encode
330    # our output before printing it (handled in the output() sub).
331    $$self{ENCODE} = 0;
332    if ($$self{opt_utf8}) {
333        $$self{ENCODE} = 1;
334        eval {
335            my @options = (output => 1, details => 1);
336            my $flag = (PerlIO::get_layers ($$self{output_fh}, @options))[-1];
337            if ($flag && ($flag & PerlIO::F_UTF8 ())) {
338                $$self{ENCODE} = 0;
339                $$self{ENCODING} = 'UTF-8';
340            }
341        };
342    }
343
344    return '';
345}
346
347# Handle the end of the document.  The only thing we do is handle dying on POD
348# errors, since Pod::Parser currently doesn't.
349sub end_document {
350    my ($self) = @_;
351    if ($$self{complain_die} && $self->errors_seen) {
352        croak ("POD document had syntax errors");
353    }
354}
355
356##############################################################################
357# Text blocks
358##############################################################################
359
360# Intended for subclasses to override, this method returns text with any
361# non-printing formatting codes stripped out so that length() correctly
362# returns the length of the text.  For basic Pod::Text, it does nothing.
363sub strip_format {
364    my ($self, $string) = @_;
365    return $string;
366}
367
368# This method is called whenever an =item command is complete (in other words,
369# we've seen its associated paragraph or know for certain that it doesn't have
370# one).  It gets the paragraph associated with the item as an argument.  If
371# that argument is empty, just output the item tag; if it contains a newline,
372# output the item tag followed by the newline.  Otherwise, see if there's
373# enough room for us to output the item tag in the margin of the text or if we
374# have to put it on a separate line.
375sub item {
376    my ($self, $text) = @_;
377    my $tag = $$self{ITEM};
378    unless (defined $tag) {
379        carp "Item called without tag";
380        return;
381    }
382    undef $$self{ITEM};
383
384    # Calculate the indentation and margin.  $fits is set to true if the tag
385    # will fit into the margin of the paragraph given our indentation level.
386    my $indent = $$self{INDENTS}[-1];
387    $indent = $$self{opt_indent} unless defined $indent;
388    my $margin = ' ' x $$self{opt_margin};
389    my $tag_length = length ($self->strip_format ($tag));
390    my $fits = ($$self{MARGIN} - $indent >= $tag_length + 1);
391
392    # If the tag doesn't fit, or if we have no associated text, print out the
393    # tag separately.  Otherwise, put the tag in the margin of the paragraph.
394    if (!$text || $text =~ /^\s+$/ || !$fits) {
395        my $realindent = $$self{MARGIN};
396        $$self{MARGIN} = $indent;
397        my $output = $self->reformat ($tag);
398        $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
399        $output =~ s/\n*$/\n/;
400
401        # If the text is just whitespace, we have an empty item paragraph;
402        # this can result from =over/=item/=back without any intermixed
403        # paragraphs.  Insert some whitespace to keep the =item from merging
404        # into the next paragraph.
405        $output .= "\n" if $text && $text =~ /^\s*$/;
406
407        $self->output ($output);
408        $$self{MARGIN} = $realindent;
409        $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
410    } else {
411        my $space = ' ' x $indent;
412        $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
413        $text = $self->reformat ($text);
414        $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
415        my $tagspace = ' ' x $tag_length;
416        $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
417        $self->output ($text);
418    }
419}
420
421# Handle a basic block of text.  The only tricky thing here is that if there
422# is a pending item tag, we need to format this as an item paragraph.
423sub cmd_para {
424    my ($self, $attrs, $text) = @_;
425    $text =~ s/\s+$/\n/;
426    if (defined $$self{ITEM}) {
427        $self->item ($text . "\n");
428    } else {
429        $self->output ($self->reformat ($text . "\n"));
430    }
431    return '';
432}
433
434# Handle a verbatim paragraph.  Just print it out, but indent it according to
435# our margin.
436sub cmd_verbatim {
437    my ($self, $attrs, $text) = @_;
438    $self->item if defined $$self{ITEM};
439    return if $text =~ /^\s*$/;
440    $text =~ s/^(\n*)([ \t]*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
441    $text =~ s/\s*$/\n\n/;
442    $self->output ($text);
443    return '';
444}
445
446# Handle literal text (produced by =for and similar constructs).  Just output
447# it with the minimum of changes.
448sub cmd_data {
449    my ($self, $attrs, $text) = @_;
450    $text =~ s/^\n+//;
451    $text =~ s/\n{0,2}$/\n/;
452    $self->output ($text);
453    return '';
454}
455
456##############################################################################
457# Headings
458##############################################################################
459
460# The common code for handling all headers.  Takes the header text, the
461# indentation, and the surrounding marker for the alt formatting method.
462sub heading {
463    my ($self, $text, $indent, $marker) = @_;
464    $self->item ("\n\n") if defined $$self{ITEM};
465    $text =~ s/\s+$//;
466    if ($$self{opt_alt}) {
467        my $closemark = reverse (split (//, $marker));
468        my $margin = ' ' x $$self{opt_margin};
469        $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
470    } else {
471        $text .= "\n" if $$self{opt_loose};
472        my $margin = ' ' x ($$self{opt_margin} + $indent);
473        $self->output ($margin . $text . "\n");
474    }
475    return '';
476}
477
478# First level heading.
479sub cmd_head1 {
480    my ($self, $attrs, $text) = @_;
481    $self->heading ($text, 0, '====');
482}
483
484# Second level heading.
485sub cmd_head2 {
486    my ($self, $attrs, $text) = @_;
487    $self->heading ($text, $$self{opt_indent} / 2, '==  ');
488}
489
490# Third level heading.
491sub cmd_head3 {
492    my ($self, $attrs, $text) = @_;
493    $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '=   ');
494}
495
496# Fourth level heading.
497sub cmd_head4 {
498    my ($self, $attrs, $text) = @_;
499    $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '-   ');
500}
501
502##############################################################################
503# List handling
504##############################################################################
505
506# Handle the beginning of an =over block.  Takes the type of the block as the
507# first argument, and then the attr hash.  This is called by the handlers for
508# the four different types of lists (bullet, number, text, and block).
509sub over_common_start {
510    my ($self, $attrs) = @_;
511    $self->item ("\n\n") if defined $$self{ITEM};
512
513    # Find the indentation level.
514    my $indent = $$attrs{indent};
515    unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
516        $indent = $$self{opt_indent};
517    }
518
519    # Add this to our stack of indents and increase our current margin.
520    push (@{ $$self{INDENTS} }, $$self{MARGIN});
521    $$self{MARGIN} += ($indent + 0);
522    return '';
523}
524
525# End an =over block.  Takes no options other than the class pointer.  Output
526# any pending items and then pop one level of indentation.
527sub over_common_end {
528    my ($self) = @_;
529    $self->item ("\n\n") if defined $$self{ITEM};
530    $$self{MARGIN} = pop @{ $$self{INDENTS} };
531    return '';
532}
533
534# Dispatch the start and end calls as appropriate.
535sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
536sub start_over_number { $_[0]->over_common_start ($_[1]) }
537sub start_over_text   { $_[0]->over_common_start ($_[1]) }
538sub start_over_block  { $_[0]->over_common_start ($_[1]) }
539sub end_over_bullet { $_[0]->over_common_end }
540sub end_over_number { $_[0]->over_common_end }
541sub end_over_text   { $_[0]->over_common_end }
542sub end_over_block  { $_[0]->over_common_end }
543
544# The common handler for all item commands.  Takes the type of the item, the
545# attributes, and then the text of the item.
546sub item_common {
547    my ($self, $type, $attrs, $text) = @_;
548    $self->item if defined $$self{ITEM};
549
550    # Clean up the text.  We want to end up with two variables, one ($text)
551    # which contains any body text after taking out the item portion, and
552    # another ($item) which contains the actual item text.  Note the use of
553    # the internal Pod::Simple attribute here; that's a potential land mine.
554    $text =~ s/\s+$//;
555    my ($item, $index);
556    if ($type eq 'bullet') {
557        $item = '*';
558    } elsif ($type eq 'number') {
559        $item = $$attrs{'~orig_content'};
560    } else {
561        $item = $text;
562        $item =~ s/\s*\n\s*/ /g;
563        $text = '';
564    }
565    $$self{ITEM} = $item;
566
567    # If body text for this item was included, go ahead and output that now.
568    if ($text) {
569        $text =~ s/\s*$/\n/;
570        $self->item ($text);
571    }
572    return '';
573}
574
575# Dispatch the item commands to the appropriate place.
576sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
577sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
578sub cmd_item_text   { my $self = shift; $self->item_common ('text',   @_) }
579sub cmd_item_block  { my $self = shift; $self->item_common ('block',  @_) }
580
581##############################################################################
582# Formatting codes
583##############################################################################
584
585# The simple ones.
586sub cmd_b { return $_[0]{alt} ? "``$_[2]''" : $_[2] }
587sub cmd_f { return $_[0]{alt} ? "\"$_[2]\"" : $_[2] }
588sub cmd_i { return '*' . $_[2] . '*' }
589sub cmd_x { return '' }
590
591# Apply a whole bunch of messy heuristics to not quote things that don't
592# benefit from being quoted.  These originally come from Barrie Slaymaker and
593# largely duplicate code in Pod::Man.
594sub cmd_c {
595    my ($self, $attrs, $text) = @_;
596
597    # A regex that matches the portion of a variable reference that's the
598    # array or hash index, separated out just because we want to use it in
599    # several places in the following regex.
600    my $index = '(?: \[.*\] | \{.*\} )?';
601
602    # Check for things that we don't want to quote, and if we find any of
603    # them, return the string with just a font change and no quoting.
604    $text =~ m{
605      ^\s*
606      (?:
607         ( [\'\`\"] ) .* \1                             # already quoted
608       | \` .* \'                                       # `quoted'
609       | \$+ [\#^]? \S $index                           # special ($^Foo, $")
610       | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
611       | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
612       | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number
613       | 0x [a-fA-F\d]+                                 # a hex constant
614      )
615      \s*\z
616     }xo && return $text;
617
618    # If we didn't return, go ahead and quote the text.
619    return $$self{opt_alt}
620        ? "``$text''"
621        : "$$self{LQUOTE}$text$$self{RQUOTE}";
622}
623
624# Links reduce to the text that we're given, wrapped in angle brackets if it's
625# a URL.
626sub cmd_l {
627    my ($self, $attrs, $text) = @_;
628    if ($$attrs{type} eq 'url') {
629        if (not defined($$attrs{to}) or $$attrs{to} eq $text) {
630            return "<$text>";
631        } elsif ($$self{opt_nourls}) {
632            return $text;
633        } else {
634            return "$text <$$attrs{to}>";
635        }
636    } else {
637        return $text;
638    }
639}
640
641##############################################################################
642# Backwards compatibility
643##############################################################################
644
645# The old Pod::Text module did everything in a pod2text() function.  This
646# tries to provide the same interface for legacy applications.
647sub pod2text {
648    my @args;
649
650    # This is really ugly; I hate doing option parsing in the middle of a
651    # module.  But the old Pod::Text module supported passing flags to its
652    # entry function, so handle -a and -<number>.
653    while ($_[0] =~ /^-/) {
654        my $flag = shift;
655        if    ($flag eq '-a')       { push (@args, alt => 1)    }
656        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
657        else {
658            unshift (@_, $flag);
659            last;
660        }
661    }
662
663    # Now that we know what arguments we're using, create the parser.
664    my $parser = Pod::Text->new (@args);
665
666    # If two arguments were given, the second argument is going to be a file
667    # handle.  That means we want to call parse_from_filehandle(), which means
668    # we need to turn the first argument into a file handle.  Magic open will
669    # handle the <&STDIN case automagically.
670    if (defined $_[1]) {
671        my @fhs = @_;
672        local *IN;
673        unless (open (IN, $fhs[0])) {
674            croak ("Can't open $fhs[0] for reading: $!\n");
675            return;
676        }
677        $fhs[0] = \*IN;
678        $parser->output_fh ($fhs[1]);
679        my $retval = $parser->parse_file ($fhs[0]);
680        my $fh = $parser->output_fh ();
681        close $fh;
682        return $retval;
683    } else {
684        $parser->output_fh (\*STDOUT);
685        return $parser->parse_file (@_);
686    }
687}
688
689# Reset the underlying Pod::Simple object between calls to parse_from_file so
690# that the same object can be reused to convert multiple pages.
691sub parse_from_file {
692    my $self = shift;
693    $self->reinit;
694
695    # Fake the old cutting option to Pod::Parser.  This fiddles with internal
696    # Pod::Simple state and is quite ugly; we need a better approach.
697    if (ref ($_[0]) eq 'HASH') {
698        my $opts = shift @_;
699        if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
700            $$self{in_pod} = 1;
701            $$self{last_was_blank} = 1;
702        }
703    }
704
705    # Do the work.
706    my $retval = $self->Pod::Simple::parse_from_file (@_);
707
708    # Flush output, since Pod::Simple doesn't do this.  Ideally we should also
709    # close the file descriptor if we had to open one, but we can't easily
710    # figure this out.
711    my $fh = $self->output_fh ();
712    my $oldfh = select $fh;
713    my $oldflush = $|;
714    $| = 1;
715    print $fh '';
716    $| = $oldflush;
717    select $oldfh;
718    return $retval;
719}
720
721# Pod::Simple failed to provide this backward compatibility function, so
722# implement it ourselves.  File handles are one of the inputs that
723# parse_from_file supports.
724sub parse_from_filehandle {
725    my $self = shift;
726    $self->parse_from_file (@_);
727}
728
729# Pod::Simple's parse_file doesn't set output_fh.  Wrap the call and do so
730# ourself unless it was already set by the caller, since our documentation has
731# always said that this should work.
732sub parse_file {
733    my ($self, $in) = @_;
734    unless (defined $$self{output_fh}) {
735        $self->output_fh (\*STDOUT);
736    }
737    return $self->SUPER::parse_file ($in);
738}
739
740# Do the same for parse_lines, just to be polite.  Pod::Simple's man page
741# implies that the caller is responsible for setting this, but I don't see any
742# reason not to set a default.
743sub parse_lines {
744    my ($self, @lines) = @_;
745    unless (defined $$self{output_fh}) {
746        $self->output_fh (\*STDOUT);
747    }
748    return $self->SUPER::parse_lines (@lines);
749}
750
751# Likewise for parse_string_document.
752sub parse_string_document {
753    my ($self, $doc) = @_;
754    unless (defined $$self{output_fh}) {
755        $self->output_fh (\*STDOUT);
756    }
757    return $self->SUPER::parse_string_document ($doc);
758}
759
760##############################################################################
761# Module return value and documentation
762##############################################################################
763
7641;
765__END__
766
767=for stopwords
768alt stderr Allbery Sean Burke's Christiansen UTF-8 pre-Unicode utf8 nourls
769parsers
770
771=head1 NAME
772
773Pod::Text - Convert POD data to formatted text
774
775=head1 SYNOPSIS
776
777    use Pod::Text;
778    my $parser = Pod::Text->new (sentence => 1, width => 78);
779
780    # Read POD from STDIN and write to STDOUT.
781    $parser->parse_from_filehandle;
782
783    # Read POD from file.pod and write to file.txt.
784    $parser->parse_from_file ('file.pod', 'file.txt');
785
786=head1 DESCRIPTION
787
788Pod::Text is a module that can convert documentation in the POD format
789(the preferred language for documenting Perl) into formatted text.  It
790uses no special formatting controls or codes whatsoever, and its output is
791therefore suitable for nearly any device.
792
793As a derived class from Pod::Simple, Pod::Text supports the same methods and
794interfaces.  See L<Pod::Simple> for all the details; briefly, one creates a
795new parser with C<< Pod::Text->new() >> and then normally calls parse_file().
796
797new() can take options, in the form of key/value pairs, that control the
798behavior of the parser.  The currently recognized options are:
799
800=over 4
801
802=item alt
803
804If set to a true value, selects an alternate output format that, among other
805things, uses a different heading style and marks C<=item> entries with a
806colon in the left margin.  Defaults to false.
807
808=item code
809
810If set to a true value, the non-POD parts of the input file will be included
811in the output.  Useful for viewing code documented with POD blocks with the
812POD rendered and the code left intact.
813
814=item errors
815
816How to report errors.  C<die> says to throw an exception on any POD
817formatting error.  C<stderr> says to report errors on standard error, but
818not to throw an exception.  C<pod> says to include a POD ERRORS section
819in the resulting documentation summarizing the errors.  C<none> ignores
820POD errors entirely, as much as possible.
821
822The default is C<pod>.
823
824=item indent
825
826The number of spaces to indent regular text, and the default indentation for
827C<=over> blocks.  Defaults to 4.
828
829=item loose
830
831If set to a true value, a blank line is printed after a C<=head1> heading.
832If set to false (the default), no blank line is printed after C<=head1>,
833although one is still printed after C<=head2>.  This is the default because
834it's the expected formatting for manual pages; if you're formatting
835arbitrary text documents, setting this to true may result in more pleasing
836output.
837
838=item margin
839
840The width of the left margin in spaces.  Defaults to 0.  This is the margin
841for all text, including headings, not the amount by which regular text is
842indented; for the latter, see the I<indent> option.  To set the right
843margin, see the I<width> option.
844
845=item nourls
846
847Normally, LZ<><> formatting codes with a URL but anchor text are formatted
848to show both the anchor text and the URL.  In other words:
849
850    L<foo|http://example.com/>
851
852is formatted as:
853
854    foo <http://example.com/>
855
856This option, if set to a true value, suppresses the URL when anchor text
857is given, so this example would be formatted as just C<foo>.  This can
858produce less cluttered output in cases where the URLs are not particularly
859important.
860
861=item quotes
862
863Sets the quote marks used to surround CE<lt>> text.  If the value is a
864single character, it is used as both the left and right quote.  Otherwise,
865it is split in half, and the first half of the string is used as the left
866quote and the second is used as the right quote.
867
868This may also be set to the special value C<none>, in which case no quote
869marks are added around CE<lt>> text.
870
871=item sentence
872
873If set to a true value, Pod::Text will assume that each sentence ends in two
874spaces, and will try to preserve that spacing.  If set to false, all
875consecutive whitespace in non-verbatim paragraphs is compressed into a
876single space.  Defaults to false.
877
878=item stderr
879
880Send error messages about invalid POD to standard error instead of
881appending a POD ERRORS section to the generated output.  This is
882equivalent to setting C<errors> to C<stderr> if C<errors> is not already
883set.  It is supported for backward compatibility.
884
885=item utf8
886
887By default, Pod::Text uses the same output encoding as the input encoding
888of the POD source (provided that Perl was built with PerlIO; otherwise, it
889doesn't encode its output).  If this option is given, the output encoding
890is forced to UTF-8.
891
892Be aware that, when using this option, the input encoding of your POD
893source should be properly declared unless it's US-ASCII.  Pod::Simple will
894attempt to guess the encoding and may be successful if it's Latin-1 or
895UTF-8, but it will produce warnings.  Use the C<=encoding> command to
896declare the encoding.  See L<perlpod(1)> for more information.
897
898=item width
899
900The column at which to wrap text on the right-hand side.  Defaults to 76.
901
902=back
903
904The standard Pod::Simple method parse_file() takes one argument naming the
905POD file to read from.  By default, the output is sent to C<STDOUT>, but
906this can be changed with the output_fh() method.
907
908The standard Pod::Simple method parse_from_file() takes up to two
909arguments, the first being the input file to read POD from and the second
910being the file to write the formatted output to.
911
912You can also call parse_lines() to parse an array of lines or
913parse_string_document() to parse a document already in memory.  As with
914parse_file(), parse_lines() and parse_string_document() default to sending
915their output to C<STDOUT> unless changed with the output_fh() method.  Be
916aware that parse_lines() and parse_string_document() both expect raw bytes,
917not decoded characters.
918
919To put the output from any parse method into a string instead of a file
920handle, call the output_string() method instead of output_fh().
921
922See L<Pod::Simple> for more specific details on the methods available to
923all derived parsers.
924
925=head1 DIAGNOSTICS
926
927=over 4
928
929=item Bizarre space in item
930
931=item Item called without tag
932
933(W) Something has gone wrong in internal C<=item> processing.  These
934messages indicate a bug in Pod::Text; you should never see them.
935
936=item Can't open %s for reading: %s
937
938(F) Pod::Text was invoked via the compatibility mode pod2text() interface
939and the input file it was given could not be opened.
940
941=item Invalid errors setting "%s"
942
943(F) The C<errors> parameter to the constructor was set to an unknown value.
944
945=item Invalid quote specification "%s"
946
947(F) The quote specification given (the C<quotes> option to the
948constructor) was invalid.  A quote specification must be either one
949character long or an even number (greater than one) characters long.
950
951=item POD document had syntax errors
952
953(F) The POD document being formatted had syntax errors and the C<errors>
954option was set to C<die>.
955
956=back
957
958=head1 BUGS
959
960Encoding handling assumes that PerlIO is available and does not work
961properly if it isn't.  The C<utf8> option is therefore not supported
962unless Perl is built with PerlIO support.
963
964=head1 CAVEATS
965
966If Pod::Text is given the C<utf8> option, the encoding of its output file
967handle will be forced to UTF-8 if possible, overriding any existing
968encoding.  This will be done even if the file handle is not created by
969Pod::Text and was passed in from outside.  This maintains consistency
970regardless of PERL_UNICODE and other settings.
971
972If the C<utf8> option is not given, the encoding of its output file handle
973will be forced to the detected encoding of the input POD, which preserves
974whatever the input text is.  This ensures backward compatibility with
975earlier, pre-Unicode versions of this module, without large numbers of
976Perl warnings.
977
978This is not ideal, but it seems to be the best compromise.  If it doesn't
979work for you, please let me know the details of how it broke.
980
981=head1 NOTES
982
983This is a replacement for an earlier Pod::Text module written by Tom
984Christiansen.  It has a revamped interface, since it now uses Pod::Simple,
985but an interface roughly compatible with the old Pod::Text::pod2text()
986function is still available.  Please change to the new calling convention,
987though.
988
989The original Pod::Text contained code to do formatting via termcap
990sequences, although it wasn't turned on by default and it was problematic to
991get it to work at all.  This rewrite doesn't even try to do that, but a
992subclass of it does.  Look for L<Pod::Text::Termcap>.
993
994=head1 AUTHOR
995
996Russ Allbery <rra@cpan.org>, based I<very> heavily on the original
997Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
998Pod::Parser by Brad Appleton <bradapp@enteract.com>.  Sean Burke's initial
999conversion of Pod::Man to use Pod::Simple provided much-needed guidance on
1000how to use Pod::Simple.
1001
1002=head1 COPYRIGHT AND LICENSE
1003
1004Copyright 1999-2002, 2004, 2006, 2008-2009, 2012-2016, 2018-2019 Russ Allbery
1005<rra@cpan.org>
1006
1007This program is free software; you may redistribute it and/or modify it
1008under the same terms as Perl itself.
1009
1010=head1 SEE ALSO
1011
1012L<Pod::Simple>, L<Pod::Text::Termcap>, L<perlpod(1)>, L<pod2text(1)>
1013
1014The current version of this module is always available from its web site at
1015L<https://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
1016Perl core distribution as of 5.6.0.
1017
1018=cut
1019
1020# Local Variables:
1021# copyright-at-end-flag: t
1022# End:
1023