xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/Man.pm (revision e0a5400065cea17a7de6532c2ecb091c5f17622b)
1b46d8ef2Safresh1# Convert POD data to formatted *roff input.
2b39c5158Smillert#
3b39c5158Smillert# This module translates POD documentation into *roff markup using the man
4b39c5158Smillert# macro set, and is intended for converting POD documents written as Unix
5b39c5158Smillert# manual pages to manual pages that can be read by the man(1) command.  It is
6b39c5158Smillert# a replacement for the pod2man command distributed with versions of Perl
7b39c5158Smillert# prior to 5.6.
8b39c5158Smillert#
9b46d8ef2Safresh1# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
10b39c5158Smillert
11b39c5158Smillert##############################################################################
12b39c5158Smillert# Modules and declarations
13b39c5158Smillert##############################################################################
14b39c5158Smillert
15b39c5158Smillertpackage Pod::Man;
16b39c5158Smillert
17e0680481Safresh1use 5.010;
18b39c5158Smillertuse strict;
19b8851fccSafresh1use warnings;
20b8851fccSafresh1
21b8851fccSafresh1use Carp qw(carp croak);
22b39c5158Smillertuse Pod::Simple ();
23b39c5158Smillert
2456d68f1eSafresh1# Conditionally import Encode and set $HAS_ENCODE if it is available.  This is
2556d68f1eSafresh1# required to support building as part of Perl core, since podlators is built
2656d68f1eSafresh1# before Encode is.
27e0680481Safresh1my $HAS_ENCODE;
28b8851fccSafresh1BEGIN {
29b8851fccSafresh1    $HAS_ENCODE = eval { require Encode };
30b8851fccSafresh1}
31b8851fccSafresh1
32e0680481Safresh1our @ISA = qw(Pod::Simple);
333d61058aSafresh1our $VERSION = '5.01_02';
343d61058aSafresh1$VERSION =~ tr/_//d;
35b39c5158Smillert
# Ensure that $NBSP and $SHY hold the non-breaking space and soft hyphen
# characters.  $Pod::Simple::nbsp and $Pod::Simple::shy were only added in
# Pod::Simple 3.30, so for older versions compute them ourselves with the
# code taken from Pod::Simple 3.32.
#
# The check goes through UNIVERSAL::VERSION, which dies if the loaded module
# is older than requested, so that this is a real version comparison rather
# than a string comparison against a number.
my ($NBSP, $SHY);
if (eval { Pod::Simple->VERSION('3.30'); 1 }) {
    $NBSP = $Pod::Simple::nbsp;
    $SHY  = $Pod::Simple::shy;
} else {
    $NBSP = chr utf8::unicode_to_native(0xA0);
    $SHY  = chr utf8::unicode_to_native(0xAD);
}
46b39c5158Smillert
47b39c5158Smillert# Import the ASCII constant from Pod::Simple.  This is true iff we're in an
48b39c5158Smillert# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
49b39c5158Smillert# generally only false for EBCDIC.
50b39c5158SmillertBEGIN { *ASCII = \&Pod::Simple::ASCII }
51b39c5158Smillert
# Formatting flags for the various types of blocks, keyed by element name.
#
#   cleanup   - make hyphens hard, add space between consecutive
#               underscores, and escape backslashes
#   convert   - translate characters into escapes
#   guesswork - apply the transformations done by the guesswork sub (if
#               enabled)
#   literal   - protect literal quotes from being turned into UTF-8 quotes
#
# DEFAULT holds the default settings: everything on except literal.  Every
# other element lists only the settings that it overrides.  Data is used for
# =for roff blocks, which should be passed along completely verbatim.
#
# Formatting inherits negatively, in the sense that if the parent has turned
# off guesswork, all child elements should leave it off.
my %FORMATTING = (
    DEFAULT => {
        cleanup   => 1,
        convert   => 1,
        guesswork => 1,
        literal   => 0,
    },
    Data => {
        cleanup   => 0,
        convert   => 0,
        guesswork => 0,
        literal   => 0,
    },
    Verbatim => { guesswork => 0, literal => 1 },
    C        => { guesswork => 0, literal => 1 },
    X        => { cleanup   => 0, guesswork => 0 },
);
72e5157e49Safresh1
# Map an encoding as understood by Perl's Encode module to an encoding name
# that groff's preconv is likely to understand.  Encode doesn't care about
# hyphens or capitalization, but preconv does.  Each key is a canonicalized
# Encode encoding name and each value is the name to give to preconv.
#
# FreeBSD mandoc only understands utf-8 and iso-latin-1 as of 2022-09-24.
# groff preconv prefers iso-8859-1, but also understands iso-latin-1, so
# ISO-8859-1 is mapped to iso-latin-1 for the benefit of FreeBSD.
#
# NOTE(review): latin1 maps to iso-8859-1 whereas iso88591 maps to
# iso-latin-1; confirm that this asymmetry is intended.
my %ENCODINGS = (

    # ASCII
    ascii   => 'us-ascii',
    usascii => 'us-ascii',

    # Chinese, Japanese, and Korean
    big5      => 'big5',
    big5eten  => 'big5',
    cp950     => 'big5',
    euccn     => 'gb2312',
    gb2312    => 'gb2312',
    gb2312raw => 'gb2312',
    eucjp     => 'euc-jp',
    euckr     => 'euc-kr',

    # ISO 8859 family
    iso88591  => 'iso-latin-1',
    latin1    => 'iso-8859-1',
    iso88592  => 'iso-8859-2',
    iso88595  => 'iso-8859-5',
    iso88597  => 'iso-8859-7',
    iso88599  => 'iso-8859-9',
    iso885913 => 'iso-8859-13',
    iso885915 => 'iso-8859-15',

    # Other single-byte encodings
    cp1047 => 'cp1047',
    koi8r  => 'koi8-r',

    # Unicode
    utf8    => 'utf-8',
    utf16   => 'utf-16',
    utf16be => 'utf-16be',
    utf16le => 'utf-16le',
);
107e0680481Safresh1
108e0680481Safresh1##############################################################################
109e0680481Safresh1# Translation tables
110e0680481Safresh1##############################################################################
111e0680481Safresh1
# Table mapping the code points 0xA0 through 0xFF to *roff escapes, adapted
# from Tom Christiansen's pod2man.  It is only used with roff output, and it
# assumes that the standard preamble has already been printed, since that's
# what defines all of the accent marks.  An undef entry means that there is
# no escape for that character.  The results are pretty poor; something
# better is wanted once *roff actually supports other character sets itself.
#
# The table is only populated in an ASCII world.  What to do in a non-ASCII
# world is very unclear, so there we just output what we get and hope for
# the best.
my %ESCAPES;
if (ASCII) {
    @ESCAPES{0xA0 .. 0xFF} = (

        # 0xA0 - 0xAF
        $NBSP, (undef) x 12, $SHY, (undef) x 2,

        # 0xB0 - 0xBF
        (undef) x 16,

        # 0xC0 - 0xCF
        "A\\*`",  "A\\*'", "A\\*^", "A\\*~",   "A\\*:", "A\\*o", "\\*(Ae", "C\\*,",
        "E\\*`",  "E\\*'", "E\\*^", "E\\*:",   "I\\*`", "I\\*'", "I\\*^",  "I\\*:",

        # 0xD0 - 0xDF
        "\\*(D-", "N\\*~", "O\\*`", "O\\*'",   "O\\*^", "O\\*~", "O\\*:",  undef,
        "O\\*/",  "U\\*`", "U\\*'", "U\\*^",   "U\\*:", "Y\\*'", "\\*(Th", "\\*8",

        # 0xE0 - 0xEF
        "a\\*`",  "a\\*'", "a\\*^", "a\\*~",   "a\\*:", "a\\*o", "\\*(ae", "c\\*,",
        "e\\*`",  "e\\*'", "e\\*^", "e\\*:",   "i\\*`", "i\\*'", "i\\*^",  "i\\*:",

        # 0xF0 - 0xFF
        "\\*(d-", "n\\*~", "o\\*`", "o\\*'",   "o\\*^", "o\\*~", "o\\*:",  undef,
        "o\\*/",  "u\\*`", "u\\*'", "u\\*^",   "u\\*:", "y\\*'", "\\*(th", "y\\*:",
    );
}
140e0680481Safresh1
141e0680481Safresh1##############################################################################
142e0680481Safresh1# Utility functions
143e0680481Safresh1##############################################################################
144e0680481Safresh1
# Quote an argument to a *roff macro.  A non-empty argument that contains
# neither whitespace nor double quotes is returned as-is.  Anything else is
# wrapped in double quotes, with any embedded double quotes doubled.
#
# $arg - Intended argument to the macro
#
# Returns: $arg suitably escaped and quoted
sub _quote_macro_argument {
    my ($arg) = @_;
    my $needs_quoting = length($arg) == 0 || $arg =~ m{ [\s\"] }xms;
    return $arg if !$needs_quoting;

    # Double each embedded quote and then wrap the whole argument.
    (my $escaped = $arg) =~ s{ \" }{""}xmsg;
    return q{"} . $escaped . q{"};
}
158e0680481Safresh1
# Returns whether the given encoding needs a call to Encode::encode.  This
# is true for every encoding other than the two pseudo-encodings 'roff' and
# 'groff'.
#
# $encoding - Name of the output encoding
#
# Returns: True if the encoding is neither 'roff' nor 'groff', false
#          otherwise
sub _needs_encode {
    my ($encoding) = @_;
    return !($encoding eq 'roff' || $encoding eq 'groff');
}
164e0680481Safresh1
165b39c5158Smillert##############################################################################
166b39c5158Smillert# Object initialization
167b39c5158Smillert##############################################################################
168b39c5158Smillert
# Initialize the object and set various Pod::Simple options that we need.
# Here, we also process any additional options passed to the constructor or
# set up defaults if none were given.  Note that all internal object keys are
# in all-caps, reserving all lower-case object keys for Pod::Simple and user
# arguments.
#
# $class - Class to instantiate
# %opts  - Constructor options as key/value pairs; each is stored in the
#          object under the key "opt_<name>"
#
# Returns: Newly created object
#  Throws: Text exception on an invalid errors setting; init_fonts and
#          init_quotes also croak on invalid font or quote settings
sub new {
    my $class = shift;
    my $self = $class->SUPER::new;

    # Tell Pod::Simple to keep whitespace whenever possible.
    if (my $preserve_whitespace = $self->can ('preserve_whitespace')) {
        $self->$preserve_whitespace (1);
    } else {
        $self->fullstop_space_harden (1);
    }

    # The =for and =begin targets that we accept.
    $self->accept_targets (qw/man MAN roff ROFF/);

    # Ensure that contiguous blocks of code are merged together.  Otherwise,
    # some of the guesswork heuristics don't work right.
    $self->merge_text (1);

    # Pod::Simple doesn't do anything useful with our arguments, but we want
    # to put them in our object as hash keys and values.  Prefix each key
    # with opt_ so that they cannot clash with Pod::Simple's own internal
    # keys.
    my %opts = @_;
    for my $option (keys %opts) {
        $self->{"opt_$option"} = $opts{$option};
    }

    # Pod::Simple uses encoding internally, so we need to store it as
    # ENCODING.  Set the default to UTF-8 if not specified, or to groff
    # escapes when not in an ASCII-based universe.
    if (defined $self->{opt_encoding}) {
        $$self{ENCODING} = $self->{opt_encoding};
    } elsif (ASCII) {
        $$self{ENCODING} = 'UTF-8';
    } else {
        $$self{ENCODING} = 'groff';
    }

    # Fall back to groff escapes if Encode is not available.
    #
    # Suppress the warning message when PERL_CORE is set, indicating this is
    # running as part of the core Perl build.  Perl builds podlators (and all
    # pure Perl modules) before Encode and other XS modules, so Encode won't
    # yet be available.  Rely on the Perl core build to generate man pages
    # later, after all the modules are available, so that UTF-8 handling will
    # be correct.
    if (_needs_encode($$self{ENCODING}) && !$HAS_ENCODE) {
        if (!$ENV{PERL_CORE}) {
            carp ('encoding requested but Encode module not available,'
                    . ' falling back to groff escapes');
        }
        $$self{ENCODING} = 'groff';
    }

    # Send errors to stderr if requested.  An explicit errors setting takes
    # precedence over the stderr option.
    if ($self->{opt_stderr} and not $self->{opt_errors}) {
        $self->{opt_errors} = 'stderr';
    }
    delete $self->{opt_stderr};

    # Validate the errors parameter and act on it.
    $self->{opt_errors} //= 'pod';
    if ($self->{opt_errors} eq 'stderr' || $self->{opt_errors} eq 'die') {
        $self->no_errata_section (1);
        $self->complain_stderr (1);
        if ($self->{opt_errors} eq 'die') {
            $self->{complain_die} = 1;
        }
    } elsif ($self->{opt_errors} eq 'pod') {
        $self->no_errata_section (0);
        $self->complain_stderr (0);
    } elsif ($self->{opt_errors} eq 'none') {
        $self->no_errata_section (1);
        $self->no_whining (1);
    } else {
        croak (qq(Invalid errors setting: "$self->{opt_errors}"));
    }
    delete $self->{opt_errors};

    # Initialize various other internal constants based on our arguments.
    $self->init_fonts;
    $self->init_quotes;
    $self->init_page;

    # Configure guesswork based on options.  The option is a comma-separated
    # list of guesswork types.  An empty list or one containing "all"
    # enables everything, and one containing "none" disables everything.
    my $guesswork = $self->{opt_guesswork} || q{};
    my %guesswork = map { $_ => 1 } split(m{,}xms, $guesswork);
    if (!%guesswork || $guesswork{all}) {
        #<<<
        $$self{GUESSWORK} = {
            functions => 1,
            manref    => 1,
            quoting   => 1,
            variables => 1,
        };
        #>>>
    } elsif ($guesswork{none}) {
        $$self{GUESSWORK} = {};
    } else {
        $$self{GUESSWORK} = {%guesswork};
    }

    return $self;
}
277b39c5158Smillert
# Translate a font string into an escape.  Fonts with names longer than one
# character use the \f( form of the escape; single-character fonts use the
# plain \f form.
#
# $font - Name of the font
#
# Returns: The *roff escape that selects that font
sub toescape {
    my ($font) = @_;
    my $prefix = length($font) > 1 ? '\f(' : '\f';
    return $prefix . $font;
}
280b39c5158Smillert
# Determine which fonts the user wishes to use and store them in the object.
# Regular, italic, bold, and bold-italic are constants, but the fixed width
# fonts may be set by the user.  Sets the internal hash key FONTS, which is
# used to map our internal font escapes to actual *roff sequences later, and
# FIXEDPAT, a regex matching any of the fixed-width font escapes.
#
# Throws: Text exception if a user-supplied font name is not one or two
#         characters long
sub init_fonts {
    my ($self) = @_;

    # Make sure that any user-supplied fixed-width fonts are the right
    # length.
    for my $option (qw(fixed fixedbold fixeditalic fixedbolditalic)) {
        my $font = $self->{"opt_$option"};
        next if !defined($font);
        my $length = length($font);
        if ($length < 1 || $length > 2) {
            croak(qq(roff font should be 1 or 2 chars, not "$font"));
        }
    }

    # Set the default fonts.  We can't be sure portably across different
    # implementations what fixed bold-italic may be called (if it's even
    # available), so default to just bold.
    my %default_font = (
        fixed           => 'CW',
        fixedbold       => 'CB',
        fixeditalic     => 'CI',
        fixedbolditalic => 'CB',
    );
    for my $option (keys %default_font) {
        $self->{"opt_$option"} ||= $default_font{$option};
    }

    # Set up a table of font escapes.  In each key, the first digit is
    # fixed-width, the second is bold, and the third is italic.
    my %fonts = (
        '000' => '\fR',
        '001' => '\fI',
        '010' => '\fB',
        '011' => '\f(BI',
        '100' => toescape($self->{opt_fixed}),
        '101' => toescape($self->{opt_fixeditalic}),
        '110' => toescape($self->{opt_fixedbold}),
        '111' => toescape($self->{opt_fixedbolditalic}),
    );
    $self->{FONTS} = \%fonts;

    # Precalculate a regex that matches all fixed-width fonts, which will be
    # used later by switchquotes.
    my $alternation
      = join('|', map { quotemeta($self->{FONTS}{$_}) } qw(100 101 110 111));
    $self->{FIXEDPAT} = qr{ $alternation }xms;
}
326b39c5158Smillert
# Initialize the quotes that we'll be using for C<> text, based on the
# quotes, lquote, and rquote options.  Sets the internal hash keys LQUOTE
# and RQUOTE.
#
# Throws: Text exception if the quotes option is neither "none", a single
#         character, nor a string of even length
sub init_quotes {
    my ($self) = @_;

    # Handle the quotes option first, which sets both quotes at once.  A
    # single character is used for both sides; a string of even length is
    # split in half into the left and right quotes.
    my $quotes = ($self->{opt_quotes} ||= '"');
    my $length = length($quotes);
    my ($left, $right);
    if ($quotes eq 'none') {
        ($left, $right) = (q{}, q{});
    } elsif ($length == 1) {
        ($left, $right) = ($quotes, $quotes);
    } elsif ($length % 2 == 0) {
        my $half = $length / 2;
        $left  = substr($quotes, 0, $half);
        $right = substr($quotes, $half);
    } else {
        croak(qq(Invalid quote specification "$self->{opt_quotes}"));
    }
    @{$self}{qw(LQUOTE RQUOTE)} = ($left, $right);

    # Now handle the lquote and rquote options, which override one side
    # each.  The value "none" is normalized to the empty string.
    my %key_for = (opt_lquote => 'LQUOTE', opt_rquote => 'RQUOTE');
    for my $option (sort keys %key_for) {
        next if !defined($self->{$option});
        $self->{$option} = q{} if $self->{$option} eq 'none';
        $self->{ $key_for{$option} } = $self->{$option};
    }
}
358b39c5158Smillert
# Initialize the page title information and indentation from our arguments.
# Sets defaults for the center, release, and indent options if the user
# didn't provide them.
sub init_page {
    my ($self) = @_;

    # Get the version from the running Perl, converting each component to a
    # number to strip any leading zeroes.
    my $version = join('.', map { $_ + 0 } $] =~ /^(\d+)\.(\d{3})(\d+)$/);

    # Set the defaults for page titles and indentation if the user didn't
    # override anything.  Only undefined options are replaced.
    my %default_for = (
        opt_center  => 'User Contributed Perl Documentation',
        opt_release => 'perl v' . $version,
        opt_indent  => 4,
    );
    for my $option (keys %default_for) {
        $self->{$option} //= $default_for{$option};
    }
}
374b39c5158Smillert
375b39c5158Smillert##############################################################################
376b39c5158Smillert# Core parsing
377b39c5158Smillert##############################################################################
378b39c5158Smillert
379b39c5158Smillert# This is the glue that connects the code below with Pod::Simple itself.  The
380b39c5158Smillert# goal is to convert the event stream coming from the POD parser into method
381b39c5158Smillert# calls to handlers once the complete content of a tag has been seen.  Each
382b39c5158Smillert# paragraph or POD command will have textual content associated with it, and
383b39c5158Smillert# as soon as all of a paragraph or POD command has been seen, that content
384b39c5158Smillert# will be passed in to the corresponding method for handling that type of
385b39c5158Smillert# object.  The exceptions are handlers for lists, which have opening tag
386b39c5158Smillert# handlers and closing tag handlers that will be called right away.
387b39c5158Smillert#
388b39c5158Smillert# The internal hash key PENDING is used to store the contents of a tag until
389b39c5158Smillert# all of it has been seen.  It holds a stack of open tags, each one
390b39c5158Smillert# represented by a tuple of the attributes hash for the tag, formatting
391b39c5158Smillert# options for the tag (which are inherited), and the contents of the tag.
392b39c5158Smillert
# Add a block of text to the contents of the current node (the top of the
# PENDING stack), formatting it according to that node's formatting
# instructions as we do.
#
# $text - Text to add
sub _handle_text {
    my ($self, $text) = @_;
    my $current = $self->{PENDING}[-1];
    $current->[2] .= $self->format_text($current->[1], $text);
}
400b39c5158Smillert
# Given an element name, get the corresponding method name.  The name is
# converted to lowercase, hyphens become underscores, and any character
# other than a lowercase letter, digit, or underscore is then removed.
#
# $element - Name of the element
#
# Returns: The base name of the method for that element
sub method_for_element {
    my ($self, $element) = @_;
    (my $method = $element) =~ tr/A-Z-/a-z_/;
    $method =~ tr/_a-z0-9//cd;
    return $method;
}
408b39c5158Smillert
# Handle the start of a new element.  If a cmd_ method is defined for the
# element, assume that we need to collect the entire tree for this element
# before passing it to that method, and push a new entry onto PENDING into
# which we'll collect blocks of text and nested elements.  Otherwise, if a
# start_ method is defined for the element, call it.
#
# $element - Name of the element
# $attrs   - Attributes hash for the element
sub _handle_element_start {
    my ($self, $element, $attrs) = @_;
    my $method = $self->method_for_element($element);

    if ($self->can("cmd_$method")) {

        # Turn off IN_NAME for any command other than <Para> and the
        # formatting codes so that IN_NAME isn't still set for the first
        # heading after the NAME heading.
        if ($element ne 'Para' && length($element) > 1) {
            $self->{IN_NAME} = 0;
        }

        # How we're going to format embedded text blocks depends on the tag
        # and also on our parent tags.  Start from the parent's formatting
        # (or the defaults if there is none) and apply this element's
        # overrides.  Thankfully, inside tags that turn off guesswork and
        # reformatting, nothing else can turn it back on, so this can be
        # strictly inherited.
        my $inherited = $self->{PENDING}[-1][1] || $FORMATTING{DEFAULT};
        my $overrides = $FORMATTING{$element} || {};
        my %formatting = (%{$inherited}, %{$overrides});
        push(@{ $self->{PENDING} }, [$attrs, \%formatting, '']);
    } elsif (my $start_method = $self->can("start_$method")) {
        $self->$start_method($attrs, '');
    }
}
437b39c5158Smillert
# Handle the end of an element.  If we have a cmd_ method for this element,
# this is where we pass along the content that we collected.  Otherwise, if
# we have an end_ method for the element, call that.
#
# $element - Name of the element
sub _handle_element_end {
    my ($self, $element) = @_;
    my $method = $self->method_for_element($element);

    # Without a command handler, there is no pending content to deal with;
    # just call the end_ method if there is one.
    my $cmd_method = $self->can("cmd_$method");
    if (!$cmd_method) {
        my $end_method = $self->can("end_$method");
        $self->$end_method() if $end_method;
        return;
    }

    # Pull off the pending text and pass it to the handler along with the
    # saved attribute hash.
    my $tag = pop(@{ $self->{PENDING} });
    my $text = $self->$cmd_method($tag->[0], $tag->[2]);
    return if !defined($text);

    # If more than one entry remains on the stack, the result becomes part
    # of the content of the new top entry.  Otherwise, it is output
    # directly.
    if (@{ $self->{PENDING} } > 1) {
        $self->{PENDING}[-1][2] .= $text;
    } else {
        $self->output($text);
    }
}
461b39c5158Smillert
462b39c5158Smillert##############################################################################
463b39c5158Smillert# General formatting
464b39c5158Smillert##############################################################################
465b39c5158Smillert
# Format a text block.  Takes a hash of formatting options and the text to
# format.  The formatting options are cleanup, convert, literal, and
# guesswork, all of which are boolean.
#
# $options - Hash of formatting options
# $text    - Text to format
#
# Returns: The formatted text
sub format_text {
    my ($self, $options, $text) = @_;

    # Cleanup just tidies up a few things.  The order matters here:
    # backslashes have to be escaped first, before the following
    # substitutions add backslashes of their own.
    if ($options->{cleanup}) {
        $text =~ s/\\/\\e/g;            # escape backslashes
        $text =~ s/-/\\-/g;             # tell *roff that hyphens are hard
        $text =~ s/_(?=_)/_\\|/g;       # space out consecutive underscores
        $text =~ s/\x{200B}/\\:/g;      # zero-width space to break point
    }

    # Except in <Data> blocks, if groff or roff encoding is requested and
    # we're in an ASCII environment, do the encoding.  For EBCDIC, we just
    # write what we get and hope for the best.  Leave non-breaking spaces and
    # soft hyphens alone; we'll convert those at the last minute.
    if ($options->{convert} && ASCII) {
        my $encoding = $self->{ENCODING};
        if ($encoding eq 'groff') {
            $text =~ s{ ([^\x00-\x7F\xA0\xAD]) }{
                '\\[u' . sprintf('%04X', ord($1)) . ']'
            }xmsge;
        } elsif ($encoding eq 'roff') {
            $text =~ s/([^\x00-\x7F\xA0\xAD])/$ESCAPES{ord ($1)} || "X"/eg;
        }
    }

    # Ensure that *roff doesn't convert literal quotes to UTF-8 single quotes,
    # but don't mess up accent escapes.
    if ($options->{literal}) {
        $text =~ s/(?<!\\\*)\'/\\*\(Aq/g;
        $text =~ s/(?<!\\\*)\`/\\\`/g;
    }

    # If guesswork is viable for this block, do that.  It is never applied
    # inside the NAME section.
    my $apply_guesswork = $options->{guesswork} && !$self->{IN_NAME};
    $text = $self->guesswork($text) if $apply_guesswork;

    return $text;
}
517b39c5158Smillert
# Handles C<> text, deciding whether to put \*(C` and \*(C' around it or not.
# This is a whole bunch of messy heuristics to try to avoid overquoting,
# originally from Barrie Slaymaker.  This largely duplicates similar code in
# Pod::Text.
#
# $text - Contents of the C<> formatting code
#
# Returns: In the NAME section, the text wrapped in LQUOTE and RQUOTE.
#          Otherwise, the text wrapped in the \f(FS and \f(FE font markers,
#          with the \*(C` and \*(C' quote strings added inside them unless
#          the text looks like something that shouldn't be quoted.
sub quote_literal {
    my ($self, $text) = @_;

    # If in NAME section, just return an ASCII quoted string to avoid
    # confusing tools like whatis.
    if ($self->{IN_NAME}) {
        return $self->{LQUOTE} . $text . $self->{RQUOTE};
    }

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[[^]]+\] | \{[^}]+\} )?';

    # Traditionally, Pod::Man has not quoted Perl variables, functions,
    # numbers, or hex constants, but this is not always desirable.  Make this
    # optional on the quoting guesswork flag.
    my $extra = qr{(?!)}xms;    # never matches
    if ($self->{GUESSWORK}{quoting}) {
        $extra = qr{
             \$+ [\#^]? \S $index                    # special ($^F, $")
           | [\$\@%&*]+ \#? [:\'\w]+ $index          # plain var or func
           | [\$\@%&*]* [:\'\w]+
             (?: \\-> )? \(\s*[^\s,\)]*\s*\)         # 0/1-arg func call
           | (?: [+] | \\- )? ( \d[\d.]* | \.\d+ )
             (?: [eE] (?: [+] | \\- )? \d+ )?        # a number
           | 0x [a-fA-F\d]+                          # a hex constant
         }xms;
    }

    # Check for things that we don't want to quote, and if we find any of
    # them, return the string with just a font change and no quoting.
    my $leave_unquoted = $text =~ m{
      ^\s*
      (?:
         ( [\'\"] ) .* \1                    # already quoted
       | \\\*\(Aq .* \\\*\(Aq                # quoted and escaped
       | \\?\` .* ( \' | \\?\` | \\\*\(Aq )  # `quoted' or `quoted`
       | $extra
      )
      \s*\z
     }xms;
    return '\f(FS' . $text . '\f(FE' if $leave_unquoted;

    # If we didn't return, go ahead and quote the text.
    return '\f(FS\*(C`' . $text . "\\*(C'\\f(FE";
}
568b39c5158Smillert
# Apply formatting guesswork to a block of non-literal text and return the
# text with internal font codes added.  This marks up various Perl constructs
# and things commonly found in man pages without the author having to add
# explicit markup.  The inserted font sequences are the internal \f(<font>[SE]
# codes and must be converted later with mapfonts.
#
# This method is very fragile, both in the regular expressions it uses and in
# the ordering of the transformations.  Care and testing is required when
# modifying it.
#
# $self - Pod::Man object (reads the GUESSWORK flags)
# $text - Text block to transform
#
# Returns: The transformed text
sub guesswork {
    my ($self, $text) = @_;

    # The hyphen-matching patterns here expect every hyphen to arrive escaped
    # with a backslash.  Undo that escaping for single dashes inside regular
    # words, since those are real hyphens that *roff may treat as a possible
    # break point.  A dash directly after the first character of a word stays
    # non-breaking, however.
    #
    # This is deliberately not user-controllable; without it *roff mangles the
    # output in unacceptable ways.
    $text =~ s{
        ( (?:\G|^|\s|$NBSP) [\(\"]* [a-zA-Z] ) ( \\- )?
        ( (?: [a-zA-Z\']+ \\-)+ )
        ( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|$NBSP|\Z|\\\ ) )
        \b
    } {
        my ($lead, $first_dash, $body, $tail) = ($1, $2, $3, $4);
        $body =~ s/\\-/-/g;
        $lead . ($first_dash || '') . $body . $tail;
    }egx;

    # Embolden functions written as func(), including ones in all capitals,
    # but only when the parentheses are empty.  The name has to start with a
    # letter or underscore and continue with word characters or colons.
    if ($$self{GUESSWORK}{functions}) {
        $text =~ s{
            (?<! \\ )
            \b
            ( [A-Za-z_] [:\w]+ \(\) )
        } {
            '\f(BS' . $1 . '\f(BE'
        }egx;
    }

    # Put the name of a manual page reference in bold and leave the section in
    # the regular font, separated by a thin space.  Only name(n) is
    # recognized, where the name starts with a letter or underscore and
    # contains only word characters, periods (for configuration file man
    # pages), colons, or escaped hyphens, and n is one digit optionally
    # followed by lowercase letters.  References such as perl(l) or
    # socket(3SOCKET) are therefore not recognized.
    if ($$self{GUESSWORK}{manref}) {
        $text =~ s{
            \b
            (?<! \\ )                                   # rule out \e0(1)
            ( [A-Za-z_] (?:[.:\w] | \\-)+ )
            ( \( \d [a-z]* \) )
        } {
            '\f(BS' . $1 . '\f(BE\|' . $2
        }egx;
    }

    # Put simple Perl variable references in a fixed-width font.  Anything
    # followed by an open parenthesis is left alone, since functions have too
    # many subtleties to handle here.
    if ($$self{GUESSWORK}{variables}) {
        $text =~ s{
           ( ^ | \s+ )
           ( [\$\@%] [\w:]+ )
           (?! \( )
        } {
            $1 . '\f(FS' . $2 . '\f(FE'
        }egx;
    }

    return $text;
}
650b39c5158Smillert
651b39c5158Smillert##############################################################################
652b39c5158Smillert# Output
653b39c5158Smillert##############################################################################
654b39c5158Smillert
# Convert the internal font codes in a block of text into real *roff font
# escapes.
#
# When building up the *roff code, we don't use real *roff fonts.  Instead, we
# embed font codes of the form \f(<font>[SE] where <font> is one of B, I, or
# F, S stands for start, and E stands for end.  This method turns these into
# the right start and end codes.
#
# We add this level of complexity because the old pod2man didn't get code like
# B<< someI<thing> else>> right.  After I<> it switched back to normal text
# rather than bold.  We take care of this by keeping a nesting count for each
# of bold, italic, and fixed, and using which of the counts are non-zero as a
# combined key into the FONTS table to find the current font sequence.
#
# The base font must be either \fP or \fR.  \fP changes to the previous font,
# but only one previous font is kept.  Unfortunately, there is a bug in
# Solaris 2.6 nroff (not present in GNU groff) where the sequence
# \fB\fP\f(CW\fP leaves the font set to B rather than R, presumably because
# \f(CW doesn't actually do a font change.  Because of this, we prefer to use
# \fR where possible.
#
# Unfortunately, this isn't possible for arguments to heading macros, since
# there we don't know what the outside level font is.  In that case, arrange
# things so that the outside font is always the "previous" font and end with
# \fP instead of \fR.  Idea from Zack Weinberg.
#
# This function used to be much simpler outside of macro arguments because it
# went directly from \fB to \f(CW and relied on \f(CW clearing bold since it
# wasn't \f(CB.  Unfortunately, while this works for mandoc, this is not how
# groff works; \fBfoo\f(CWbar still prints bar in bold.  Therefore, we force
# the font back to the base font before each font change.
#
# $self - Pod::Man object (reads the FONTS table)
# $text - Text containing \f(<font>[SE] codes
# $base - Escape that returns to the base font (\fR or \fP)
#
# Returns: $text with the internal codes replaced by *roff font escapes
sub mapfonts {
    my ($self, $text, $base) = @_;

    # Nesting depth of each font attribute and the last font escape that was
    # emitted.  %magic maps the font letter in an internal code to the counter
    # it adjusts.
    my ($fixed, $bold, $italic) = (0, 0, 0);
    my %magic = (F => \$fixed, B => \$bold, I => \$italic);
    my $last = '\fR';

    # The closure used to process each font escape, expected to be called from
    # the right-hand side of an s/// expression.  It returns the *roff text
    # that should replace one internal code.
    my $process = sub {
        my ($style, $start_stop) = @_;

        # If we are not currently in the regular font, first go back to the
        # base font before selecting the new one.
        my $sequence = ($last ne '\fR') ? $base : q{};

        # Adjust the nesting count and look up the resulting font.  The key is
        # three flags (fixed, bold, italic), each 0 or 1.
        ${ $magic{$style} } += ($start_stop eq 'S') ? 1 : -1;
        my $f = $self->{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};

        # No change in the effective font means no output at all.
        return q{} if ($f eq $last);

        # The regular font is reached by the base sequence alone.
        if ($f ne '\fR') {
            $sequence .= $f;
        }
        $last = $f;
        return $sequence;
    };

    # Now, do the actual work.
    $text =~ s{ \\f\((.)(.) }{$process->($1, $2)}xmsge;

    # We can do a bit of cleanup.  A font change that is immediately followed
    # by \fR has no visible effect, so collapse every run of such changes,
    # along with a \fR directly in front of the run, into a single \fR.  For
    # example, \fR\fB\fR\fI becomes \fR\fI.  This has to be a global
    # substitution so that every such run in the text is cleaned up, not only
    # the first one.
    $text =~ s{ (?: \\fR )? (?: \\f (?: . | \(.. ) \\fR )+ }{\\fR}xmsg;

    return $text;
}
714b39c5158Smillert
# Given a command and a single argument that may or may not contain double
# quotes and fixed-width text, handle double-quote formatting for it.  If
# there is no fixed-width text, just return the command followed by the
# argument with proper quoting.  If there is fixed-width text, work around a
# Solaris nroff bug with fixed-width fonts by converting fixed-width to
# regular fonts (nroff sees no difference).
#
# $self    - Pod::Man object (reads FIXEDPAT, FONTS, LQUOTE, RQUOTE)
# $command - The *roff macro, such as .SH
# $text    - The argument to the macro
# $extra   - Optional additional text appended unquoted after the argument
#
# Returns: The complete macro line or lines, ending in a newline
sub switchquotes {
    my ($self, $command, $text, $extra) = @_;

    # Whatever follows the quoted argument is the same in every branch.
    my $suffix = $extra ? " $extra" : '';

    # Without any fixed-width fonts there is nothing to work around.
    if ($text !~ $self->{FIXEDPAT}) {
        return "$command " . _quote_macro_argument($text) . $suffix . "\n";
    }

    # Separate troff from nroff if there are any fixed-width fonts in use to
    # work around problems with Solaris nroff.
    my $nroff = $text;
    my $troff = $text;

    # Work around the Solaris nroff bug where \f(CW\fP leaves the font set to
    # Roman rather than the actual previous font when used in headings.  troff
    # output may still be broken, but at least we can fix nroff by just
    # switching the font changes to the non-fixed versions.  Plain fixed-width
    # text loses its font changes entirely, and the fixed-width italic, bold,
    # and bold-italic fonts become their proportional equivalents.
    $nroff =~ s{\Q$self->{FONTS}{100}\E(.*?)\\f[PR]}{$1}xmsg;
    $nroff =~ s{\Q$self->{FONTS}{101}\E}{\\fI}xmsg;
    $nroff =~ s{\Q$self->{FONTS}{110}\E}{\\fB}xmsg;
    $nroff =~ s{\Q$self->{FONTS}{111}\E}{\\f\(BI}xmsg;

    # We have to deal with \*C` and \*C', which are used to add the quotes
    # around C<> text, since they may expand to " and if they do this confuses
    # the .SH macros and the like no end.  Expand them ourselves: to the
    # configured quotes for nroff and to nothing for troff.
    my $c_is_quote = index("$self->{LQUOTE}$self->{RQUOTE}", qq(\")) != -1;
    if ($c_is_quote && $text =~ m{ \\[*]\(C[\'\`] }xms) {
        $nroff =~ s{ \\[*]\(C\` }{$self->{LQUOTE}}xmsg;
        $nroff =~ s{ \\[*]\(C\' }{$self->{RQUOTE}}xmsg;
        $troff =~ s{ \\[*]\(C[\'\`] }{}xmsg;
    }

    # Now finally output the command.  Bother with .ie only if the nroff and
    # troff output aren't the same.
    $nroff = _quote_macro_argument($nroff) . $suffix;
    $troff = _quote_macro_argument($troff) . $suffix;
    if ($nroff ne $troff) {
        return ".ie n $command $nroff\n.el $command $troff\n";
    }
    return "$command $nroff\n";
}
764b39c5158Smillert
# Guard the start of every line against being interpreted by *roff as a
# command.  Any line that begins with a period, a single quote, or a backslash
# gets \& inserted in front of it.  Backslashes are included because they
# could expand to or hide something that *roff would treat as a command; this
# is overkill, but much simpler than trying to parse *roff here.
#
# $self - Pod::Man object (unused)
# $text - Text to protect, possibly containing multiple lines
#
# Returns: The protected text
sub protect {
    my ($self, $text) = @_;

    # Insert at the start of each line without consuming the character that
    # triggered the protection.
    $text =~ s{ ^ (?= [.\'\\] ) }{\\&}xmsg;
    return $text;
}
774b39c5158Smillert
# Emit vertical whitespace if NEEDSPACE is set, choosing the macro based on
# the current indentation level.  This is needed since in *roff one has to
# create vertical whitespace after paragraphs and between some things, but
# other macros create their own whitespace.  Also closes out a sequence of
# repeated =items, since calling makespace means we're about to begin the item
# body.
#
# $self - Pod::Man object (reads ITEMS, INDENT, NEEDSPACE; resets ITEMS)
sub makespace {
    my ($self) = @_;

    # More than one consecutive =item means an earlier .PD is still open.
    if ($$self{ITEMS} > 1) {
        $self->output (".PD\n");
    }
    $$self{ITEMS} = 0;

    # Use .Sp inside an indented block and .PP at the top level.
    if ($$self{NEEDSPACE}) {
        my $macro = ($$self{INDENT} > 0) ? ".Sp\n" : ".PP\n";
        $self->output ($macro);
    }
}
788b39c5158Smillert
# Write out .IX commands for any pending index entries and, optionally, for
# an index entry given as an argument.  Pending entries come from the INDEX
# queue, may hold several entries separated by slashes, and are emitted
# together as a single Xref entry.  An entry passed as an argument has some
# *roff formatting stripped first.
#
# $self    - Pod::Man object (reads and clears INDEX)
# $section - Optional index type for the entry in $index
# $index   - Optional index entry text, used only if $section is true
sub outindex {
    my ($self, $section, $index) = @_;
    my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
    return unless ($section || @entries);

    # Everything pending is about to be written, so empty the queue.
    $$self{INDEX} = [];

    # Collect [type, text] pairs.  Regular index entries are marked Xref, and
    # headings pass in their own section.
    my @pending;
    push @pending, [ 'Xref', join (' ', @entries) ] if @entries;
    if ($section) {
        # Undo hyphen escaping and strip other escape sequences.
        $index =~ s/\\-/-/g;
        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
        push @pending, [ $section, $index ];
    }

    # Print out the .IX commands.  Whitespace runs become one space, double
    # quotes are doubled, and backslashes are doubled.
    for my $pair (@pending) {
        my ($type, $entry) = @{$pair};
        $entry =~ s/\s+/ /g;
        $entry =~ s/\"/\"\"/g;
        $entry =~ s/\\/\\\\/g;
        $self->output (qq{.IX $type "$entry"\n});
    }
}
821b39c5158Smillert
# Output some text.  The only changes made are replacing the non-breaking
# space and soft hyphen markers with their *roff escapes and, if required,
# encoding the text in the output encoding.
#
# $self - Pod::Man object (reads ENCODE, ENCODING, output_fh)
# @text - Pieces of text, joined together without a separator
sub output {
    my ($self, @text) = @_;
    my $text = join('', @text);
    $text =~ s{$NBSP}{\\ }g;
    $text =~ s{$SHY}{\\%}g;

    if ($$self{ENCODE} && _needs_encode($$self{ENCODING})) {
        # Callback passed to Encode as its CHECK argument.  Encode calls it
        # with the ordinal of each character it cannot represent.  Report the
        # problem as a POD error and then return whatever Encode produces for
        # that character without a callback.
        my $check = sub {
            my ($char) = @_;

            # $char is already a number, so format it as hex rather than
            # parsing it with hex(), which would reinterpret its decimal
            # digits as hexadecimal and report the wrong code point.
            my $display = sprintf('"\x{%04x}"', $char);
            my $error = "$display does not map to $$self{ENCODING}";
            $self->whine ($self->line_count(), $error);
            return Encode::encode ($$self{ENCODING}, chr($char));
        };
        my $output = Encode::encode ($$self{ENCODING}, $text, $check);
        print { $$self{output_fh} } $output;
    } else {
        print { $$self{output_fh} } $text;
    }
}
843b39c5158Smillert
844b39c5158Smillert##############################################################################
845b39c5158Smillert# Document initialization
846b39c5158Smillert##############################################################################
847b39c5158Smillert
# Handle the start of the document.  Records whether the document is empty,
# decides whether output has to be encoded before printing, outputs the
# preamble and page title, and resets the per-document state.
#
# $self  - Pod::Man object
# $attrs - Hash reference of document attributes; only contentless is read
sub start_document {
    my ($self, $attrs) = @_;

    # A contentless document is only treated as such if we have not been told
    # to always emit something.
    if (!$$attrs{contentless} || $$self{ALWAYS_EMIT_SOMETHING}) {
        delete $$self{CONTENTLESS};
    } else {
        $$self{CONTENTLESS} = 1;
    }

    # When an encoding is requested, check whether our output file handle
    # already has a PerlIO layer flagged as UTF-8.  If it does not, we'll need
    # to encode our output before printing it (handled in the output() sub).
    # The check is wrapped in an eval to handle versions of Perl without
    # PerlIO; if it fails, we keep encoding ourselves.
    #
    # PerlIO::get_layers still requires its argument be a glob, so coerce the
    # file handle to a glob.
    $$self{ENCODE} = $$self{ENCODING} ? 1 : 0;
    if ($$self{ENCODE}) {
        eval {
            require PerlIO;
            my @layers = PerlIO::get_layers (
                *{$$self{output_fh}}, output => 1, details => 1,
            );
            my $flags = $layers[-1];
            if ($flags && ($flags & PerlIO::F_UTF8 ())) {
                $$self{ENCODE} = 0;
            }
        };
    }

    # Determine information for the preamble and then output it unless the
    # document was content-free.  An explicit name option wins over the
    # devised title, in which case the section defaults to 1.
    unless ($$self{CONTENTLESS}) {
        my ($name, $section)
            = defined $self->{opt_name}
            ? ($self->{opt_name}, $self->{opt_section} || 1)
            : $self->devise_title;
        my $date = $self->{opt_date} // $self->devise_date();
        $self->preamble ($name, $section, $date) unless $self->bare_output;
    }

    # Initialize a few per-document variables.
    $self->{INDENT}    = 0;      # Current indentation level.
    $self->{INDENTS}   = [];     # Stack of indentations.
    $self->{INDEX}     = [];     # Index keys waiting to be printed.
    $self->{IN_NAME}   = 0;      # Whether processing the NAME section.
    $self->{ITEMS}     = 0;      # The number of consecutive =items.
    $self->{ITEMTYPES} = [];     # Stack of =item types, one per list.
    $self->{SHIFTWAIT} = 0;      # Whether there is a shift waiting.
    $self->{SHIFTS}    = [];     # Stack of .RS shifts.
    $self->{PENDING}   = [[]];   # Pending output.
}
904b39c5158Smillert
# Handle the end of the document.  The only visible effect is dying if the
# complain_die option is set and the document had POD errors.  Nothing follows
# the two trailing guards for bare output and contentless documents, so no
# output is produced here in any case.
#
# $self - Pod::Man object
#
# Throws: Text exception if complain_die is set and errors were seen
sub end_document {
    my ($self) = @_;
    my $fatal = $$self{complain_die} && $self->errors_seen;
    croak ("POD document had syntax errors") if $fatal;
    return if $self->bare_output;
    return if ($$self{CONTENTLESS} && !$$self{ALWAYS_EMIT_SOMETHING});
}
916b39c5158Smillert
# Work out the page name and section from the source file name.  If there is
# no usable file name, the name is STDIN.  The section is the section option
# if set, otherwise 3 for files ending in .pm and 1 for everything else.
# Loads File::Basename or File::Spec as needed.
#
# $self - Pod::Man object (reads opt_section and the source file name)
#
# Returns: List of the page name and the section
sub devise_title {
    my ($self) = @_;
    my $name = $self->source_filename || '';

    # An explicit section always wins.  Otherwise, modules go in section 3.
    my $section = $self->{opt_section};
    if (!$section) {
        $section = ($name =~ /\.pm\z/i) ? 3 : 1;
    }
    $name =~ s/\.p(od|[lm])\z//i;

    # If Pod::Parser gave us an IO::File reference as the source file name,
    # convert that to the empty string as well.  Then, if we don't have a
    # valid name, convert it to STDIN.
    #
    # In podlators 4.00 through 4.07, this also produced a warning, but that
    # was surprising to a lot of programs that had expected to be able to pipe
    # POD through pod2man without specifying the name.  In the name of
    # backward compatibility, just quietly set STDIN as the page title.
    $name = '' if $name =~ /^IO::File(?:=\w+)\(0x[\da-f]+\)$/i;
    $name = 'STDIN' if $name eq '';

    # If the section doesn't start with 3, the name is just the upper-cased
    # basename of the file.
    if ($section !~ /^3/) {
        require File::Basename;
        $name = uc File::Basename::basename ($name);
        return ($name, $section);
    }

    # Otherwise, assume we're dealing with a module.  We want to figure out
    # the full module name from the path to the file, but we don't want to
    # include too much of the path into the module name.  Lose anything up to
    # the first of:
    #
    #     */lib/*perl*/         standard or site_perl module
    #     */*perl*/lib/         from -Dprefix=/opt/perl
    #     */*perl*/             random module hierarchy
    #
    # Also strip off a leading site, site_perl, or vendor_perl component, any
    # OS-specific component, and any version number component, and strip off
    # an initial component of "lib" or "blib/lib" since that's what
    # ExtUtils::MakeMaker creates.
    #
    # splitdir requires at least File::Spec 0.8.
    require File::Spec;
    my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
    my @dirs = File::Spec->splitdir ($dirs);
    if (@dirs) {
        # Find how many leading components to drop.  The first component
        # containing "perl" ends the search.  For paths whose first component
        # is "ext", a "lib" component followed by a non-empty component also
        # marks a cut point, but the search continues past it.
        my $cut = 0;
        for my $i (0 .. $#dirs) {
            my $next = $dirs[$i + 1];
            if ($dirs[$i] =~ /perl/) {
                $cut = $i + 1;
                $cut++ if ($next && $next eq 'lib');
                last;
            }
            if ($dirs[$i] eq 'lib' && $next && $dirs[0] eq 'ext') {
                $cut = $i + 1;
            }
        }
        if ($cut > 0) {
            splice (@dirs, 0, $cut);
            shift @dirs if ($dirs[0] =~ /^(site|vendor)(_perl)?$/);
            shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
            shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
        }
        shift @dirs if $dirs[0] eq 'lib';
        splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib');
    }

    # Remove empty directories when building the module name; they occur too
    # easily on Unix by doubling slashes.
    my @parts = grep { $_ } @dirs;
    $name = join ('::', @parts, $file);
    return ($name, $section);
}
995b39c5158Smillert
# Determine the modification date and return that, properly formatted in ISO
# format.
#
# If POD_MAN_DATE is set, that overrides anything else.  This can be used for
# reproducible generation of the same file even if the input file timestamps
# are unpredictable or the POD comes from standard input.
#
# Otherwise, if SOURCE_DATE_EPOCH is set and can be parsed as seconds since
# the UNIX epoch (a non-empty string of ASCII digits), base the timestamp on
# that.  See <https://reproducible-builds.org/specs/source-date-epoch/>
#
# Otherwise, use the modification date of the input if we can stat it.  Be
# aware that Pod::Simple returns the stringification of the file handle as
# source_filename for input from a file handle, so we'll stat some random ref
# string in that case.  If that fails, instead use the current time.
#
# $self - Pod::Man object, used to get the source file
#
# Returns: YYYY-MM-DD date suitable for the left-hand footer, or the
#          unmodified value of POD_MAN_DATE if that is set
sub devise_date {
    my ($self) = @_;

    # If POD_MAN_DATE is set, always use it.
    if (defined($ENV{POD_MAN_DATE})) {
        return $ENV{POD_MAN_DATE};
    }

    # If SOURCE_DATE_EPOCH is set and can be parsed, use that.  Require at
    # least one digit so that an empty value is treated as unparsable and
    # falls through to the other sources instead of silently producing
    # 1970-01-01.
    my $time;
    my $epoch = $ENV{SOURCE_DATE_EPOCH};
    if (defined($epoch) && $epoch =~ m{ \A [0-9]+ \z }xms) {
        $time = $epoch;
    }

    # Otherwise, get the input filename and try to stat it.  If that fails,
    # use the current time.
    if (!defined $time) {
        my $input = $self->source_filename;
        if ($input) {
            $time = (stat($input))[9] || time();
        } else {
            $time = time();
        }
    }

    # Can't use POSIX::strftime(), which uses Fcntl, because MakeMaker uses
    # this and it has to work in the core which can't load dynamic libraries.
    # Use gmtime instead of localtime so that the generated man page does not
    # depend on the local time zone setting and is more reproducible.
    my ($year, $month, $day) = (gmtime($time))[5,4,3];
    return sprintf("%04d-%02d-%02d", $year + 1900, $month + 1, $day);
}
1047b39c5158Smillert
1048b39c5158Smillert# Print out the preamble and the title.  The meaning of the arguments to .TH
1049b39c5158Smillert# unfortunately vary by system; some systems consider the fourth argument to
1050b39c5158Smillert# be a "source" and others use it as a version number.  Generally it's just
1051b39c5158Smillert# presented as the left-side footer, though, so it doesn't matter too much if
1052b39c5158Smillert# a particular system gives it another interpretation.
1053b39c5158Smillert#
1054b39c5158Smillert# The order of date and release used to be reversed in older versions of this
1055b39c5158Smillert# module, but this order is correct for both Solaris and Linux.
sub preamble {
    my ($self, $name, $section, $date) = @_;
    my $template = $self->preamble_template();

    # When the output encoding needs encoding at all, emit an editor-style
    # coding line.  groff's preconv script uses it to determine the input
    # encoding if it recognizes the name, and it has to be the first or
    # second line of the output.  No Unicode Byte Order Mark is written: the
    # BOM is deprecated and might break a *roff implementation that would
    # otherwise cope with Unicode.
    if (_needs_encode($self->{ENCODING})) {
        my $encoding = lc($self->{ENCODING});
        my $key = $encoding;
        $key =~ s{-}{}g;
        my $coding = $ENCODINGS{$key} || $encoding;
        if ($coding ne 'us-ascii') {
            $self->output(qq{.\\\" -*- mode: troff; coding: $coding -*-\n});
        }
    }

    # Fill in the configurable parts of the template.  The quote strings end
    # up as the argument of a string definition, so a leading double quote
    # (and only a leading one) has to be doubled.
    $template =~ s{ [@] CFONT [@] }{$self->{opt_fixed}}xms;
    my $lquote = $self->{LQUOTE};
    $lquote =~ s{ \A \" }{""}xms;
    my $rquote = $self->{RQUOTE};
    $rquote =~ s{ \A \" }{""}xms;
    $template =~ s{ [@] LQUOTE [@] }{$lquote}xms;
    $template =~ s{ [@] RQUOTE [@] }{$rquote}xms;
    chomp($template);

    # Version string for the "Automatically generated by" line.
    my $version = $self->version_report();

    # The index line, quoted so that it is syntactically valid.
    my $index_arg = _quote_macro_argument("$name $section");

    # Every argument of .TH is quoted as well.  The section should never
    # need it, but being cautious costs nothing.
    my $name_arg    = _quote_macro_argument($name);
    my $section_arg = _quote_macro_argument($section);
    my $date_arg    = _quote_macro_argument($date);
    my $center_arg  = _quote_macro_argument($self->{opt_center});
    my $release_arg = _quote_macro_argument($self->{opt_release});

    # Write out the bulk of the preamble.
    $self->output(<<"----END OF HEADER----");
.\\" Automatically generated by $version
.\\"
.\\" Standard preamble:
.\\" ========================================================================
$template
.\\" ========================================================================
.\\"
.IX Title $index_arg
.TH $name_arg $section_arg $date_arg $release_arg $center_arg
.\\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\\" way too many mistakes in technical documents.
.if n .ad l
.nh
----END OF HEADER----

    # A configured language adds the matching macro file and hyphenation
    # language requests.
    if ($self->{opt_language}) {
        $self->output(".mso $self->{opt_language}.tmac\n");
        $self->output(".hla $self->{opt_language}\n");
    }
}
1127b39c5158Smillert
1128b39c5158Smillert##############################################################################
1129b39c5158Smillert# Text blocks
1130b39c5158Smillert##############################################################################
1131b39c5158Smillert
# Output an ordinary paragraph.  The one complication is a paragraph that is
# the first text after an =over with no =item: SHIFTWAIT is then set and the
# indentation has to be opened here.  Always returns the empty string.
sub cmd_para {
    my ($self, $attrs, $text) = @_;

    # Open the pending indent, if any, and remember it in SHIFTS so that it
    # is cleaned up on =back.
    $self->makespace();
    if ($self->{SHIFTWAIT}) {
        $self->output(".RS $self->{INDENT}\n");
        push @{ $self->{SHIFTS} }, $self->{INDENT};
        $self->{SHIFTWAIT} = 0;
    }

    # Normalize the end of the paragraph to exactly one newline, dropping
    # other trailing whitespace but keeping a trailing "\ " (a backslashed
    # space produced by S<>).  Working on the reversed string anchors the
    # match at the start, so the whole paragraph need not be scanned.
    my $reversed = reverse $text;
    $reversed =~ s/\A\s*?(?= \\|\S|\z)/\n/;
    $text = reverse $reversed;

    # Emit the paragraph followed by any pending index entries.
    $self->output($self->protect($self->mapfonts($text, '\fR')));
    $self->outindex();
    $self->{NEEDSPACE} = 1;
    return '';
}
1164b39c5158Smillert
# Output a verbatim paragraph wrapped in .Vb/.Ve (macros defined in our
# preamble), with a null token in front of every line so that no line can be
# taken for a *roff command.  Returns nothing for a paragraph holding only
# whitespace and the empty string otherwise.
sub cmd_verbatim {
    my ($self, $attrs, $text) = @_;

    # Nothing to do for a paragraph without any visible character.
    return if $text !~ m{ \S }xms;

    # Replace all trailing whitespace by a single newline.
    $text =~ s{ \s* \z }{\n}xms;

    # .Vb takes the number of lines that *roff should keep together.  Count
    # the lines up to the first blank one, but never ask *roff to keep a huge
    # block together.
    my $keep = 0;
    for my $row (split m{ \n }xms, $text) {
        last if $row =~ m{ \A \s* \z }xms;
        $keep++;
    }
    $keep = 10 if $keep > 12;

    # A null token at the start of each line preserves its indentation.
    $text =~ s{ ^ }{\\&}xmsg;

    # Emit the wrapped block.
    $self->makespace();
    $self->output(".Vb $keep\n$text.Ve\n");
    $self->{NEEDSPACE} = 1;
    return q{};
}
1201b39c5158Smillert
# Output literal data (from =for and similar constructs) almost untouched:
# leading newlines are removed and the text is made to end in a newline, with
# up to two trailing newlines folded into one.  Returns the empty string.
sub cmd_data {
    my ($self, $attrs, $literal) = @_;
    $literal =~ s{ \A \n+ }{}xms;
    $literal =~ s{ \n{0,2} \z }{\n}xms;
    $self->output($literal);
    return q{};
}
1211b39c5158Smillert
1212b39c5158Smillert##############################################################################
1213b39c5158Smillert# Headings
1214b39c5158Smillert##############################################################################
1215b39c5158Smillert
# Shared preparation for every heading, run before the heading itself is
# written.  Returns the heading text folded onto a single line, and closes a
# run of items left open by a missing =back.
sub heading_common {
    my ($self, $text, $line) = @_;

    # Drop trailing whitespace, then fold each line break (with the
    # whitespace around it) into one space.
    $text =~ s/\s+$//;
    $text =~ s/\s*\n\s*/ /g;

    # A heading directly after =item without an intervening =back should
    # never happen, but recover from it anyway.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output(".PD\n");
    }

    return $text;
}
1233b39c5158Smillert
# Top-level heading, written with .SH.  The NAME section gets no .IX entry
# because of a bug in some versions of catman.  .SH already uses small caps,
# so \s0 and \s-1 size changes are stripped from the text.  IN_NAME records
# whether this heading opened the NAME section.  Returns the empty string.
sub cmd_head1 {
    my ($self, $attrs, $text) = @_;
    $text =~ s/\\s-?\d//g;
    $text = $self->heading_common($text, $attrs->{start_line});
    my $isname = ($text eq 'NAME' || $text =~ /\(NAME\)/);
    $self->output($self->switchquotes('.SH', $self->mapfonts($text, '\fP')));
    if (!$isname) {
        $self->outindex('Header', $text);
    }
    $self->{NEEDSPACE} = 0;
    $self->{IN_NAME}   = $isname;
    return '';
}
1249b39c5158Smillert
# Second-level heading, written with .SS and indexed as a subsection.
# Returns the empty string.
sub cmd_head2 {
    my ($self, $attrs, $text) = @_;
    $text = $self->heading_common($text, $attrs->{start_line});
    $self->output($self->switchquotes('.SS', $self->mapfonts($text, '\fP')));
    $self->outindex('Subsection', $text);
    $self->{NEEDSPACE} = 0;
    return '';
}
1259b39c5158Smillert
# Third-level heading.  *roff has no such concept, so the heading becomes an
# ordinary paragraph set in italics, indexed as a subsection.  Returns the
# empty string.
sub cmd_head3 {
    my ($self, $attrs, $text) = @_;
    $text = $self->heading_common($text, $attrs->{start_line});
    $self->makespace();
    $self->output($self->mapfonts('\f(IS' . $text . '\f(IE', '\fR') . "\n");
    $self->outindex('Subsection', $text);
    $self->{NEEDSPACE} = 1;
    return '';
}
1271b39c5158Smillert
# Fourth-level heading.  *roff has no such concept either, so the heading is
# written as an ordinary paragraph and indexed as a subsection.  Returns the
# empty string.
sub cmd_head4 {
    my ($self, $attrs, $text) = @_;
    $text = $self->heading_common($text, $attrs->{start_line});
    $self->makespace();
    $self->output($self->mapfonts($text, '\fR') . "\n");
    $self->outindex('Subsection', $text);
    $self->{NEEDSPACE} = 1;
    return '';
}
1283b39c5158Smillert
1284b39c5158Smillert##############################################################################
1285b39c5158Smillert# Formatting codes
1286b39c5158Smillert##############################################################################
1287b39c5158Smillert
# The formatting codes that the parser does not handle itself, apart from
# L<> and X<>.  Each handler receives the object, the attribute hash and the
# already formatted text, and returns the replacement text.  Inside the NAME
# section (IN_NAME set) bold, italic and file-name text is passed through
# without font markers.
sub cmd_b {
    my ($self, $attrs, $text) = @_;
    return $text if $self->{IN_NAME};
    return '\f(BS' . $text . '\f(BE';
}

sub cmd_i {
    my ($self, $attrs, $text) = @_;
    return $text if $self->{IN_NAME};
    return '\f(IS' . $text . '\f(IE';
}

sub cmd_f {
    my ($self, $attrs, $text) = @_;
    return $text if $self->{IN_NAME};
    return '\f(IS' . $text . '\f(IE';
}

# C<> text is handed to quote_literal.
sub cmd_c {
    my ($self) = @_;
    return $self->quote_literal($_[2]);
}
1294b39c5158Smillert
# S<> text: every whitespace character in the text is replaced by $NBSP, and
# the result is returned.
sub cmd_s {
    my ($self, $attrs, $text) = @_;
    my $unbreakable = $text;
    $unbreakable =~ s{ \s }{$NBSP}xmsg;
    return $unbreakable;
}
1301e0680481Safresh1
# X<> text produces no output of its own.  The entry is appended to the list
# of pending index entries in INDEX and the empty string is returned.
sub cmd_x {
    my ($self, $attrs, $entry) = @_;
    push @{ $self->{INDEX} }, $entry;
    return '';
}
1308b39c5158Smillert
# L<> text.  Anything that is not a URL link reduces to its text.  A URL
# link is shown as the text wrapped in angle brackets when there is no
# separate anchor text, and otherwise as the anchor text followed by the URL
# in angle brackets, unless the nourls option suppresses the URL.  The "to"
# value is formatted before it is compared with the text, since formatting
# may escape hyphens.
sub cmd_l {
    my ($self, $attrs, $text) = @_;
    return $text if $attrs->{type} ne 'url';

    # Format the link target the same way the text was formatted.
    my $target = $attrs->{to};
    if (defined $target) {
        my $tag = $self->{PENDING}[-1];
        $target = $self->format_text($tag->[1], $target);
    }

    return "<$text>" if !defined($target) || $target eq $text;
    return $text     if $self->{opt_nourls};
    return "$text <$$attrs{to}>";
}
1332b39c5158Smillert
1333b39c5158Smillert##############################################################################
1334b39c5158Smillert# List handling
1335b39c5158Smillert##############################################################################
1336b39c5158Smillert
# Common start of an =over block.  Takes the list type followed by the
# attribute hash, and is called by the handlers for the four kinds of lists
# (bullet, number, text and block).
sub over_common_start {
    my ($self, $type, $attrs) = @_;

    # Use the requested indentation when it looks like a small integer, and
    # the configured default otherwise.
    my $indent = $attrs->{indent};
    if (!defined($indent) || $indent !~ /^[-+]?\d{1,4}\s*$/) {
        $indent = $self->{opt_indent};
    }

    # With several =over commands in a row, the indentation of the previous
    # level is still pending and has to be emitted now.  SHIFTS is the stack
    # of indentations for which code was actually emitted.
    if (@{ $self->{SHIFTS} } < @{ $self->{INDENTS} }) {
        $self->output(".RS $self->{INDENT}\n");
        push @{ $self->{SHIFTS} }, $self->{INDENT};
    }

    # Record-keeping: INDENTS stacks the indentations seen so far, ITEMTYPES
    # stacks the list types, and INDENT is the current indentation.
    push @{ $self->{INDENTS} },   $self->{INDENT};
    push @{ $self->{ITEMTYPES} }, $type;
    $self->{INDENT}    = $indent + 0;
    $self->{SHIFTWAIT} = 1;
}
1367b39c5158Smillert
# Common end of an =over block; takes only the object.  Closing a block
# removes a level from INDENTS, which normally leaves SHIFTS longer than
# INDENTS and means the *roff indent has to be closed too, although that is
# not always the case.  While an outer indentation is still open, a further
# .RE followed by a fresh .RS is emitted to unconfuse *roff.
sub over_common_end {
    my ($self) = @_;
    $self->{INDENT} = pop @{ $self->{INDENTS} };
    pop @{ $self->{ITEMTYPES} };

    # Close the indent if code was emitted for this level.
    if (@{ $self->{SHIFTS} } > @{ $self->{INDENTS} }) {
        $self->output(".RE\n");
        pop @{ $self->{SHIFTS} };
    }

    # Inside an enclosing indentation *roff has now lost track of the right
    # depth, so re-establish it.
    if (@{ $self->{INDENTS} } > 0) {
        $self->output(".RE\n");
        $self->output(".RS $self->{INDENT}\n");
    }
    $self->{NEEDSPACE} = 1;
    $self->{SHIFTWAIT} = 0;
}
1394b39c5158Smillert
# Route the start and end events of each kind of list to the common
# handlers.  The start handlers pass the list type ahead of the arguments
# they were given.
sub start_over_bullet {
    my $self = shift;
    return $self->over_common_start('bullet', @_);
}

sub start_over_number {
    my $self = shift;
    return $self->over_common_start('number', @_);
}

sub start_over_text {
    my $self = shift;
    return $self->over_common_start('text', @_);
}

sub start_over_block {
    my $self = shift;
    return $self->over_common_start('block', @_);
}

sub end_over_bullet {
    my ($self) = @_;
    return $self->over_common_end();
}

sub end_over_number {
    my ($self) = @_;
    return $self->over_common_end();
}

sub end_over_text {
    my ($self) = @_;
    return $self->over_common_end();
}

sub end_over_block {
    my ($self) = @_;
    return $self->over_common_end();
}
1404b39c5158Smillert
# The common handler for all item commands.  Takes the type of the item
# ('bullet', 'number', or anything else for a text item), the attribute hash,
# and then the text of the item.
#
# Emit an index entry for anything that's interesting, but don't emit index
# entries for things like bullets and numbers.  Newlines in an item title are
# turned into spaces since *roff can't handle them embedded.
#
# Body text and item titles are tested for emptiness and definedness rather
# than truth, so that a body or a title consisting of just "0" is not
# silently lost.
sub item_common {
    my ($self, $type, $attrs, $text) = @_;

    # Clean up the text.  We want to end up with two variables, one ($text)
    # which contains any body text after taking out the item portion, and
    # another ($item) which contains the actual item text.  $index stays
    # undefined unless this is a text item containing a word character.
    $text =~ s/\s+$//;
    my ($item, $index);
    if ($type eq 'bullet') {
        $item = "\\\(bu";
        $text =~ s/\n*$/\n/;
    } elsif ($type eq 'number') {
        $item = $$attrs{number} . '.';
    } else {
        $item = $text;
        $item =~ s/\s*\n\s*/ /g;
        $text = '';
        $index = $item if ($item =~ /\w/);
    }

    # Take care of the indentation.  If shifts and indents are equal, close
    # the top shift, since we're about to create an indentation with .IP.
    # Also output .PD 0 to turn off spacing between items if this item is
    # directly following another one.  We only have to do that once for a
    # whole chain of items so do it for the second item in the chain.  Note
    # that makespace is what undoes this.
    if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
        $self->output (".RE\n");
        pop @{ $$self{SHIFTS} };
    }
    $self->output (".PD 0\n") if ($$self{ITEMS} == 1);

    # Now, output the item tag itself.
    $item = $self->mapfonts($item, '\fR');
    $self->output($self->switchquotes('.IP', $item, $$self{INDENT}));
    $$self{NEEDSPACE} = 0;
    $$self{ITEMS}++;
    $$self{SHIFTWAIT} = 0;

    # If body text for this item was included, go ahead and output that now.
    # Compare against the empty string: a plain truth test would discard a
    # body of "0".
    if ($text ne '') {
        $text =~ s/\s*$/\n/;
        $self->makespace;
        $self->output($self->protect($self->mapfonts($text, '\fR')));
        $$self{NEEDSPACE} = 1;
    }

    # Likewise test for definedness, so that an item titled "0" still gets
    # its index entry.
    $self->outindex (defined ($index) ? ('Item', $index) : ());
}
1460b39c5158Smillert
# Route each kind of item to the common item handler, passing the item type
# ahead of the arguments that were given.
sub cmd_item_bullet {
    my $self = shift;
    return $self->item_common('bullet', @_);
}

sub cmd_item_number {
    my $self = shift;
    return $self->item_common('number', @_);
}

sub cmd_item_text {
    my $self = shift;
    return $self->item_common('text', @_);
}

sub cmd_item_block {
    my $self = shift;
    return $self->item_common('block', @_);
}
1466b39c5158Smillert
1467b39c5158Smillert##############################################################################
1468b39c5158Smillert# Backward compatibility
1469b39c5158Smillert##############################################################################
1470b39c5158Smillert
# Reinitialize the underlying Pod::Simple object before each call, so that
# one formatter object can convert several pages in a row.  An optional
# leading hash reference of options is accepted for backward compatibility.
# Returns whatever the parent class's parse_from_file returns.
sub parse_from_file {
    my $self = shift;
    $self->reinit();

    # Emulate the old -cutting option of Pod::Parser.  This pokes at
    # Pod::Simple's internal state and is quite ugly; a better approach is
    # needed.
    if (ref($_[0]) eq 'HASH') {
        my $opts    = shift @_;
        my $cutting = $opts->{-cutting};
        if (defined($cutting) && !$cutting) {
            $self->{in_pod}         = 1;
            $self->{last_was_blank} = 1;
        }
    }

    # Let the parent class do the real work.
    my $retval = $self->SUPER::parse_from_file(@_);

    # Pod::Simple does not flush the output, so do it here by turning on
    # autoflush for the duration of an empty print.  Ideally the file
    # descriptor would also be closed if it was opened for this call, but
    # that is not easy to determine.
    my $fh       = $self->output_fh();
    my $previous = select $fh;
    {
        local $| = 1;
        print {$fh} '';
    }
    select $previous;
    return $retval;
}
1502b39c5158Smillert
# Backward-compatibility method that Pod::Simple does not provide.  File
# handles are among the inputs parse_from_file accepts, so simply forward
# all arguments to it.
sub parse_from_filehandle {
    my ($self, @args) = @_;
    return $self->parse_from_file(@args);
}
1510e9ce3842Safresh1
# Pod::Simple's parse_file does not set output_fh.  Our documentation has
# always said that calling it directly should work, so default the output to
# STDOUT unless the caller already chose an output file handle.
sub parse_file {
    my ($self, $in) = @_;
    if (!defined $self->{output_fh}) {
        $self->output_fh(\*STDOUT);
    }
    return $self->SUPER::parse_file($in);
}
1521b39c5158Smillert
# The same courtesy for parse_lines.  Pod::Simple's manual page implies that
# setting the output is the caller's job, but there is no reason not to
# provide a default of STDOUT.
sub parse_lines {
    my ($self, @lines) = @_;
    if (!defined $self->{output_fh}) {
        $self->output_fh(\*STDOUT);
    }
    return $self->SUPER::parse_lines(@lines);
}
1532e5157e49Safresh1
# And once more for parse_string_document: default the output to STDOUT when
# no output file handle has been set.
sub parse_string_document {
    my ($self, $doc) = @_;
    if (!defined $self->{output_fh}) {
        $self->output_fh(\*STDOUT);
    }
    return $self->SUPER::parse_string_document($doc);
}
1541e5157e49Safresh1
1542b39c5158Smillert##############################################################################
# Preamble
1544b39c5158Smillert##############################################################################
1545b39c5158Smillert
# Return the template for the preamble that starts all *roff output we
# generate.  Most of it is static; the placeholders are the fixed-width font
# (designated by @CFONT@) and the left and right quotes to use for C<> text
# (designated by @LQUOTE@ and @RQUOTE@).  The accent mark definitions are
# appended only when the output encoding is roff.
sub preamble_template {
    my ($self) = @_;
    my $template = <<'----END OF PREAMBLE----';
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft @CFONT@
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
.ie n \{\
.    ds C` @LQUOTE@
.    ds C' @RQUOTE@
'br\}
.el\{\
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
----END OF PREAMBLE----
#'# for cperl-mode

    # Every encoding other than roff uses the template as it stands.
    return $template if $self->{ENCODING} ne 'roff';

    $template .= <<'----END OF PREAMBLE----';
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h'|\\n:u'
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
----END OF PREAMBLE----
#`# for cperl-mode
    return $template;
}
1674b39c5158Smillert
1675b39c5158Smillert##############################################################################
1676b39c5158Smillert# Module return value and documentation
1677b39c5158Smillert##############################################################################
1678b39c5158Smillert
1679b39c5158Smillert1;
1680b39c5158Smillert__END__
1681b39c5158Smillert
1682e0680481Safresh1=encoding UTF-8
1683e0680481Safresh1
1684b39c5158Smillert=for stopwords
1685b46d8ef2Safresh1en em ALLCAPS teeny fixedbold fixeditalic fixedbolditalic stderr utf8 UTF-8
1686b46d8ef2Safresh1Allbery Sean Burke Ossanna Solaris formatters troff uppercased Christiansen
1687e0680481Safresh1nourls parsers Kernighan lquote rquote unrepresentable mandoc NetBSD PostScript
1688e0680481Safresh1SMP macOS EBCDIC fallbacks manref reflowed reflowing FH overridable
1689e9ce3842Safresh1
1690e9ce3842Safresh1=head1 NAME
1691e9ce3842Safresh1
1692e9ce3842Safresh1Pod::Man - Convert POD data to formatted *roff input
1693b39c5158Smillert
1694b39c5158Smillert=head1 SYNOPSIS
1695b39c5158Smillert
1696b39c5158Smillert    use Pod::Man;
1697b39c5158Smillert    my $parser = Pod::Man->new (release => $VERSION, section => 8);
1698b39c5158Smillert
1699b39c5158Smillert    # Read POD from STDIN and write to STDOUT.
1700b39c5158Smillert    $parser->parse_file (\*STDIN);
1701b39c5158Smillert
1702b39c5158Smillert    # Read POD from file.pod and write to file.1.
1703b39c5158Smillert    $parser->parse_from_file ('file.pod', 'file.1');
1704b39c5158Smillert
1705b39c5158Smillert=head1 DESCRIPTION
1706b39c5158Smillert
1707b39c5158SmillertPod::Man is a module to convert documentation in the POD format (the
1708b39c5158Smillertpreferred language for documenting Perl) into *roff input using the man
1709b39c5158Smillertmacro set.  The resulting *roff code is suitable for display on a terminal
1710b39c5158Smillertusing L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1711b39c5158SmillertIt is conventionally invoked using the driver script B<pod2man>, but it can
1712b39c5158Smillertalso be used directly.
1713b39c5158Smillert
1714e0680481Safresh1By default (on non-EBCDIC systems), Pod::Man outputs UTF-8.  Its output should
1715e0680481Safresh1work with the B<man> program on systems that use B<groff> (most Linux
1716e0680481Safresh1distributions) or B<mandoc> (most BSD variants), but may result in mangled
1717e0680481Safresh1output on older UNIX systems.  To choose a different, possibly more
1718e0680481Safresh1backward-compatible output mangling on such systems, set the C<encoding>
1719e0680481Safresh1option to C<roff> (the default in earlier Pod::Man versions).  See the
1720e0680481Safresh1C<encoding> option and L</ENCODING> for more details.
1721b39c5158Smillert
1722e0680481Safresh1See L</COMPATIBILITY> for the versions of Pod::Man with significant
1723e0680481Safresh1backward-incompatible changes (other than constructor options, whose versions
1724e0680481Safresh1are documented below), and the versions of Perl that included them.
1725b39c5158Smillert
1726e0680481Safresh1=head1 CLASS METHODS
1727b39c5158Smillert
1728e0680481Safresh1=over 4
1729b39c5158Smillert
1730e0680481Safresh1=item new(ARGS)
1731b39c5158Smillert
1732e0680481Safresh1Create a new Pod::Man object.  ARGS should be a list of key/value pairs, where
1733e0680481Safresh1the keys are chosen from the following.  Each option is annotated with the
1734e0680481Safresh1version of Pod::Man in which that option was added with its current meaning.
1735b39c5158Smillert
1736b39c5158Smillert=over 4
1737b39c5158Smillert
1738b39c5158Smillert=item center
1739b39c5158Smillert
1740e0680481Safresh1[1.00] Sets the centered page header for the C<.TH> macro.  The default, if
1741e0680481Safresh1this option is not specified, is C<User Contributed Perl Documentation>.
1742b8851fccSafresh1
1743b8851fccSafresh1=item date
1744b8851fccSafresh1
1745e0680481Safresh1[4.00] Sets the left-hand footer for the C<.TH> macro.  If this option is not
1746e0680481Safresh1set, the contents of the environment variable POD_MAN_DATE, if set, will be
1747e0680481Safresh1used.  Failing that, the value of SOURCE_DATE_EPOCH, the modification date of
1748e0680481Safresh1the input file, or the current time if stat() can't find that file (which will
1749e0680481Safresh1be the case if the input is from C<STDIN>) will be used.  If taken from any
1750e0680481Safresh1source other than POD_MAN_DATE (which is used verbatim), the date will be
1751e0680481Safresh1formatted as C<YYYY-MM-DD> and will be based on UTC (so that the output will
1752e0680481Safresh1be reproducible regardless of local time zone).
1753e0680481Safresh1
1754e0680481Safresh1=item encoding
1755e0680481Safresh1
1756e0680481Safresh1[5.00] Specifies the encoding of the output.  The value must be an encoding
1757e0680481Safresh1recognized by the L<Encode> module (see L<Encode::Supported>), or the special
1758e0680481Safresh1values C<roff> or C<groff>.  The default on non-EBCDIC systems is UTF-8.
1759e0680481Safresh1
1760e0680481Safresh1If the output contains characters that cannot be represented in this encoding,
1761e0680481Safresh1that is an error that will be reported as configured by the C<errors> option.
1762e0680481Safresh1If error handling is other than C<die>, the unrepresentable character will be
1763e0680481Safresh1replaced with the Encode substitution character (normally C<?>).
1764e0680481Safresh1
1765e0680481Safresh1If the C<encoding> option is set to the special value C<groff> (the default on
1766e0680481Safresh1EBCDIC systems), or if the Encode module is not available and the encoding is
1767e0680481Safresh1set to anything other than C<roff>, Pod::Man will translate all non-ASCII
1768e0680481Safresh1characters to C<\[uNNNN]> Unicode escapes.  These are not traditionally part
1769e0680481Safresh1of the *roff language, but are supported by B<groff> and B<mandoc> and thus by
1770e0680481Safresh1the majority of manual page processors in use today.
1771e0680481Safresh1
1772e0680481Safresh1If the C<encoding> option is set to the special value C<roff>, Pod::Man will
1773e0680481Safresh1do its historic transformation of (some) ISO 8859-1 characters into *roff
1774e0680481Safresh1escapes that may be adequate in troff and may be readable (if ugly) in nroff.
1775e0680481Safresh1This was the default behavior of versions of Pod::Man before 5.00.  With this
1776e0680481Safresh1encoding, all other non-ASCII characters will be replaced with C<X>.  It may
1777e0680481Safresh1be required for very old troff and nroff implementations that do not support
1778e0680481Safresh1UTF-8, but its representation of any non-ASCII character is very poor and
1779e0680481Safresh1often specific to European languages.
1780e0680481Safresh1
1781e0680481Safresh1If the output file handle has a PerlIO encoding layer set, setting C<encoding>
1782e0680481Safresh1to anything other than C<groff> or C<roff> will be ignored and no encoding
1783e0680481Safresh1will be done by Pod::Man.  It will instead rely on the encoding layer to make
1784e0680481Safresh1whatever output encoding transformations are desired.
1785e0680481Safresh1
1786e0680481Safresh1WARNING: The input encoding of the POD source is independent from the output
1787e0680481Safresh1encoding, and setting this option does not affect the interpretation of the
1788e0680481Safresh1POD input.  Unless your POD source is US-ASCII, its encoding should be
1789e0680481Safresh1declared with the C<=encoding> command in the source.  If this is not done,
1790e0680481Safresh1Pod::Simple will attempt to guess the encoding and may be successful if
1791e0680481Safresh1it's Latin-1 or UTF-8, but it will produce warnings.  See L<perlpod(1)> for
1792e0680481Safresh1more information.
1793b39c5158Smillert
1794e9ce3842Safresh1=item errors
1795e9ce3842Safresh1
1796e0680481Safresh1[2.27] How to report errors.  C<die> says to throw an exception on any POD
1797e0680481Safresh1formatting error.  C<stderr> says to report errors on standard error, but not
1798e0680481Safresh1to throw an exception.  C<pod> says to include a POD ERRORS section in the
1799e0680481Safresh1resulting documentation summarizing the errors.  C<none> ignores POD errors
1800e0680481Safresh1entirely, as much as possible.
1801e9ce3842Safresh1
1802e5157e49Safresh1The default is C<pod>.
1803e9ce3842Safresh1
1804b39c5158Smillert=item fixed
1805b39c5158Smillert
1806e0680481Safresh1[1.00] The fixed-width font to use for verbatim text and code.  Defaults to
1807e0680481Safresh1C<CW>.  Some systems prefer C<CR> instead.  Only matters for B<troff> output.
1808b39c5158Smillert
1809b39c5158Smillert=item fixedbold
1810b39c5158Smillert
1811e0680481Safresh1[1.00] Bold version of the fixed-width font.  Defaults to C<CB>.  Only matters
1812b39c5158Smillertfor B<troff> output.
1813b39c5158Smillert
1814b39c5158Smillert=item fixeditalic
1815b39c5158Smillert
1816e0680481Safresh1[1.00] Italic version of the fixed-width font (something of a misnomer, since
1817e0680481Safresh1most fixed-width fonts only have an oblique version, not an italic version).
1818e0680481Safresh1Defaults to C<CI>.  Only matters for B<troff> output.
1819b39c5158Smillert
1820b39c5158Smillert=item fixedbolditalic
1821b39c5158Smillert
1822e0680481Safresh1[1.00] Bold italic (in theory, probably oblique in practice) version of the
1823e0680481Safresh1fixed-width font.  Pod::Man doesn't assume you have this, and defaults to
1824e0680481Safresh1C<CB>.  Some systems (such as Solaris) have this font available as C<CX>.
1825e0680481Safresh1Only matters for B<troff> output.
1826e0680481Safresh1
1827e0680481Safresh1=item guesswork
1828e0680481Safresh1
1829e0680481Safresh1[5.00] By default, Pod::Man applies some default formatting rules based on
1830e0680481Safresh1guesswork and regular expressions that are intended to make writing Perl
1831e0680481Safresh1documentation easier and require less explicit markup.  These rules may not
1832e0680481Safresh1always be appropriate, particularly for documentation that isn't about Perl.
1833e0680481Safresh1This option allows turning all or some of it off.
1834e0680481Safresh1
1835e0680481Safresh1The special value C<all> enables all guesswork.  This is also the default for
1836e0680481Safresh1backward compatibility reasons.  The special value C<none> disables all
1837e0680481Safresh1guesswork.  Otherwise, the value of this option should be a comma-separated
1838e0680481Safresh1list of one or more of the following keywords:
1839e0680481Safresh1
1840e0680481Safresh1=over 4
1841e0680481Safresh1
1842e0680481Safresh1=item functions
1843e0680481Safresh1
1844e0680481Safresh1Convert function references like C<foo()> to bold even if they have no markup.
1845e0680481Safresh1The function name accepts valid Perl characters for function names (including
1846e0680481Safresh1C<:>), and the trailing parentheses must be present and empty.
1847e0680481Safresh1
1848e0680481Safresh1=item manref
1849e0680481Safresh1
1850e0680481Safresh1Make the first part (before the parentheses) of manual page references like
1851e0680481Safresh1C<foo(1)> bold even if they have no markup.  The section must be a single
1852e0680481Safresh1number optionally followed by lowercase letters.
1853e0680481Safresh1
1854e0680481Safresh1=item quoting
1855e0680481Safresh1
1856e0680481Safresh1If no guesswork is enabled, any text enclosed in CZ<><> is surrounded by
1857e0680481Safresh1double quotes in nroff (terminal) output unless the contents are already
1858e0680481Safresh1quoted.  When this guesswork is enabled, quote marks will also be suppressed
1859e0680481Safresh1for Perl variables, function names, function calls, numbers, and hex
1860e0680481Safresh1constants.
1861e0680481Safresh1
1862e0680481Safresh1=item variables
1863e0680481Safresh1
1864e0680481Safresh1Convert Perl variable names to a fixed-width font even if they have no markup.
1865e0680481Safresh1This transformation will only be apparent in troff output, or some other
1866e0680481Safresh1output format (unlike nroff terminal output) that supports fixed-width fonts.
1867e0680481Safresh1
1868e0680481Safresh1=back
1869e0680481Safresh1
1870e0680481Safresh1Any unknown guesswork name is silently ignored (for potential future
1871e0680481Safresh1compatibility), so be careful about spelling.
1872e0680481Safresh1
1873e0680481Safresh1=item language
1874e0680481Safresh1
1875e0680481Safresh1[5.00] Add commands telling B<groff> that the input file is in the given
1876e0680481Safresh1language.  The value of this setting must be a language abbreviation for which
1877e0680481Safresh1B<groff> provides supplemental configuration, such as C<ja> (for Japanese) or
1878e0680481Safresh1C<zh> (for Chinese).
1879e0680481Safresh1
1880e0680481Safresh1Specifically, this adds:
1881e0680481Safresh1
1882e0680481Safresh1    .mso <language>.tmac
1883e0680481Safresh1    .hla <language>
1884e0680481Safresh1
1885e0680481Safresh1to the start of the file, which configure correct line breaking for the
1886e0680481Safresh1specified language.  Without these commands, groff may not know how to add
1887e0680481Safresh1proper line breaks for Chinese and Japanese text if the manual page is
1888e0680481Safresh1installed into the normal manual page directory, such as F</usr/share/man>.
1889e0680481Safresh1
1890e0680481Safresh1On many systems, this will be done automatically if the manual page is
1891e0680481Safresh1installed into a language-specific manual page directory, such as
1892e0680481Safresh1F</usr/share/man/zh_CN>.  In that case, this option is not required.
1893e0680481Safresh1
1894e0680481Safresh1Unfortunately, the commands added with this option are specific to B<groff>
1895e0680481Safresh1and will not work with other B<troff> and B<nroff> implementations.
1896b39c5158Smillert
18979f11ffb7Safresh1=item lquote
18989f11ffb7Safresh1
18999f11ffb7Safresh1=item rquote
19009f11ffb7Safresh1
1901e0680481Safresh1[4.08] Sets the quote marks used to surround CE<lt>> text.  C<lquote> sets the
1902e0680481Safresh1left quote mark and C<rquote> sets the right quote mark.  Either may also be
1903e0680481Safresh1set to the special value C<none>, in which case no quote mark is added on that
1904e0680481Safresh1side of CE<lt>> text (but the font is still changed for troff output).
19059f11ffb7Safresh1
19069f11ffb7Safresh1Also see the C<quotes> option, which can be used to set both quotes at once.
19079f11ffb7Safresh1If both C<quotes> and one of the other options is set, C<lquote> or C<rquote>
19089f11ffb7Safresh1overrides C<quotes>.
19099f11ffb7Safresh1
1910b39c5158Smillert=item name
1911b39c5158Smillert
1912e0680481Safresh1[4.08] Set the name of the manual page for the C<.TH> macro.  Without this
1913e0680481Safresh1option, the manual name is set to the uppercased base name of the file being
1914e0680481Safresh1converted unless the manual section is 3, in which case the path is parsed to
1915e0680481Safresh1see if it is a Perl module path.  If it is, a path like C<.../lib/Pod/Man.pm>
1916e0680481Safresh1is converted into a name like C<Pod::Man>.  This option, if given, overrides
1917e0680481Safresh1any automatic determination of the name.
1918b8851fccSafresh1
19199f11ffb7Safresh1If generating a manual page from standard input, the name will be set to
1920e0680481Safresh1C<STDIN> if this option is not provided.  In this case, providing this option
1921e0680481Safresh1is strongly recommended to set a meaningful manual page name.
1922b39c5158Smillert
1923e9ce3842Safresh1=item nourls
1924e9ce3842Safresh1
1925e0680481Safresh1[2.27] Normally, LZ<><> formatting codes with both a URL and anchor text are
1926e0680481Safresh1formatted to show both the anchor text and the URL.  In other words:
1927e9ce3842Safresh1
1928e9ce3842Safresh1    L<foo|http://example.com/>
1929e9ce3842Safresh1
1930e9ce3842Safresh1is formatted as:
1931e9ce3842Safresh1
1932e9ce3842Safresh1    foo <http://example.com/>
1933e9ce3842Safresh1
1934e9ce3842Safresh1This option, if set to a true value, suppresses the URL when anchor text
1935e9ce3842Safresh1is given, so this example would be formatted as just C<foo>.  This can
1936e9ce3842Safresh1produce less cluttered output in cases where the URLs are not particularly
1937e9ce3842Safresh1important.
1938e9ce3842Safresh1
1939b39c5158Smillert=item quotes
1940b39c5158Smillert
1941e0680481Safresh1[4.00] Sets the quote marks used to surround CE<lt>> text.  If the value is a
1942e0680481Safresh1single character, it is used as both the left and right quote.  Otherwise, it
1943e0680481Safresh1is split in half, and the first half of the string is used as the left quote
1944e0680481Safresh1and the second is used as the right quote.
1945b39c5158Smillert
1946b39c5158SmillertThis may also be set to the special value C<none>, in which case no quote
1947b39c5158Smillertmarks are added around CE<lt>> text (but the font is still changed for troff
1948b39c5158Smillertoutput).
1949b39c5158Smillert
19509f11ffb7Safresh1Also see the C<lquote> and C<rquote> options, which can be used to set the
19519f11ffb7Safresh1left and right quotes independently.  If both C<quotes> and one of the other
19529f11ffb7Safresh1options is set, C<lquote> or C<rquote> overrides C<quotes>.
19539f11ffb7Safresh1
1954b39c5158Smillert=item release
1955b39c5158Smillert
1956e0680481Safresh1[1.00] Set the centered footer for the C<.TH> macro.  By default, this is set
1957e0680481Safresh1to the version of Perl you run Pod::Man under.  Setting this to the empty
1958e0680481Safresh1string will cause some *roff implementations to use the system default value.
1959b8851fccSafresh1
1960e0680481Safresh1Note that some system C<an> macro sets assume that the centered footer will be
1961e0680481Safresh1a modification date and will prepend something like C<Last modified: >.  If
1962e0680481Safresh1this is the case for your target system, you may want to set C<release> to the
1963e0680481Safresh1last modified date and C<date> to the version number.
1964b39c5158Smillert
1965b39c5158Smillert=item section
1966b39c5158Smillert
1967e0680481Safresh1[1.00] Set the section for the C<.TH> macro.  The standard section numbering
1968e0680481Safresh1convention is to use 1 for user commands, 2 for system calls, 3 for functions,
1969e0680481Safresh14 for devices, 5 for file formats, 6 for games, 7 for miscellaneous
1970e0680481Safresh1information, and 8 for administrator commands.  There is a lot of variation
1971e0680481Safresh1here, however; some systems (like Solaris) use 4 for file formats, 5 for
1972e0680481Safresh1miscellaneous information, and 7 for devices.  Still others use 1m instead of
1973e0680481Safresh18, or some mix of both.  About the only section numbers that are reliably
1974e0680481Safresh1consistent are 1, 2, and 3.
1975b39c5158Smillert
1976b39c5158SmillertBy default, section 1 will be used unless the file ends in C<.pm> in which
1977b39c5158Smillertcase section 3 will be selected.
1978b39c5158Smillert
1979b39c5158Smillert=item stderr
1980b39c5158Smillert
1981e0680481Safresh1[2.19] If set to a true value, send error messages about invalid POD to
1982e0680481Safresh1standard error instead of appending a POD ERRORS section to the generated
1983e0680481Safresh1*roff output.  This is equivalent to setting C<errors> to C<stderr> if
1984e0680481Safresh1C<errors> is not already set.
1985e0680481Safresh1
1986e0680481Safresh1This option is for backward compatibility with Pod::Man versions that did not
1987e0680481Safresh1support C<errors>.  Normally, the C<errors> option should be used instead.
1988b39c5158Smillert
1989b39c5158Smillert=item utf8
1990b39c5158Smillert
1991e0680481Safresh1[2.21] This option used to set the output encoding to UTF-8.  Since this is
1992e0680481Safresh1now the default, it is ignored and does nothing.
1993b39c5158Smillert
1994b39c5158Smillert=back
1995b39c5158Smillert
1996e0680481Safresh1=back
1997b39c5158Smillert
1998e0680481Safresh1=head1 INSTANCE METHODS
1999b39c5158Smillert
2000e0680481Safresh1As a derived class from Pod::Simple, Pod::Man supports the same methods and
2001e0680481Safresh1interfaces.  See L<Pod::Simple> for all the details.  This section summarizes
2002e0680481Safresh1the most-frequently-used methods and the ones added by Pod::Man.
2003e5157e49Safresh1
2004e0680481Safresh1=over 4
2005e5157e49Safresh1
2006e0680481Safresh1=item output_fh(FH)
2007e0680481Safresh1
2008e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document()
2009e0680481Safresh1to the file handle FH instead of C<STDOUT>.
2010e0680481Safresh1
2011e0680481Safresh1=item output_string(REF)
2012e0680481Safresh1
2013e0680481Safresh1Direct the output from parse_file(), parse_lines(), or parse_string_document()
2014e0680481Safresh1to the scalar variable pointed to by REF, rather than C<STDOUT>.  For example:
2015e0680481Safresh1
2016e0680481Safresh1    my $man = Pod::Man->new();
2017e0680481Safresh1    my $output;
2018e0680481Safresh1    $man->output_string(\$output);
2019e0680481Safresh1    $man->parse_file('/some/input/file');
2020e0680481Safresh1
2021e0680481Safresh1Be aware that the output in that variable will already be encoded in UTF-8.
2022e0680481Safresh1
2023e0680481Safresh1=item parse_file(PATH)
2024e0680481Safresh1
2025e0680481Safresh1Read the POD source from PATH and format it.  By default, the output is sent
2026e0680481Safresh1to C<STDOUT>, but this can be changed with the output_fh() or output_string()
2027e0680481Safresh1methods.
2028e0680481Safresh1
2029e0680481Safresh1=item parse_from_file(INPUT, OUTPUT)
2030e0680481Safresh1
2031e0680481Safresh1=item parse_from_filehandle(FH, OUTPUT)
2032e0680481Safresh1
2033e0680481Safresh1Read the POD source from INPUT, format it, and output the results to OUTPUT.
2034e0680481Safresh1
2035e0680481Safresh1parse_from_filehandle() is provided for backward compatibility with older
2036e0680481Safresh1versions of Pod::Man.  parse_from_file() should be used instead.
2037e0680481Safresh1
2038e0680481Safresh1=item parse_lines(LINES[, ...[, undef]])
2039e0680481Safresh1
2040e0680481Safresh1Parse the provided lines as POD source, writing the output to either C<STDOUT>
2041e0680481Safresh1or the file handle set with the output_fh() or output_string() methods.  This
2042e0680481Safresh1method can be called repeatedly to provide more input lines.  An explicit
2043e0680481Safresh1C<undef> should be passed to indicate the end of input.
2044e0680481Safresh1
2045e0680481Safresh1This method expects raw bytes, not decoded characters.
2046e0680481Safresh1
2047e0680481Safresh1=item parse_string_document(INPUT)
2048e0680481Safresh1
2049e0680481Safresh1Parse the provided scalar variable as POD source, writing the output to either
2050e0680481Safresh1C<STDOUT> or the file handle set with the output_fh() or output_string()
2051e0680481Safresh1methods.
2052e0680481Safresh1
2053e0680481Safresh1This method expects raw bytes, not decoded characters.
2054e0680481Safresh1
2055e0680481Safresh1=back
2056e0680481Safresh1
2057e0680481Safresh1=head1 ENCODING
2058e0680481Safresh1
2059e0680481Safresh1As of Pod::Man 5.00, the default output encoding for Pod::Man is UTF-8.  This
2060e0680481Safresh1should work correctly on any modern system that uses either B<groff> (most
2061e0680481Safresh1Linux distributions) or B<mandoc> (Alpine Linux and most BSD variants,
2062e0680481Safresh1including macOS).
2063e0680481Safresh1
2064e0680481Safresh1The user will probably have to use a UTF-8 locale to see correct output.  This
2065e0680481Safresh1may be done by default; if not, set the LANG or LC_CTYPE environment variables
2066e0680481Safresh1to an appropriate locale.  The locale C<C.UTF-8> is available on most systems
2067e0680481Safresh1if one wants correct output without changing the other things locales affect,
2068e0680481Safresh1such as collation.
2069e0680481Safresh1
2070e0680481Safresh1The backward-compatible output format used in Pod::Man versions before 5.00 is
2071e0680481Safresh1available by setting the C<encoding> option to C<roff>.  This may produce
2072e0680481Safresh1marginally nicer results on older UNIX versions that do not use B<groff> or
2073e0680481Safresh1B<mandoc>, but none of the available options will correctly render Unicode
2074e0680481Safresh1characters on those systems.
2075e0680481Safresh1
2076e0680481Safresh1Below are some additional details about how this choice was made and some
2077e0680481Safresh1discussion of alternatives.
2078e0680481Safresh1
2079e0680481Safresh1=head2 History
2080e0680481Safresh1
2081e0680481Safresh1The default output encoding for Pod::Man has been a long-standing problem.
2082e0680481Safresh1B<troff> and B<nroff> predate Unicode by a significant margin, and their
2083e0680481Safresh1implementations for many UNIX systems reflect that legacy.  It's common for
2084e0680481Safresh1Unicode to not be supported in any form.
2085e0680481Safresh1
2086e0680481Safresh1Because of this, versions of Pod::Man prior to 5.00 maintained the highly
2087e0680481Safresh1conservative output of the original pod2man, which output pure ASCII with
2088e0680481Safresh1complex macros to simulate common western European accented characters when
2089e0680481Safresh1processed with troff.  The nroff output was awkward and sometimes incorrect,
2090e0680481Safresh1and characters not used in western European scripts were replaced with C<X>.
2091e0680481Safresh1This choice maximized backwards compatibility with B<man> and
2092e0680481Safresh1B<nroff>/B<troff> implementations at the cost of incorrect rendering of many
2093e0680481Safresh1POD documents, particularly those containing people's names.
2094e0680481Safresh1
2095e0680481Safresh1The modern implementations, B<groff> (used in most Linux distributions) and
2096e0680481Safresh1B<mandoc> (used by most BSD variants), do now support Unicode.  Other UNIX
2097e0680481Safresh1systems often do not, but they're now a tiny minority of the systems people
2098e0680481Safresh1use on a daily basis.  It's increasingly common (for very good reasons) to use
2099e0680481Safresh1Unicode characters for POD documents rather than using ASCII conversions of
2100e0680481Safresh1people's names or avoiding non-English text, making the limitations in the old
2101e0680481Safresh1output format more apparent.
2102e0680481Safresh1
2103e0680481Safresh1Four options have been proposed to fix this:
2104e0680481Safresh1
2105e0680481Safresh1=over 2
2106e0680481Safresh1
2107e0680481Safresh1=item *
2108e0680481Safresh1
2109e0680481Safresh1Optionally support UTF-8 output but don't change the default.  This is the
2110e0680481Safresh1approach taken since Pod::Man 2.1.0, which added the C<utf8> option.  Some
2111e0680481Safresh1Pod::Man users use this option for better output on platforms known to support
2112e0680481Safresh1Unicode, but since the defaults have not changed, people continued to
2113e0680481Safresh1encounter (and file bug reports about) the poor default rendering.
2114e0680481Safresh1
2115e0680481Safresh1=item *
2116e0680481Safresh1
2117e0680481Safresh1Convert characters to troff C<\(xx> escapes.  This requires maintaining a
2118e0680481Safresh1large translation table and addresses only a tiny part of the problem, since
2119e0680481Safresh1many Unicode characters have no standard troff name.  B<groff> has the largest
2120e0680481Safresh1list, but if one is willing to assume B<groff> is the formatter, the next
2121e0680481Safresh1option is better.
2122e0680481Safresh1
2123e0680481Safresh1=item *
2124e0680481Safresh1
2125e0680481Safresh1Convert characters to groff C<\[uNNNN]> escapes.  This is implemented as the
2126e0680481Safresh1C<groff> encoding for those who want to use it, and is supported by both
2127e0680481Safresh1B<groff> and B<mandoc>.  However, it is no better than UTF-8 output for
2128e0680481Safresh1portability to other implementations.  See L</Testing results> for more
2129e0680481Safresh1details.
2130e0680481Safresh1
2131e0680481Safresh1=item *
2132e0680481Safresh1
2133e0680481Safresh1Change the default output format to UTF-8 and ask those who want maximum
2134e0680481Safresh1backward compatibility to explicitly select the old encoding.  This fixes the
2135e0680481Safresh1issue for most users at the cost of backwards compatibility.  While the
2136e0680481Safresh1rendering of non-ASCII characters is different on older systems that don't
2137e0680481Safresh1support UTF-8, it's not always worse than the old output.
2138e0680481Safresh1
2139e0680481Safresh1=back
2140e0680481Safresh1
2141e0680481Safresh1Pod::Man 5.00 and later makes the last choice.  This arguably produces worse
2142e0680481Safresh1output when manual pages are formatted with B<troff> into PostScript or PDF,
2143e0680481Safresh1but doing this is rare and normally manual, so the encoding can be changed in
2144e0680481Safresh1those cases.  The older output encoding is available by setting C<encoding> to
2145e0680481Safresh1C<roff>.
2146e0680481Safresh1
2147e0680481Safresh1=head2 Testing results
2148e0680481Safresh1
2149e0680481Safresh1Here are the results of testing C<encoding> values of C<utf-8> and C<groff> on
2150e0680481Safresh1various operating systems.  The testing methodology was to create F<man/man1>
2151e0680481Safresh1in the current directory, copy F<encoding.utf8> or F<encoding.groff> from the
2152e0680481Safresh1podlators 5.00 distribution to F<man/man1/encoding.1>, and then run:
2153e0680481Safresh1
2154e0680481Safresh1    LANG=C.UTF-8 MANPATH=$(pwd)/man man 1 encoding
2155e0680481Safresh1
2156e0680481Safresh1If the locale is not explicitly set to one that includes UTF-8, the Unicode
2157e0680481Safresh1characters were usually converted to ASCII (by, for example, dropping an
2158e0680481Safresh1accent) or deleted or replaced with C<< <?> >> if there was no conversion.
2159e0680481Safresh1
2160e0680481Safresh1Tested on 2022-09-25.  Many thanks to the GCC Compile Farm project for access
2161e0680481Safresh1to testing hosts.
2162e0680481Safresh1
2163e0680481Safresh1    OS                   UTF-8      groff
2164e0680481Safresh1    ------------------   -------    -------
2165e0680481Safresh1    AIX 7.1              no [1]     no [2]
2166e0680481Safresh1    Alpine 3.15.0        yes        yes
2167e0680481Safresh1    CentOS 7.9           yes        yes
2168e0680481Safresh1    Debian 7             yes        yes
2169e0680481Safresh1    FreeBSD 13.0         yes        yes
2170e0680481Safresh1    NetBSD 9.2           yes        yes
2171e0680481Safresh1    OpenBSD 7.1          yes        yes
2172e0680481Safresh1    openSUSE Leap 15.4   yes        yes
2173e0680481Safresh1    Solaris 10           yes        no [2]
2174e0680481Safresh1    Solaris 11           no [3]     no [3]
2175e0680481Safresh1
2176e0680481Safresh1I did not have access to a macOS system for testing, but since it uses
2177e0680481Safresh1B<mandoc>, its behavior is probably the same as the BSD hosts.
2178e0680481Safresh1
2179e0680481Safresh1Notes:
2180e0680481Safresh1
2181e0680481Safresh1=over 4
2182e0680481Safresh1
2183e0680481Safresh1=item [1]
2184e0680481Safresh1
2185e0680481Safresh1Unicode characters were converted to one or two random ASCII characters
2186e0680481Safresh1unrelated to the original character.
2187e0680481Safresh1
2188e0680481Safresh1=item [2]
2189e0680481Safresh1
2190e0680481Safresh1Unicode characters were shown as the body of the groff escape rather than the
2191e0680481Safresh1indicated character (in other words, text like C<[u00EF]>).
2192e0680481Safresh1
2193e0680481Safresh1=item [3]
2194e0680481Safresh1
2195e0680481Safresh1Unicode characters were deleted entirely, as if they weren't there.  Using
2196e0680481Safresh1C<nroff -man> instead of B<man> to format the page showed the same results as
2197e0680481Safresh1Solaris 10.  Using C<groff -k -man -Tutf8> to format the page produced the
2198e0680481Safresh1correct output.
2199e0680481Safresh1
2200e0680481Safresh1=back
2201e0680481Safresh1
2202e0680481Safresh1PostScript and PDF output using groff on a Debian 12 system do not support
2203e0680481Safresh1combining accent marks or SMP characters due to a lack of support in the
2204e0680481Safresh1default output font.
2205e0680481Safresh1
2206e0680481Safresh1Testing on additional platforms is welcome.  Please let the author know if you
2207e0680481Safresh1have additional results.
2208b39c5158Smillert
2209b39c5158Smillert=head1 DIAGNOSTICS
2210b39c5158Smillert
2211b39c5158Smillert=over 4
2212b39c5158Smillert
2213b39c5158Smillert=item roff font should be 1 or 2 chars, not "%s"
2214b39c5158Smillert
2215b39c5158Smillert(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
2216b39c5158Smillertwasn't either one or two characters.  Pod::Man doesn't support *roff fonts
2217e9ce3842Safresh1longer than two characters, although some *roff extensions do (the
2218e9ce3842Safresh1canonical versions of B<nroff> and B<troff> don't either).
2219e9ce3842Safresh1
2220e9ce3842Safresh1=item Invalid errors setting "%s"
2221e9ce3842Safresh1
2222e9ce3842Safresh1(F) The C<errors> parameter to the constructor was set to an unknown value.
2223b39c5158Smillert
2224b39c5158Smillert=item Invalid quote specification "%s"
2225b39c5158Smillert
2226e9ce3842Safresh1(F) The quote specification given (the C<quotes> option to the
2227b8851fccSafresh1constructor) was invalid.  A quote specification must be either one
2228b8851fccSafresh1character long or an even number (greater than one) of characters long.
2229e9ce3842Safresh1
2230e9ce3842Safresh1=item POD document had syntax errors
2231e9ce3842Safresh1
2232e9ce3842Safresh1(F) The POD document being formatted had syntax errors and the C<errors>
2233e9ce3842Safresh1option was set to C<die>.
2234b39c5158Smillert
2235b39c5158Smillert=back
2236b39c5158Smillert
2237b8851fccSafresh1=head1 ENVIRONMENT
2238b8851fccSafresh1
2239b8851fccSafresh1=over 4
2240b8851fccSafresh1
2241b8851fccSafresh1=item PERL_CORE
2242b8851fccSafresh1
2243e0680481Safresh1If set and Encode is not available, silently fall back to an encoding of
2244e0680481Safresh1C<groff> without complaining to standard error.  This environment variable is
2245e0680481Safresh1set during Perl core builds, which build Encode after podlators.  Encode is
2246b8851fccSafresh1expected to not (yet) be available in that case.
2247b8851fccSafresh1
2248b8851fccSafresh1=item POD_MAN_DATE
2249b8851fccSafresh1
2250b8851fccSafresh1If set, this will be used as the value of the left-hand footer unless the
2251b8851fccSafresh1C<date> option is explicitly set, overriding the timestamp of the input
2252b8851fccSafresh1file or the current time.  This is primarily useful to ensure reproducible
2253b8851fccSafresh1builds of the same output file given the same source and Pod::Man version,
2254b8851fccSafresh1even when file timestamps may not be consistent.
2255b8851fccSafresh1
2256b8851fccSafresh1=item SOURCE_DATE_EPOCH
2257b8851fccSafresh1
2258b8851fccSafresh1If set, and POD_MAN_DATE and the C<date> options are not set, this will be
2259b8851fccSafresh1used as the modification time of the source file, overriding the timestamp of
2260b8851fccSafresh1the input file or the current time.  It should be set to the desired time in
2261b8851fccSafresh1seconds since the UNIX epoch.  This is primarily useful to ensure reproducible
2262b8851fccSafresh1builds of the same output file given the same source and Pod::Man version,
2263b8851fccSafresh1even when file timestamps may not be consistent.  See
2264b8851fccSafresh1L<https://reproducible-builds.org/specs/source-date-epoch/> for the full
2265b8851fccSafresh1specification.
2266b8851fccSafresh1
2267b8851fccSafresh1(Arguably, according to the specification, this variable should be used only
2268b8851fccSafresh1if the timestamp of the input file is not available and Pod::Man uses the
2269b8851fccSafresh1current time.  However, for reproducible builds in Debian, results were more
2270b8851fccSafresh1reliable if this variable overrode the timestamp of the input file.)
2271b8851fccSafresh1
2272b8851fccSafresh1=back
2273b8851fccSafresh1
2274e0680481Safresh1=head1 COMPATIBILITY
2275e0680481Safresh1
2276e0680481Safresh1Pod::Man 1.02 (based on L<Pod::Parser>) was the first version included with
2277e0680481Safresh1Perl, in Perl 5.6.0.
2278e0680481Safresh1
2279e0680481Safresh1The current API based on L<Pod::Simple> was added in Pod::Man 2.00.  Pod::Man
2280e0680481Safresh12.04 was included in Perl 5.9.3, the first version of Perl to incorporate
2281e0680481Safresh1those changes.  This is the first version that correctly supports all modern
2282e0680481Safresh1POD syntax.  The parse_from_filehandle() method was re-added for backward
2283e0680481Safresh1compatibility in Pod::Man 2.09, included in Perl 5.9.4.
2284e0680481Safresh1
2285e0680481Safresh1Support for anchor text in LZ<><> links of type URL was added in Pod::Man
2286e0680481Safresh12.23, included in Perl 5.11.5.
2287e0680481Safresh1
2288e0680481Safresh1parse_lines(), parse_string_document(), and parse_file() set a default output
2289e0680481Safresh1file handle of C<STDOUT> if one was not already set as of Pod::Man 2.28,
2290e0680481Safresh1included in Perl 5.19.5.
2291e0680481Safresh1
2292e0680481Safresh1Support for SOURCE_DATE_EPOCH and POD_MAN_DATE was added in Pod::Man 4.00,
2293e0680481Safresh1included in Perl 5.23.7, and generated dates were changed to use UTC instead
2294e0680481Safresh1of the local time zone.  This is also the first release that aligned the
2295e0680481Safresh1module version and the version of the podlators distribution.  All modules
2296e0680481Safresh1included in podlators, and the podlators distribution itself, share the same
2297e0680481Safresh1version number from this point forward.
2298e0680481Safresh1
2299e0680481Safresh1Pod::Man 4.10, included in Perl 5.27.8, changed the formatting for manual page
2300e0680481Safresh1references and function names to bold instead of italic, following the current
2301e0680481Safresh1Linux manual page standard.
2302e0680481Safresh1
2303e0680481Safresh1Pod::Man 5.00 changed the default output encoding to UTF-8, overridable with
2304e0680481Safresh1the new C<encoding> option.  It also fixed problems with bold or italic
2305e0680481Safresh1extending too far when used with CZ<><> escapes, and began converting Unicode
2306e0680481Safresh1zero-width spaces (U+200B) to the C<\:> *roff escape.  It also dropped
2307e0680481Safresh1attempts to add subtle formatting corrections in the output that would only be
2308e0680481Safresh1visible when typeset with B<troff>, which had previously been a significant
2309e0680481Safresh1source of bugs.
2310e0680481Safresh1
2311b39c5158Smillert=head1 BUGS
2312b39c5158Smillert
2313e0680481Safresh1There are numerous bugs and language-specific assumptions in the nroff
2314e0680481Safresh1fallbacks for accented characters in the C<roff> encoding.  Since the point of
2315e0680481Safresh1this encoding is backward compatibility with the output from earlier versions
2316e0680481Safresh1of Pod::Man, and it is deprecated except when necessary to support old
2317e0680481Safresh1systems, those bugs are unlikely to ever be fixed.
2318b39c5158Smillert
2319b39c5158SmillertPod::Man doesn't handle font names longer than two characters.  Neither do
2320e0680481Safresh1most B<troff> implementations, but groff does as an extension.  It would be
2321e0680481Safresh1nice to support as an option for those who want to use it.
2322b39c5158Smillert
2323b39c5158Smillert=head1 CAVEATS
2324b39c5158Smillert
2325e0680481Safresh1=head2 Sentence spacing
2326b39c5158Smillert
2327e0680481Safresh1Pod::Man copies the input spacing verbatim to the output *roff document.  This
2328e0680481Safresh1means your output will be affected by how B<nroff> generally handles sentence
2329e0680481Safresh1spacing.
2330b39c5158Smillert
2331e0680481Safresh1B<nroff> dates from an era in which it was standard to use two spaces after
2332e0680481Safresh1sentences, and will always add two spaces after a line-ending period (or
2333e0680481Safresh1similar punctuation) when reflowing text.  For example, the following input:
2334b39c5158Smillert
2335e0680481Safresh1    =pod
2336e0680481Safresh1
2337e0680481Safresh1    One sentence.
2338e0680481Safresh1    Another sentence.
2339e0680481Safresh1
2340e0680481Safresh1will result in two spaces after the period when the text is reflowed.  If you
2341e0680481Safresh1use two spaces after sentences anyway, this will be consistent, although you
2342e0680481Safresh1will have to be careful to not end a line with an abbreviation such as C<e.g.>
2343e0680481Safresh1or C<Ms.>.  Output will also be consistent if you use the *roff style guide
2344e0680481Safresh1(and L<XKCD 1285|https://xkcd.com/1285/>) recommendation of putting a line
2345e0680481Safresh1break after each sentence, although that will consistently produce two spaces
2346e0680481Safresh1after each sentence, which may not be what you want.
2347e0680481Safresh1
2348e0680481Safresh1If you prefer one space after sentences (which is the more modern style), you
2349e0680481Safresh1will unfortunately need to ensure that no line in the middle of a paragraph
2350e0680481Safresh1ends in a period or similar sentence-ending punctuation.  Otherwise, B<nroff>
2351e0680481Safresh1will add two spaces after that sentence when reflowing, and your output
2352e0680481Safresh1document will have inconsistent spacing.
2353e0680481Safresh1
2354e0680481Safresh1=head2 Hyphens
2355e0680481Safresh1
2356e0680481Safresh1The handling of hyphens versus dashes is somewhat fragile, and one may get
2357e0680481Safresh1the wrong one under some circumstances.  This will normally only matter for
2358e0680481Safresh1line breaking and possibly for troff output.
2359b39c5158Smillert
2360b39c5158Smillert=head1 AUTHOR
2361b39c5158Smillert
2362e0680481Safresh1Written by Russ Allbery <rra@cpan.org>, based on the original B<pod2man> by
2363e0680481Safresh1Tom Christiansen <tchrist@mox.perl.com>.
2364e0680481Safresh1
2365e0680481Safresh1The modifications to work with Pod::Simple instead of Pod::Parser were
2366e0680481Safresh1contributed by Sean Burke <sburke@cpan.org>, but I've since hacked them beyond
2367e0680481Safresh1recognition and all bugs are mine.
2368b39c5158Smillert
2369b39c5158Smillert=head1 COPYRIGHT AND LICENSE
2370b39c5158Smillert
2371e0680481Safresh1Copyright 1999-2010, 2012-2020, 2022 Russ Allbery <rra@cpan.org>
2372b46d8ef2Safresh1
2373b46d8ef2Safresh1Substantial contributions by Sean Burke <sburke@cpan.org>.
2374b39c5158Smillert
2375b39c5158SmillertThis program is free software; you may redistribute it and/or modify it
2376b39c5158Smillertunder the same terms as Perl itself.
2377b39c5158Smillert
2378b39c5158Smillert=head1 SEE ALSO
2379b39c5158Smillert
2380e0680481Safresh1L<Encode::Supported>, L<Pod::Simple>, L<perlpod(1)>, L<pod2man(1)>,
2381e0680481Safresh1L<nroff(1)>, L<troff(1)>, L<man(1)>, L<man(7)>
2382b39c5158Smillert
2383b39c5158SmillertOssanna, Joseph F., and Brian W. Kernighan.  "Troff User's Manual,"
2384b39c5158SmillertComputing Science Technical Report No. 54, AT&T Bell Laboratories.  This is
2385b39c5158Smillertthe best documentation of standard B<nroff> and B<troff>.  At the time of
23869f11ffb7Safresh1this writing, it's available at L<http://www.troff.org/54.pdf>.
2387b39c5158Smillert
2388e0680481Safresh1The manual page documenting the man macro set may be L<man(5)> instead of
2389e0680481Safresh1L<man(7)> on your system.
2390e0680481Safresh1
2391e0680481Safresh1See L<perlpodstyle(1)> for documentation on writing manual pages in POD if
2392e0680481Safresh1you've not done it before and aren't familiar with the conventions.
2393b39c5158Smillert
2394b39c5158SmillertThe current version of this module is always available from its web site at
2395b46d8ef2Safresh1L<https://www.eyrie.org/~eagle/software/podlators/>.  It is also part of the
2396b39c5158SmillertPerl core distribution as of 5.6.0.
2397b39c5158Smillert
2398b39c5158Smillert=cut
2399b46d8ef2Safresh1
2400b46d8ef2Safresh1# Local Variables:
2401b46d8ef2Safresh1# copyright-at-end-flag: t
2402b46d8ef2Safresh1# End:
2403