xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/ParseLink.pm (revision 53555c846a0a6f917dbd0a191f826da995ab1c42)
1# Parse an L<> formatting code in POD text.
2#
3# This module implements parsing of the text of an L<> formatting code as
4# defined in perlpodspec.  It should be suitable for any POD formatter.  It
5# exports only one function, parselink(), which returns the five-item parse
6# defined in perlpodspec.
7#
8# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
9
10##############################################################################
11# Modules and declarations
12##############################################################################
13
14package Pod::ParseLink;
15
16use 5.010;
17use strict;
18use warnings;
19
20use Exporter;
21
22our @ISA = qw(Exporter);
23our @EXPORT = qw(parselink);
24our $VERSION = '5.01';
25
26##############################################################################
27# Implementation
28##############################################################################
29
# Parse the name and section portion of a link into a name and section.
#
# $link - The link target with any anchor text already removed
#
# Returns: A two-element list of the page name and the section.  Either
#          element is undef if that part of the link is absent or empty.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    if (defined ($section)) {
        $section =~ s/^"\s*(.*?)\s*"$/$1/;
    }

    # If there is no section and the name contains spaces, guess that it's an
    # old-style section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Treat empty components as absent.  Test the length rather than the
    # truth value so that a page or section named "0" is preserved.
    $page    = undef if defined ($page)    && length ($page) == 0;
    $section = undef if defined ($section) && length ($section) == 0;
    return ($page, $section);
}
54
# Infer link text from the page and section.
#
# $page    - The page name, or undef or the empty string if there is none
# $section - The section, or undef or the empty string if there is none
#
# Returns: The page name alone, the quoted section alone, or the quoted
#          section followed by " in " and the page name, depending on which
#          are present.  Returns undef if neither is present.
sub _infer_text {
    my ($page, $section) = @_;

    # Test the length rather than the truth value so that a page or section
    # named "0" is still considered present.
    my $has_page    = defined ($page)    && length ($page) > 0;
    my $has_section = defined ($section) && length ($section) > 0;

    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}
68
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# $link - The text inside the L<> formatting code
#
# Returns: A five-element list of the anchor text (undef if no vertical bar
#          was present), the anchor text or text inferred from the target,
#          the name or URL, the section (always undef for URLs), and the
#          type of link, one of "url", "pod", or "man".
sub parselink {
    my ($link) = @_;

    # Collapse every run of whitespace, including newlines, to one space.
    $link =~ s/\s+/ /g;

    # Everything before the first vertical bar is the anchor text.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }
    my $has_text = defined ($text) && length ($text) > 0;

    # A target of the form scheme:rest with no whitespace is a URL.  URLs
    # have no section, and the URL itself is the fallback anchor text.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    # Otherwise, this is a link to a POD page or a manual page.  A name
    # containing a parenthesized part with no whitespace, such as
    # "crontab(5)", is treated as a manual page reference.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
98
99##############################################################################
100# Module return value and documentation
101##############################################################################
102
103# Ensure we evaluate to true.
1041;
105__END__
106
107=for stopwords
108markup Allbery URL
109
110=head1 NAME
111
112Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text
113
114=head1 SYNOPSIS
115
116    use Pod::ParseLink;
117    my $link = get_link();
118    my ($text, $inferred, $name, $section, $type) = parselink($link);
119
120=head1 DESCRIPTION
121
122This module only provides a single function, parselink(), which takes the
123text of an LE<lt>E<gt> formatting code and parses it.  It returns the
124anchor text for the link (if any was given), the anchor text possibly
125inferred from the name and section, the name or URL, the section if any,
126and the type of link.  The type will be one of C<url>, C<pod>, or C<man>,
127indicating a URL, a link to a POD page, or a link to a Unix manual page.
128
Parsing is implemented per L<perlpodspec>.  For backward compatibility,
links where there is no section and the name contains spaces, or links where
the entirety of the link (except for the anchor text if given) is enclosed
in double quotes, are interpreted as links to a section
(LE<lt>/sectionE<gt>).
133
134The inferred anchor text is implemented per L<perlpodspec>:
135
136    L<name>         =>  L<name|name>
137    L</section>     =>  L<"section"|/section>
138    L<name/section> =>  L<"section" in name|name/section>
139
140The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
141and the section, anchor text, and inferred anchor text may contain any
142formatting codes.  Any double quotes around the section are removed as part
143of the parsing, as is any leading or trailing whitespace.
144
145If the text of the LE<lt>E<gt> escape is entirely enclosed in double
146quotes, it's interpreted as a link to a section for backward
147compatibility.
148
No attempt is made to resolve formatting codes.  This must be done after
calling parselink(), since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser, and
resolving them before parsing would result in an incorrect parse of a
formatting code like:
154
155    L<verticalE<verbar>barE<sol>slash>
156
157which should be interpreted as a link to the C<vertical|bar/slash> POD page
158and not as a link to the C<slash> section of the C<bar> POD page with an
159anchor text of C<vertical>.  Note that not only the anchor text will need to
160have formatting codes expanded, but so will the target of the link (to deal
161with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
162the section may be necessary depending on whether the translator wants to
163consider markup in sections to be significant when resolving links.  See
164L<perlpodspec> for more information.
165
166=head1 AUTHOR
167
168Russ Allbery <rra@cpan.org>
169
170=head1 COPYRIGHT AND LICENSE
171
172Copyright 2001, 2008, 2009, 2014, 2018-2019, 2022 Russ Allbery <rra@cpan.org>
173
174This program is free software; you may redistribute it and/or modify it
175under the same terms as Perl itself.
176
177=head1 SEE ALSO
178
179L<Pod::Parser>
180
181The current version of this module is always available from its web site at
182L<https://www.eyrie.org/~eagle/software/podlators/>.
183
184=cut
185
186# Local Variables:
187# copyright-at-end-flag: t
188# End:
189