xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/ParseLink.pm (revision 3d61058aa5c692477b6d18acfbbdb653a9930ff9)
1b46d8ef2Safresh1# Parse an L<> formatting code in POD text.
2b39c5158Smillert#
3b39c5158Smillert# This module implements parsing of the text of an L<> formatting code as
4b39c5158Smillert# defined in perlpodspec.  It should be suitable for any POD formatter.  It
5b39c5158Smillert# exports only one function, parselink(), which returns the five-item parse
6b39c5158Smillert# defined in perlpodspec.
7b39c5158Smillert#
8b46d8ef2Safresh1# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl
9b39c5158Smillert
10b39c5158Smillert##############################################################################
11b39c5158Smillert# Modules and declarations
12b39c5158Smillert##############################################################################
13b39c5158Smillert
14b39c5158Smillertpackage Pod::ParseLink;
15b39c5158Smillert
16e0680481Safresh1use 5.010;
17b39c5158Smillertuse strict;
18b8851fccSafresh1use warnings;
19b8851fccSafresh1
20b39c5158Smillertuse Exporter;
21b39c5158Smillert
22e0680481Safresh1our @ISA = qw(Exporter);
23e0680481Safresh1our @EXPORT = qw(parselink);
24*3d61058aSafresh1our $VERSION = '5.01_02';
25*3d61058aSafresh1$VERSION =~ tr/_//d;
26b39c5158Smillert
27b39c5158Smillert##############################################################################
28b39c5158Smillert# Implementation
29b39c5158Smillert##############################################################################
30b39c5158Smillert
# Parse the name and section portion of a link into a name and section.
#
# $link - Link target with any anchor text already split off
#
# Returns: A two-element list of the page name and the section.  Either
#          element is undef if that part is missing or empty.  Whitespace
#          around each part and double quotes around the section are removed.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there is no section at all and the name contains spaces, guess that
    # it's an old-style section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Treat empty parts as missing.  Test the length rather than the truth of
    # the value so that a page or section named "0" is preserved.
    $page    = undef unless defined ($page)    && length ($page);
    $section = undef unless defined ($section) && length ($section);
    return ($page, $section);
}
55b39c5158Smillert
# Infer link text from the page and section.
#
# $page    - Name of the page linked to, or undef or empty if there is none
# $section - Section linked to, or undef or empty if there is none
#
# Returns: The inferred anchor text, or undef if there is neither a page nor
#          a section
sub _infer_text {
    my ($page, $section) = @_;

    # Test for presence by length rather than by truth so that a page or
    # section named "0" still contributes to the inferred text.
    my $has_page    = defined ($page)    && length ($page);
    my $has_section = defined ($section) && length ($section);

    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}
69b39c5158Smillert
# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
#
# $link - Contents of the L<> formatting code
#
# Returns: A five-element list of the explicit anchor text (undef if no
#          vertical bar was present), the anchor text to display (explicit if
#          non-empty, otherwise inferred), the name or URL, the section (always
#          undef for a URL), and the type: 'url', 'man', or 'pod'
sub parselink {
    my ($link) = @_;

    # Collapse every run of whitespace, including newlines, to one space.
    $link =~ s/\s+/ /g;

    # Anything before the first vertical bar is explicit anchor text.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }
    my $has_text = defined ($text) && length ($text) > 0;

    # A word followed by a colon and then something other than a colon or
    # whitespace, with no whitespace anywhere, is a URL.  Rejecting a second
    # colon keeps a module name such as Foo::Bar from being taken for a URL.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    # Everything else is a link to a POD page or manual page.  A name that
    # contains a parenthesized suffix, such as crontab(5), is a manual page.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}
99b39c5158Smillert
100b39c5158Smillert##############################################################################
101b39c5158Smillert# Module return value and documentation
102b39c5158Smillert##############################################################################
103b39c5158Smillert
104b39c5158Smillert# Ensure we evaluate to true.
105b39c5158Smillert1;
106b39c5158Smillert__END__
107b39c5158Smillert
108b46d8ef2Safresh1=for stopwords
109b46d8ef2Safresh1markup Allbery URL
110b46d8ef2Safresh1
111b39c5158Smillert=head1 NAME
112b39c5158Smillert
113b39c5158SmillertPod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text
114b39c5158Smillert
115b39c5158Smillert=head1 SYNOPSIS
116b39c5158Smillert
117b39c5158Smillert    use Pod::ParseLink;
118b8851fccSafresh1    my $link = get_link();
119b39c5158Smillert    my ($text, $inferred, $name, $section, $type) = parselink($link);
120b39c5158Smillert
121b39c5158Smillert=head1 DESCRIPTION
122b39c5158Smillert
123b39c5158SmillertThis module only provides a single function, parselink(), which takes the
124b39c5158Smillerttext of an LE<lt>E<gt> formatting code and parses it.  It returns the
125b39c5158Smillertanchor text for the link (if any was given), the anchor text possibly
126b39c5158Smillertinferred from the name and section, the name or URL, the section if any,
127b39c5158Smillertand the type of link.  The type will be one of C<url>, C<pod>, or C<man>,
128b39c5158Smillertindicating a URL, a link to a POD page, or a link to a Unix manual page.
129b39c5158Smillert
Parsing is implemented per L<perlpodspec>.  For backward compatibility,
links where there is no section and the name contains spaces, or links where
the entirety of the link (except for the anchor text if given) is enclosed in
double-quotes, are interpreted as links to a section (LE<lt>/sectionE<gt>).
134b39c5158Smillert
135b39c5158SmillertThe inferred anchor text is implemented per L<perlpodspec>:
136b39c5158Smillert
137b39c5158Smillert    L<name>         =>  L<name|name>
138b39c5158Smillert    L</section>     =>  L<"section"|/section>
139b39c5158Smillert    L<name/section> =>  L<"section" in name|name/section>
140b39c5158Smillert
141b39c5158SmillertThe name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
142b39c5158Smillertand the section, anchor text, and inferred anchor text may contain any
143b39c5158Smillertformatting codes.  Any double quotes around the section are removed as part
144b39c5158Smillertof the parsing, as is any leading or trailing whitespace.
145b39c5158Smillert
146b39c5158SmillertIf the text of the LE<lt>E<gt> escape is entirely enclosed in double
147b39c5158Smillertquotes, it's interpreted as a link to a section for backward
148b39c5158Smillertcompatibility.
149b39c5158Smillert
No attempt is made to resolve formatting codes.  This must be done after
calling parselink(), since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser, and
resolving them before parsing would result in an incorrect parse of a
formatting code like:
155b39c5158Smillert
156b39c5158Smillert    L<verticalE<verbar>barE<sol>slash>
157b39c5158Smillert
158b39c5158Smillertwhich should be interpreted as a link to the C<vertical|bar/slash> POD page
159b39c5158Smillertand not as a link to the C<slash> section of the C<bar> POD page with an
160b39c5158Smillertanchor text of C<vertical>.  Note that not only the anchor text will need to
161b39c5158Smillerthave formatting codes expanded, but so will the target of the link (to deal
162b39c5158Smillertwith EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
163b39c5158Smillertthe section may be necessary depending on whether the translator wants to
164b39c5158Smillertconsider markup in sections to be significant when resolving links.  See
165b39c5158SmillertL<perlpodspec> for more information.
166b39c5158Smillert
167b39c5158Smillert=head1 AUTHOR
168b39c5158Smillert
16956d68f1eSafresh1Russ Allbery <rra@cpan.org>
170b39c5158Smillert
171b39c5158Smillert=head1 COPYRIGHT AND LICENSE
172b39c5158Smillert
173e0680481Safresh1Copyright 2001, 2008, 2009, 2014, 2018-2019, 2022 Russ Allbery <rra@cpan.org>
174b39c5158Smillert
175b39c5158SmillertThis program is free software; you may redistribute it and/or modify it
176b39c5158Smillertunder the same terms as Perl itself.
177b39c5158Smillert
178b46d8ef2Safresh1=head1 SEE ALSO
179b46d8ef2Safresh1
180b46d8ef2Safresh1L<Pod::Parser>
181b46d8ef2Safresh1
182b46d8ef2Safresh1The current version of this module is always available from its web site at
183b46d8ef2Safresh1L<https://www.eyrie.org/~eagle/software/podlators/>.
184b46d8ef2Safresh1
185b39c5158Smillert=cut
186b46d8ef2Safresh1
187b46d8ef2Safresh1# Local Variables:
188b46d8ef2Safresh1# copyright-at-end-flag: t
189b46d8ef2Safresh1# End:
190