xref: /openbsd-src/gnu/usr.bin/perl/cpan/podlators/lib/Pod/ParseLink.pm (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1# Pod::ParseLink -- Parse an L<> formatting code in POD text.
2#
3# Copyright 2001, 2008, 2009 by Russ Allbery <rra@stanford.edu>
4#
5# This program is free software; you may redistribute it and/or modify it
6# under the same terms as Perl itself.
7#
8# This module implements parsing of the text of an L<> formatting code as
9# defined in perlpodspec.  It should be suitable for any POD formatter.  It
10# exports only one function, parselink(), which returns the five-item parse
11# defined in perlpodspec.
12#
13# Perl core hackers, please note that this module is also separately
14# maintained outside of the Perl core as part of the podlators.  Please send
15# me any patches at the address above in addition to sending them to the
16# standard Perl mailing lists.
17
18##############################################################################
19# Modules and declarations
20##############################################################################
21
22package Pod::ParseLink;
23
24require 5.004;
25
26use strict;
27use vars qw(@EXPORT @ISA $VERSION);
28
29use Exporter;
30@ISA    = qw(Exporter);
31@EXPORT = qw(parselink);
32
33$VERSION = '1.10';
34
35##############################################################################
36# Implementation
37##############################################################################
38
# Parse the name and section portion of a link into a name and section.
#
# Takes the link target (the part of the L<> contents after any "text|"
# prefix) and returns a two-element list of (page, section).  Leading and
# trailing whitespace, whitespace around the separating slash, and double
# quotes around the section are stripped.  In the slash-separated form, a
# missing or empty page or section is returned as undef.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there is no section and the name contains spaces, guess that it's an
    # old section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize empty parts to undef.  Presence is decided by length rather
    # than by truth so that a page or section named "0" is kept, matching
    # what the fully quoted form above already does for a section of "0".
    $page    = undef unless defined ($page)    && length ($page);
    $section = undef unless defined ($section) && length ($section);
    return ($page, $section);
}
63
# Infer link text from the page and section.
#
# Takes the page name and the section, either of which may be undef or
# empty, and returns the anchor text to use when the link gave none:
#
#     page only          =>  page
#     section only       =>  "section"
#     page and section   =>  "section" in page
#
# Returns undef when neither a page nor a section is present.
sub _infer_text {
    my ($page, $section) = @_;

    # Decide presence by length rather than by truth so that a page or
    # section whose name is the string "0" is not mistaken for a missing one.
    my $has_page    = defined ($page)    && length ($page);
    my $has_section = defined ($section) && length ($section);

    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}
77
# Parse the contents of an L<> formatting code.  Returns a five-element list:
# the anchor text as written (undef if the link had no "text|" part), the
# anchor text to display (the explicit text when it is non-empty, otherwise
# text inferred from the target), the name or URL, the section, and the link
# type, which is one of url, pod, or man.
sub parselink {
    my ($link) = @_;

    # Collapse every run of whitespace, newlines included, to a single space.
    $link =~ s/\s+/ /g;

    # Everything before the first vertical bar is explicit anchor text.
    my $text;
    ($text, $link) = split (/\|/, $link, 2) if $link =~ /\|/;
    my $has_text = defined ($text) && length ($text) > 0;

    # A scheme, a colon, and then non-whitespace that does not begin with a
    # second colon (which would make it look like Foo::Bar) is a URL.  URLs
    # have no section, and the URL itself stands in for missing anchor text.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $shown = $has_text ? $text : $link;
        return ($text, $shown, $link, undef, 'url');
    }

    # Otherwise this is a POD or man page link with an optional section.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);

    # A parenthesized suffix in the name, as in crontab(5), marks a man page.
    my $type = 'pod';
    $type = 'man' if $name && $name =~ /\(\S*\)/;
    return ($text, $inferred, $name, $section, $type);
}
107
108##############################################################################
109# Module return value and documentation
110##############################################################################
111
112# Ensure we evaluate to true.
1131;
114__END__
115
116=head1 NAME
117
118Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text
119
120=for stopwords
121markup Allbery URL
122
123=head1 SYNOPSIS
124
125    use Pod::ParseLink;
126    my ($text, $inferred, $name, $section, $type) = parselink ($link);
127
128=head1 DESCRIPTION
129
130This module only provides a single function, parselink(), which takes the
131text of an LE<lt>E<gt> formatting code and parses it.  It returns the
132anchor text for the link (if any was given), the anchor text possibly
133inferred from the name and section, the name or URL, the section if any,
134and the type of link.  The type will be one of C<url>, C<pod>, or C<man>,
135indicating a URL, a link to a POD page, or a link to a Unix manual page.
136
Parsing is implemented per L<perlpodspec>.  For backward compatibility,
links where there is no section and the name contains spaces, or links where
the entirety of the link (except for the anchor text if given) is enclosed in
double-quotes, are interpreted as links to a section (LE<lt>/sectionE<gt>).
141
142The inferred anchor text is implemented per L<perlpodspec>:
143
144    L<name>         =>  L<name|name>
145    L</section>     =>  L<"section"|/section>
146    L<name/section> =>  L<"section" in name|name/section>
147
148The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
149and the section, anchor text, and inferred anchor text may contain any
150formatting codes.  Any double quotes around the section are removed as part
151of the parsing, as is any leading or trailing whitespace.
152
153If the text of the LE<lt>E<gt> escape is entirely enclosed in double
154quotes, it's interpreted as a link to a section for backward
155compatibility.
156
No attempt is made to resolve formatting codes.  This must be done after
calling parselink(), since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser, and
resolving them before parsing would result in an incorrect parse of a
formatting code like:
162
163    L<verticalE<verbar>barE<sol>slash>
164
165which should be interpreted as a link to the C<vertical|bar/slash> POD page
166and not as a link to the C<slash> section of the C<bar> POD page with an
167anchor text of C<vertical>.  Note that not only the anchor text will need to
168have formatting codes expanded, but so will the target of the link (to deal
169with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
170the section may be necessary depending on whether the translator wants to
171consider markup in sections to be significant when resolving links.  See
172L<perlpodspec> for more information.
173
174=head1 SEE ALSO
175
176L<Pod::Parser>
177
178The current version of this module is always available from its web site at
179L<http://www.eyrie.org/~eagle/software/podlators/>.
180
181=head1 AUTHOR
182
183Russ Allbery <rra@stanford.edu>.
184
185=head1 COPYRIGHT AND LICENSE
186
187Copyright 2001, 2008, 2009 Russ Allbery <rra@stanford.edu>.
188
189This program is free software; you may redistribute it and/or modify it
190under the same terms as Perl itself.
191
192=cut
193