# Parse an L<> formatting code in POD text.
#
# This module implements parsing of the text of an L<> formatting code as
# defined in perlpodspec. It should be suitable for any POD formatter. It
# exports only one function, parselink(), which returns the five-item parse
# defined in perlpodspec.
#
# SPDX-License-Identifier: GPL-1.0-or-later OR Artistic-1.0-Perl

##############################################################################
# Modules and declarations
##############################################################################

package Pod::ParseLink;

use 5.010;
use strict;
use warnings;

use Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(parselink);
our $VERSION = '5.01_02';
$VERSION =~ tr/_//d;

##############################################################################
# Implementation
##############################################################################

# Parse the name and section portion of a link into a name and section.
#
# $link - The link target with any leading "text|" already removed.
#
# Returns: A two-element list (page, section). Either element is undef when
#          that part is absent or empty. A page or section of "0" is a valid
#          value and is returned as such.
sub _parse_section {
    my ($link) = @_;
    $link =~ s/^\s+//;
    $link =~ s/\s+$//;

    # If the whole link is enclosed in quotes, interpret it all as a section
    # even if it contains a slash.
    return (undef, $1) if ($link =~ /^"\s*(.*?)\s*"$/);

    # Split into page and section on the first slash, and then clean up
    # quoting in the section.
    my ($page, $section) = split (/\s*\/\s*/, $link, 2);
    $section =~ s/^"\s*(.*?)\s*"$/$1/ if defined ($section);

    # If there is no section and the name contains spaces, guess that it's
    # an old-style section link.
    if (defined ($page) && $page =~ / / && !defined ($section)) {
        return (undef, $page);
    }

    # Normalize empty strings to undef. Test with defined and length rather
    # than truth so that a page or section named "0" is not discarded.
    $page    = undef unless defined ($page)    && length ($page);
    $section = undef unless defined ($section) && length ($section);
    return ($page, $section);
}

# Infer link text from the page and section.
#
# $page    - Page name, or undef if the link has none
# $section - Section name, or undef if the link has none
#
# Returns: The page alone, the section in double quotes, or '"section" in
#          page' when both are present; undef when neither is present.
sub _infer_text {
    my ($page, $section) = @_;

    # As in _parse_section, "0" counts as present; only undef and the empty
    # string count as absent.
    my $has_page    = defined ($page)    && length ($page);
    my $has_section = defined ($section) && length ($section);

    my $inferred;
    if ($has_page && $has_section) {
        $inferred = '"' . $section . '" in ' . $page;
    } elsif ($has_section) {
        $inferred = '"' . $section . '"';
    } elsif ($has_page) {
        $inferred = $page;
    }
    return $inferred;
}

# Given the contents of an L<> formatting code, parse it into the link text,
# the possibly inferred link text, the name or URL, the section, and the type
# of link (pod, man, or url).
sub parselink {
    my ($link) = @_;

    # Returns the five-element list (text, inferred, name, section, type):
    #   text     - anchor text before the first "|", or undef if there is none
    #   inferred - text if it is non-empty, otherwise text derived from the
    #              target (the URL itself, or the page and section)
    #   name     - page name or URL (undef for a section-only link)
    #   section  - section name (always undef for URLs)
    #   type     - 'url', 'man', or 'pod'

    # Collapse every run of whitespace, including newlines, to one space.
    $link =~ s/\s+/ /g;

    # Everything before the first vertical bar is the anchor text.
    my $text;
    if ($link =~ /\|/) {
        ($text, $link) = split (/\|/, $link, 2);
    }
    my $has_text = defined ($text) && length ($text) > 0;

    # A scheme, a colon, and no whitespace means a URL. The character after
    # the colon must not be another colon, so Perl module names such as
    # Foo::Bar are not mistaken for URLs.
    if ($link =~ /\A\w+:[^:\s]\S*\Z/) {
        my $inferred = $has_text ? $text : $link;
        return ($text, $inferred, $link, undef, 'url');
    }

    # Otherwise this is a link to a POD page or a manual page.
    my ($name, $section) = _parse_section ($link);
    my $inferred = $has_text ? $text : _infer_text ($name, $section);

    # A parenthesized suffix such as name(1) marks a manual page.
    my $type = ($name && $name =~ /\(\S*\)/) ? 'man' : 'pod';
    return ($text, $inferred, $name, $section, $type);
}

##############################################################################
# Module return value and documentation
##############################################################################

# Ensure we evaluate to true.
1;
__END__

=for stopwords
markup Allbery URL

=head1 NAME

Pod::ParseLink - Parse an LE<lt>E<gt> formatting code in POD text

=head1 SYNOPSIS

    use Pod::ParseLink;
    my $link = get_link();
    my ($text, $inferred, $name, $section, $type) = parselink($link);

=head1 DESCRIPTION

This module only provides a single function, parselink(), which takes the
text of an LE<lt>E<gt> formatting code and parses it. It returns the
anchor text for the link (if any was given), the anchor text possibly
inferred from the name and section, the name or URL, the section if any,
and the type of link. The type will be one of C<url>, C<pod>, or C<man>,
indicating a URL, a link to a POD page, or a link to a Unix manual page.

Parsing is implemented per L<perlpodspec>. For backward compatibility,
links where there is no section and the name contains spaces, or links where
the entirety of the link (except for the anchor text if given) is enclosed in
double-quotes are interpreted as links to a section (LE<lt>/sectionE<gt>).

The inferred anchor text is implemented per L<perlpodspec>:

    L<name>         =>  L<name|name>
    L</section>     =>  L<"section"|/section>
    L<name/section> =>  L<"section" in name|name/section>

The name may contain embedded EE<lt>E<gt> and ZE<lt>E<gt> formatting codes,
and the section, anchor text, and inferred anchor text may contain any
formatting codes. Any double quotes around the section are removed as part
of the parsing, as is any leading or trailing whitespace.

If the text of the LE<lt>E<gt> escape is entirely enclosed in double
quotes, it's interpreted as a link to a section for backward
compatibility.

No attempt is made to resolve formatting codes. This must be done after
calling parselink() (since EE<lt>E<gt> formatting codes can be used to
escape characters that would otherwise be significant to the parser and
resolving them before parsing would result in an incorrect parse of a
formatting code like:

    L<verticalE<verbar>barE<sol>slash>

which should be interpreted as a link to the C<vertical|bar/slash> POD page
and not as a link to the C<slash> section of the C<bar> POD page with an
anchor text of C<vertical>).
Note that not only the anchor text will need to
have formatting codes expanded, but so will the target of the link (to deal
with EE<lt>E<gt> and ZE<lt>E<gt> formatting codes), and special handling of
the section may be necessary depending on whether the translator wants to
consider markup in sections to be significant when resolving links. See
L<perlpodspec> for more information.

=head1 AUTHOR

Russ Allbery <rra@cpan.org>

=head1 COPYRIGHT AND LICENSE

Copyright 2001, 2008, 2009, 2014, 2018-2019, 2022 Russ Allbery <rra@cpan.org>

This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

=head1 SEE ALSO

L<Pod::Parser>

The current version of this module is always available from its web site at
L<https://www.eyrie.org/~eagle/software/podlators/>.

=cut

# Local Variables:
# copyright-at-end-flag: t
# End: