xref: /llvm-project/clang-tools-extra/clangd/URI.cpp (revision d5953e3e3092f7142a07aa012fc9665ede09e53b)
//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "URI.h"
10 #include "support/Logger.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Path.h"
15 #include <algorithm>
16 
17 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
18 
19 namespace clang {
20 namespace clangd {
21 namespace {
22 
isWindowsPath(llvm::StringRef Path)23 bool isWindowsPath(llvm::StringRef Path) {
24   return Path.size() > 1 && llvm::isAlpha(Path[0]) && Path[1] == ':';
25 }
26 
isNetworkPath(llvm::StringRef Path)27 bool isNetworkPath(llvm::StringRef Path) {
28   return Path.size() > 2 && Path[0] == Path[1] &&
29          llvm::sys::path::is_separator(Path[0]);
30 }
31 
32 /// This manages file paths in the file system. All paths in the scheme
33 /// are absolute (with leading '/').
34 /// Note that this scheme is hardcoded into the library and not registered in
35 /// registry.
36 class FileSystemScheme : public URIScheme {
37 public:
38   llvm::Expected<std::string>
getAbsolutePath(llvm::StringRef Authority,llvm::StringRef Body,llvm::StringRef) const39   getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
40                   llvm::StringRef /*HintPath*/) const override {
41     if (!Body.starts_with("/"))
42       return error("File scheme: expect body to be an absolute path starting "
43                    "with '/': {0}",
44                    Body);
45     llvm::SmallString<128> Path;
46     if (!Authority.empty()) {
47       // Windows UNC paths e.g. file://server/share => \\server\share
48       ("//" + Authority).toVector(Path);
49     } else if (isWindowsPath(Body.substr(1))) {
50       // Windows paths e.g. file:///X:/path => X:\path
51       Body.consume_front("/");
52     }
53     Path.append(Body);
54     llvm::sys::path::native(Path);
55     return std::string(Path);
56   }
57 
58   llvm::Expected<URI>
uriFromAbsolutePath(llvm::StringRef AbsolutePath) const59   uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
60     std::string Body;
61     llvm::StringRef Authority;
62     llvm::StringRef Root = llvm::sys::path::root_name(AbsolutePath);
63     if (isNetworkPath(Root)) {
64       // Windows UNC paths e.g. \\server\share => file://server/share
65       Authority = Root.drop_front(2);
66       AbsolutePath.consume_front(Root);
67     } else if (isWindowsPath(Root)) {
68       // Windows paths e.g. X:\path => file:///X:/path
69       Body = "/";
70     }
71     Body += llvm::sys::path::convert_to_slash(AbsolutePath);
72     return URI("file", Authority, Body);
73   }
74 };
75 
76 llvm::Expected<std::unique_ptr<URIScheme>>
findSchemeByName(llvm::StringRef Scheme)77 findSchemeByName(llvm::StringRef Scheme) {
78   if (Scheme == "file")
79     return std::make_unique<FileSystemScheme>();
80 
81   for (const auto &URIScheme : URISchemeRegistry::entries()) {
82     if (URIScheme.getName() != Scheme)
83       continue;
84     return URIScheme.instantiate();
85   }
86   return error("Can't find scheme: {0}", Scheme);
87 }
88 
shouldEscape(unsigned char C)89 bool shouldEscape(unsigned char C) {
90   // Unreserved characters.
91   if ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
92       (C >= '0' && C <= '9'))
93     return false;
94   switch (C) {
95   case '-':
96   case '_':
97   case '.':
98   case '~':
99   case '/': // '/' is only reserved when parsing.
100   // ':' is only reserved for relative URI paths, which clangd doesn't produce.
101   case ':':
102     return false;
103   }
104   return true;
105 }
106 
107 /// Encodes a string according to percent-encoding.
108 /// - Unreserved characters are not escaped.
109 /// - Reserved characters always escaped with exceptions like '/'.
110 /// - All other characters are escaped.
percentEncode(llvm::StringRef Content,std::string & Out)111 void percentEncode(llvm::StringRef Content, std::string &Out) {
112   for (unsigned char C : Content)
113     if (shouldEscape(C)) {
114       Out.push_back('%');
115       Out.push_back(llvm::hexdigit(C / 16));
116       Out.push_back(llvm::hexdigit(C % 16));
117     } else {
118       Out.push_back(C);
119     }
120 }
121 
122 /// Decodes a string according to percent-encoding.
percentDecode(llvm::StringRef Content)123 std::string percentDecode(llvm::StringRef Content) {
124   std::string Result;
125   for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
126     if (*I != '%') {
127       Result += *I;
128       continue;
129     }
130     if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
131         llvm::isHexDigit(*(I + 2))) {
132       Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
133       I += 2;
134     } else
135       Result.push_back(*I);
136   }
137   return Result;
138 }
139 
isValidScheme(llvm::StringRef Scheme)140 bool isValidScheme(llvm::StringRef Scheme) {
141   if (Scheme.empty())
142     return false;
143   if (!llvm::isAlpha(Scheme[0]))
144     return false;
145   return llvm::all_of(llvm::drop_begin(Scheme), [](char C) {
146     return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
147   });
148 }
149 
150 } // namespace
151 
URI(llvm::StringRef Scheme,llvm::StringRef Authority,llvm::StringRef Body)152 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
153          llvm::StringRef Body)
154     : Scheme(Scheme), Authority(Authority), Body(Body) {
155   assert(!Scheme.empty());
156   assert((Authority.empty() || Body.starts_with("/")) &&
157          "URI body must start with '/' when authority is present.");
158 }
159 
toString() const160 std::string URI::toString() const {
161   std::string Result;
162   percentEncode(Scheme, Result);
163   Result.push_back(':');
164   if (Authority.empty() && Body.empty())
165     return Result;
166   // If authority if empty, we only print body if it starts with "/"; otherwise,
167   // the URI is invalid.
168   if (!Authority.empty() || llvm::StringRef(Body).starts_with("/")) {
169     Result.append("//");
170     percentEncode(Authority, Result);
171   }
172   percentEncode(Body, Result);
173   return Result;
174 }
175 
parse(llvm::StringRef OrigUri)176 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
177   URI U;
178   llvm::StringRef Uri = OrigUri;
179 
180   auto Pos = Uri.find(':');
181   if (Pos == llvm::StringRef::npos)
182     return error("Scheme must be provided in URI: {0}", OrigUri);
183   auto SchemeStr = Uri.substr(0, Pos);
184   U.Scheme = percentDecode(SchemeStr);
185   if (!isValidScheme(U.Scheme))
186     return error("Invalid scheme: {0} (decoded: {1})", SchemeStr, U.Scheme);
187   Uri = Uri.substr(Pos + 1);
188   if (Uri.consume_front("//")) {
189     Pos = Uri.find('/');
190     U.Authority = percentDecode(Uri.substr(0, Pos));
191     Uri = Uri.substr(Pos);
192   }
193   U.Body = percentDecode(Uri);
194   return U;
195 }
196 
resolve(llvm::StringRef FileURI,llvm::StringRef HintPath)197 llvm::Expected<std::string> URI::resolve(llvm::StringRef FileURI,
198                                          llvm::StringRef HintPath) {
199   auto Uri = URI::parse(FileURI);
200   if (!Uri)
201     return Uri.takeError();
202   auto Path = URI::resolve(*Uri, HintPath);
203   if (!Path)
204     return Path.takeError();
205   return *Path;
206 }
207 
create(llvm::StringRef AbsolutePath,llvm::StringRef Scheme)208 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
209                                 llvm::StringRef Scheme) {
210   if (!llvm::sys::path::is_absolute(AbsolutePath))
211     return error("Not a valid absolute path: {0}", AbsolutePath);
212   auto S = findSchemeByName(Scheme);
213   if (!S)
214     return S.takeError();
215   return S->get()->uriFromAbsolutePath(AbsolutePath);
216 }
217 
create(llvm::StringRef AbsolutePath)218 URI URI::create(llvm::StringRef AbsolutePath) {
219   if (!llvm::sys::path::is_absolute(AbsolutePath))
220     llvm_unreachable(
221         ("Not a valid absolute path: " + AbsolutePath).str().c_str());
222   for (auto &Entry : URISchemeRegistry::entries()) {
223     auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
224     // For some paths, conversion to different URI schemes is impossible. These
225     // should be just skipped.
226     if (!URI) {
227       // Ignore the error.
228       llvm::consumeError(URI.takeError());
229       continue;
230     }
231     return std::move(*URI);
232   }
233   // Fallback to file: scheme which should work for any paths.
234   return URI::createFile(AbsolutePath);
235 }
236 
createFile(llvm::StringRef AbsolutePath)237 URI URI::createFile(llvm::StringRef AbsolutePath) {
238   auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
239   if (!U)
240     llvm_unreachable(llvm::toString(U.takeError()).c_str());
241   return std::move(*U);
242 }
243 
resolve(const URI & Uri,llvm::StringRef HintPath)244 llvm::Expected<std::string> URI::resolve(const URI &Uri,
245                                          llvm::StringRef HintPath) {
246   auto S = findSchemeByName(Uri.Scheme);
247   if (!S)
248     return S.takeError();
249   return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
250 }
251 
resolvePath(llvm::StringRef AbsPath,llvm::StringRef HintPath)252 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
253                                              llvm::StringRef HintPath) {
254   if (!llvm::sys::path::is_absolute(AbsPath))
255     llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
256   for (auto &Entry : URISchemeRegistry::entries()) {
257     auto S = Entry.instantiate();
258     auto U = S->uriFromAbsolutePath(AbsPath);
259     // For some paths, conversion to different URI schemes is impossible. These
260     // should be just skipped.
261     if (!U) {
262       // Ignore the error.
263       llvm::consumeError(U.takeError());
264       continue;
265     }
266     return S->getAbsolutePath(U->Authority, U->Body, HintPath);
267   }
268   // Fallback to file: scheme which doesn't do any canonicalization.
269   return std::string(AbsPath);
270 }
271 
includeSpelling(const URI & Uri)272 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
273   auto S = findSchemeByName(Uri.Scheme);
274   if (!S)
275     return S.takeError();
276   return S->get()->getIncludeSpelling(Uri);
277 }
278 
279 } // namespace clangd
280 } // namespace clang
281