xref: /freebsd-src/contrib/llvm-project/lldb/source/Utility/FileSpec.cpp (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1 //===-- FileSpec.cpp ------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/FileSpec.h"
10 #include "lldb/Utility/RegularExpression.h"
11 #include "lldb/Utility/Stream.h"
12 
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/ErrorOr.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Program.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/TargetParser/Triple.h"
23 
24 #include <algorithm>
25 #include <optional>
26 #include <system_error>
27 #include <vector>
28 
29 #include <cassert>
30 #include <climits>
31 #include <cstdio>
32 #include <cstring>
33 
34 using namespace lldb;
35 using namespace lldb_private;
36 
37 namespace {
38 
39 static constexpr FileSpec::Style GetNativeStyle() {
40 #if defined(_WIN32)
41   return FileSpec::Style::windows;
42 #else
43   return FileSpec::Style::posix;
44 #endif
45 }
46 
47 bool PathStyleIsPosix(FileSpec::Style style) {
48   return llvm::sys::path::is_style_posix(style);
49 }
50 
51 const char *GetPathSeparators(FileSpec::Style style) {
52   return llvm::sys::path::get_separator(style).data();
53 }
54 
55 char GetPreferredPathSeparator(FileSpec::Style style) {
56   return GetPathSeparators(style)[0];
57 }
58 
59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60   if (PathStyleIsPosix(style))
61     return;
62 
63   std::replace(path.begin(), path.end(), '/', '\\');
64 }
65 
66 } // end anonymous namespace
67 
68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69 
70 // Default constructor that can take an optional full path to a file on disk.
71 FileSpec::FileSpec(llvm::StringRef path, Style style, const Checksum &checksum)
72     : m_checksum(checksum), m_style(style) {
73   SetFile(path, style, checksum);
74 }
75 
76 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
77     : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
78 
79 namespace {
80 /// Safely get a character at the specified index.
81 ///
82 /// \param[in] path
83 ///     A full, partial, or relative path to a file.
84 ///
85 /// \param[in] i
86 ///     An index into path which may or may not be valid.
87 ///
88 /// \return
89 ///   The character at index \a i if the index is valid, or 0 if
90 ///   the index is not valid.
91 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
92   if (i < path.size())
93     return path[i];
94   return 0;
95 }
96 
97 /// Check if a path needs to be normalized.
98 ///
99 /// Check if a path needs to be normalized. We currently consider a
100 /// path to need normalization if any of the following are true
101 ///  - path contains "/./"
102 ///  - path contains "/../"
103 ///  - path contains "//"
104 ///  - path ends with "/"
105 /// Paths that start with "./" or with "../" are not considered to
106 /// need normalization since we aren't trying to resolve the path,
107 /// we are just trying to remove redundant things from the path.
108 ///
109 /// \param[in] path
110 ///     A full, partial, or relative path to a file.
111 ///
112 /// \return
113 ///   Returns \b true if the path needs to be normalized.
114 bool needsNormalization(const llvm::StringRef &path) {
115   if (path.empty())
116     return false;
117   // We strip off leading "." values so these paths need to be normalized
118   if (path[0] == '.')
119     return true;
120   for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
121        i = path.find_first_of("\\/", i + 1)) {
122     const auto next = safeCharAtIndex(path, i+1);
123     switch (next) {
124       case 0:
125         // path separator char at the end of the string which should be
126         // stripped unless it is the one and only character
127         return i > 0;
128       case '/':
129       case '\\':
130         // two path separator chars in the middle of a path needs to be
131         // normalized
132         if (i > 0)
133           return true;
134         ++i;
135         break;
136 
137       case '.': {
138           const auto next_next = safeCharAtIndex(path, i+2);
139           switch (next_next) {
140             default: break;
141             case 0: return true; // ends with "/."
142             case '/':
143             case '\\':
144               return true; // contains "/./"
145             case '.': {
146               const auto next_next_next = safeCharAtIndex(path, i+3);
147               switch (next_next_next) {
148                 default: break;
149                 case 0: return true; // ends with "/.."
150                 case '/':
151                 case '\\':
152                   return true; // contains "/../"
153               }
154               break;
155             }
156           }
157         }
158         break;
159 
160       default:
161         break;
162     }
163   }
164   return false;
165 }
166 
167 
168 }
169 
170 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
171 
172 // Update the contents of this object with a new path. The path will be split
173 // up into a directory and filename and stored as uniqued string values for
174 // quick comparison and efficient memory usage.
175 void FileSpec::SetFile(llvm::StringRef pathname, Style style,
176                        const Checksum &checksum) {
177   Clear();
178   m_style = (style == Style::native) ? GetNativeStyle() : style;
179   m_checksum = checksum;
180 
181   if (pathname.empty())
182     return;
183 
184   llvm::SmallString<128> resolved(pathname);
185 
186   // Normalize the path by removing ".", ".." and other redundant components.
187   if (needsNormalization(resolved))
188     llvm::sys::path::remove_dots(resolved, true, m_style);
189 
190   // Normalize back slashes to forward slashes
191   if (m_style == Style::windows)
192     std::replace(resolved.begin(), resolved.end(), '\\', '/');
193 
194   if (resolved.empty()) {
195     // If we have no path after normalization set the path to the current
196     // directory. This matches what python does and also a few other path
197     // utilities.
198     m_filename.SetString(".");
199     return;
200   }
201 
202   // Split path into filename and directory. We rely on the underlying char
203   // pointer to be nullptr when the components are empty.
204   llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
205   if(!filename.empty())
206     m_filename.SetString(filename);
207 
208   llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
209   if(!directory.empty())
210     m_directory.SetString(directory);
211 }
212 
213 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
214   return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
215 }
216 
217 // Convert to pointer operator. This allows code to check any FileSpec objects
218 // to see if they contain anything valid using code such as:
219 //
220 //  if (file_spec)
221 //  {}
222 FileSpec::operator bool() const { return m_filename || m_directory; }
223 
224 // Logical NOT operator. This allows code to check any FileSpec objects to see
225 // if they are invalid using code such as:
226 //
227 //  if (!file_spec)
228 //  {}
229 bool FileSpec::operator!() const { return !m_directory && !m_filename; }
230 
231 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
232   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
233   return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
234 }
235 
236 bool FileSpec::FileEquals(const FileSpec &rhs) const {
237   const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
238   return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
239 }
240 
241 // Equal to operator
242 bool FileSpec::operator==(const FileSpec &rhs) const {
243   return FileEquals(rhs) && DirectoryEquals(rhs);
244 }
245 
246 // Not equal to operator
247 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
248 
249 // Less than operator
250 bool FileSpec::operator<(const FileSpec &rhs) const {
251   return FileSpec::Compare(*this, rhs, true) < 0;
252 }
253 
254 // Dump a FileSpec object to a stream
255 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
256   f.Dump(s.AsRawOstream());
257   return s;
258 }
259 
260 // Clear this object by releasing both the directory and filename string values
261 // and making them both the empty string.
262 void FileSpec::Clear() {
263   m_directory.Clear();
264   m_filename.Clear();
265   PathWasModified();
266 }
267 
268 // Compare two FileSpec objects. If "full" is true, then both the directory and
269 // the filename must match. If "full" is false, then the directory names for
270 // "a" and "b" are only compared if they are both non-empty. This allows a
271 // FileSpec object to only contain a filename and it can match FileSpec objects
272 // that have matching filenames with different paths.
273 //
274 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
275 // "a" is greater than "b".
276 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
277   int result = 0;
278 
279   // case sensitivity of compare
280   const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
281 
282   // If full is true, then we must compare both the directory and filename.
283 
284   // If full is false, then if either directory is empty, then we match on the
285   // basename only, and if both directories have valid values, we still do a
286   // full compare. This allows for matching when we just have a filename in one
287   // of the FileSpec objects.
288 
289   if (full || (a.m_directory && b.m_directory)) {
290     result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
291     if (result)
292       return result;
293   }
294   return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
295 }
296 
297 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
298   if (full || (a.GetDirectory() && b.GetDirectory()))
299     return a == b;
300 
301   return a.FileEquals(b);
302 }
303 
304 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
305   if (pattern.GetDirectory())
306     return pattern == file;
307   if (pattern.GetFilename())
308     return pattern.FileEquals(file);
309   return true;
310 }
311 
312 std::optional<FileSpec::Style>
313 FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
314   if (absolute_path.starts_with("/"))
315     return Style::posix;
316   if (absolute_path.starts_with(R"(\\)"))
317     return Style::windows;
318   if (absolute_path.size() >= 3 && llvm::isAlpha(absolute_path[0]) &&
319       (absolute_path.substr(1, 2) == R"(:\)" ||
320        absolute_path.substr(1, 2) == R"(:/)"))
321     return Style::windows;
322   return std::nullopt;
323 }
324 
325 // Dump the object to the supplied stream. If the object contains a valid
326 // directory name, it will be displayed followed by a directory delimiter, and
327 // the filename.
328 void FileSpec::Dump(llvm::raw_ostream &s) const {
329   std::string path{GetPath(true)};
330   s << path;
331   char path_separator = GetPreferredPathSeparator(m_style);
332   if (!m_filename && !path.empty() && path.back() != path_separator)
333     s << path_separator;
334 }
335 
336 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
337 
// Replace the directory component with an already uniqued string. The value
// is stored as given; no normalization is done here.
void FileSpec::SetDirectory(ConstString directory) {
  m_directory = directory;
  // NOTE(review): presumably resets cached state derived from the path (such
  // as the m_absolute cache used by IsAbsolute) - confirm in the header.
  PathWasModified();
}
342 
343 void FileSpec::SetDirectory(llvm::StringRef directory) {
344   m_directory = ConstString(directory);
345   PathWasModified();
346 }
347 
// Replace the filename component with an already uniqued string. The value
// is stored as given; no normalization is done here.
void FileSpec::SetFilename(ConstString filename) {
  m_filename = filename;
  // NOTE(review): presumably resets cached state derived from the path (such
  // as the m_absolute cache used by IsAbsolute) - confirm in the header.
  PathWasModified();
}
352 
353 void FileSpec::SetFilename(llvm::StringRef filename) {
354   m_filename = ConstString(filename);
355   PathWasModified();
356 }
357 
// Remove the filename component; the directory component is left untouched.
void FileSpec::ClearFilename() {
  m_filename.Clear();
  // NOTE(review): presumably resets cached state derived from the path (such
  // as the m_absolute cache used by IsAbsolute) - confirm in the header.
  PathWasModified();
}
362 
// Remove the directory component; the filename component is left untouched.
void FileSpec::ClearDirectory() {
  m_directory.Clear();
  // NOTE(review): presumably resets cached state derived from the path (such
  // as the m_absolute cache used by IsAbsolute) - confirm in the header.
  PathWasModified();
}
367 
368 // Extract the directory and path into a fixed buffer. This is needed as the
369 // directory and path are stored in separate string values.
370 size_t FileSpec::GetPath(char *path, size_t path_max_len,
371                          bool denormalize) const {
372   if (!path)
373     return 0;
374 
375   std::string result = GetPath(denormalize);
376   ::snprintf(path, path_max_len, "%s", result.c_str());
377   return std::min(path_max_len - 1, result.length());
378 }
379 
380 std::string FileSpec::GetPath(bool denormalize) const {
381   llvm::SmallString<64> result;
382   GetPath(result, denormalize);
383   return static_cast<std::string>(result);
384 }
385 
386 ConstString FileSpec::GetPathAsConstString(bool denormalize) const {
387   return ConstString{GetPath(denormalize)};
388 }
389 
390 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
391                        bool denormalize) const {
392   path.append(m_directory.GetStringRef().begin(),
393               m_directory.GetStringRef().end());
394   // Since the path was normalized and all paths use '/' when stored in these
395   // objects, we don't need to look for the actual syntax specific path
396   // separator, we just look for and insert '/'.
397   if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
398       m_filename.GetStringRef().back() != '/')
399     path.insert(path.end(), '/');
400   path.append(m_filename.GetStringRef().begin(),
401               m_filename.GetStringRef().end());
402   if (denormalize && !path.empty())
403     Denormalize(path, m_style);
404 }
405 
406 llvm::StringRef FileSpec::GetFileNameExtension() const {
407   return llvm::sys::path::extension(m_filename.GetStringRef(), m_style);
408 }
409 
410 ConstString FileSpec::GetFileNameStrippingExtension() const {
411   return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
412 }
413 
414 // Return the size in bytes that this object takes in memory. This returns the
415 // size in bytes of this object, not any shared string values it may refer to.
416 size_t FileSpec::MemorySize() const {
417   return m_filename.MemorySize() + m_directory.MemorySize();
418 }
419 
420 FileSpec
421 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
422   FileSpec ret = *this;
423   ret.AppendPathComponent(component);
424   return ret;
425 }
426 
427 FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
428   llvm::SmallString<64> current_path;
429   GetPath(current_path, false);
430   if (llvm::sys::path::has_parent_path(current_path, m_style))
431     return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
432                     m_style);
433   return *this;
434 }
435 
436 void FileSpec::PrependPathComponent(llvm::StringRef component) {
437   llvm::SmallString<64> new_path(component);
438   llvm::SmallString<64> current_path;
439   GetPath(current_path, false);
440   llvm::sys::path::append(new_path,
441                           llvm::sys::path::begin(current_path, m_style),
442                           llvm::sys::path::end(current_path), m_style);
443   SetFile(new_path, m_style);
444 }
445 
446 void FileSpec::PrependPathComponent(const FileSpec &new_path) {
447   return PrependPathComponent(new_path.GetPath(false));
448 }
449 
450 void FileSpec::AppendPathComponent(llvm::StringRef component) {
451   llvm::SmallString<64> current_path;
452   GetPath(current_path, false);
453   llvm::sys::path::append(current_path, m_style, component);
454   SetFile(current_path, m_style);
455 }
456 
457 void FileSpec::AppendPathComponent(const FileSpec &new_path) {
458   return AppendPathComponent(new_path.GetPath(false));
459 }
460 
461 bool FileSpec::RemoveLastPathComponent() {
462   llvm::SmallString<64> current_path;
463   GetPath(current_path, false);
464   if (llvm::sys::path::has_parent_path(current_path, m_style)) {
465     SetFile(llvm::sys::path::parent_path(current_path, m_style));
466     return true;
467   }
468   return false;
469 }
470 
471 std::vector<llvm::StringRef> FileSpec::GetComponents() const {
472   std::vector<llvm::StringRef> components;
473 
474   auto dir_begin = llvm::sys::path::begin(m_directory.GetStringRef(), m_style);
475   auto dir_end = llvm::sys::path::end(m_directory.GetStringRef());
476 
477   for (auto iter = dir_begin; iter != dir_end; ++iter) {
478     if (*iter == "/" || *iter == ".")
479       continue;
480 
481     components.push_back(*iter);
482   }
483 
484   if (!m_filename.IsEmpty() && m_filename != "/" && m_filename != ".")
485     components.push_back(m_filename.GetStringRef());
486 
487   return components;
488 }
489 
490 /// Returns true if the filespec represents an implementation source
491 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
492 /// extension).
493 ///
494 /// \return
495 ///     \b true if the filespec represents an implementation source
496 ///     file, \b false otherwise.
497 bool FileSpec::IsSourceImplementationFile() const {
498   llvm::StringRef extension = GetFileNameExtension();
499   if (extension.empty())
500     return false;
501 
502   static RegularExpression g_source_file_regex(llvm::StringRef(
503       "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
504       "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
505       "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
506       "$"));
507   return g_source_file_regex.Execute(extension);
508 }
509 
510 bool FileSpec::IsRelative() const {
511   return !IsAbsolute();
512 }
513 
514 bool FileSpec::IsAbsolute() const {
515   // Check if we have cached if this path is absolute to avoid recalculating.
516   if (m_absolute != Absolute::Calculate)
517     return m_absolute == Absolute::Yes;
518 
519   m_absolute = Absolute::No;
520 
521   llvm::SmallString<64> path;
522   GetPath(path, false);
523 
524   if (!path.empty()) {
525     // We consider paths starting with ~ to be absolute.
526     if (path[0] == '~' || llvm::sys::path::is_absolute(path, m_style))
527       m_absolute = Absolute::Yes;
528   }
529 
530   return m_absolute == Absolute::Yes;
531 }
532 
533 void FileSpec::MakeAbsolute(const FileSpec &dir) {
534   if (IsRelative())
535     PrependPathComponent(dir);
536 }
537 
538 void llvm::format_provider<FileSpec>::format(const FileSpec &F,
539                                              raw_ostream &Stream,
540                                              StringRef Style) {
541   assert((Style.empty() || Style.equals_insensitive("F") ||
542           Style.equals_insensitive("D")) &&
543          "Invalid FileSpec style!");
544 
545   StringRef dir = F.GetDirectory().GetStringRef();
546   StringRef file = F.GetFilename().GetStringRef();
547 
548   if (dir.empty() && file.empty()) {
549     Stream << "(empty)";
550     return;
551   }
552 
553   if (Style.equals_insensitive("F")) {
554     Stream << (file.empty() ? "(empty)" : file);
555     return;
556   }
557 
558   // Style is either D or empty, either way we need to print the directory.
559   if (!dir.empty()) {
560     // Directory is stored in normalized form, which might be different than
561     // preferred form.  In order to handle this, we need to cut off the
562     // filename, then denormalize, then write the entire denorm'ed directory.
563     llvm::SmallString<64> denormalized_dir = dir;
564     Denormalize(denormalized_dir, F.GetPathStyle());
565     Stream << denormalized_dir;
566     Stream << GetPreferredPathSeparator(F.GetPathStyle());
567   }
568 
569   if (Style.equals_insensitive("D")) {
570     // We only want to print the directory, so now just exit.
571     if (dir.empty())
572       Stream << "(empty)";
573     return;
574   }
575 
576   if (!file.empty())
577     Stream << file;
578 }
579