1 //===-- FileSpec.cpp ------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Utility/FileSpec.h" 10 #include "lldb/Utility/RegularExpression.h" 11 #include "lldb/Utility/Stream.h" 12 13 #include "llvm/ADT/SmallString.h" 14 #include "llvm/ADT/SmallVector.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Support/ErrorOr.h" 19 #include "llvm/Support/FileSystem.h" 20 #include "llvm/Support/Program.h" 21 #include "llvm/Support/raw_ostream.h" 22 #include "llvm/TargetParser/Triple.h" 23 24 #include <algorithm> 25 #include <optional> 26 #include <system_error> 27 #include <vector> 28 29 #include <cassert> 30 #include <climits> 31 #include <cstdio> 32 #include <cstring> 33 34 using namespace lldb; 35 using namespace lldb_private; 36 37 namespace { 38 39 static constexpr FileSpec::Style GetNativeStyle() { 40 #if defined(_WIN32) 41 return FileSpec::Style::windows; 42 #else 43 return FileSpec::Style::posix; 44 #endif 45 } 46 47 bool PathStyleIsPosix(FileSpec::Style style) { 48 return llvm::sys::path::is_style_posix(style); 49 } 50 51 const char *GetPathSeparators(FileSpec::Style style) { 52 return llvm::sys::path::get_separator(style).data(); 53 } 54 55 char GetPreferredPathSeparator(FileSpec::Style style) { 56 return GetPathSeparators(style)[0]; 57 } 58 59 void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) { 60 if (PathStyleIsPosix(style)) 61 return; 62 63 std::replace(path.begin(), path.end(), '/', '\\'); 64 } 65 66 } // end anonymous namespace 67 68 FileSpec::FileSpec() : m_style(GetNativeStyle()) {} 69 70 // Default constructor that can take an optional full path to a file on disk. 71 FileSpec::FileSpec(llvm::StringRef path, Style style, const Checksum &checksum) 72 : m_checksum(checksum), m_style(style) { 73 SetFile(path, style, checksum); 74 } 75 76 FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple) 77 : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {} 78 79 namespace { 80 /// Safely get a character at the specified index. 81 /// 82 /// \param[in] path 83 /// A full, partial, or relative path to a file. 84 /// 85 /// \param[in] i 86 /// An index into path which may or may not be valid. 87 /// 88 /// \return 89 /// The character at index \a i if the index is valid, or 0 if 90 /// the index is not valid. 91 inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) { 92 if (i < path.size()) 93 return path[i]; 94 return 0; 95 } 96 97 /// Check if a path needs to be normalized. 98 /// 99 /// Check if a path needs to be normalized. We currently consider a 100 /// path to need normalization if any of the following are true 101 /// - path contains "/./" 102 /// - path contains "/../" 103 /// - path contains "//" 104 /// - path ends with "/" 105 /// Paths that start with "./" or with "../" are not considered to 106 /// need normalization since we aren't trying to resolve the path, 107 /// we are just trying to remove redundant things from the path. 108 /// 109 /// \param[in] path 110 /// A full, partial, or relative path to a file. 111 /// 112 /// \return 113 /// Returns \b true if the path needs to be normalized. 114 bool needsNormalization(const llvm::StringRef &path) { 115 if (path.empty()) 116 return false; 117 // We strip off leading "." values so these paths need to be normalized 118 if (path[0] == '.') 119 return true; 120 for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos; 121 i = path.find_first_of("\\/", i + 1)) { 122 const auto next = safeCharAtIndex(path, i+1); 123 switch (next) { 124 case 0: 125 // path separator char at the end of the string which should be 126 // stripped unless it is the one and only character 127 return i > 0; 128 case '/': 129 case '\\': 130 // two path separator chars in the middle of a path needs to be 131 // normalized 132 if (i > 0) 133 return true; 134 ++i; 135 break; 136 137 case '.': { 138 const auto next_next = safeCharAtIndex(path, i+2); 139 switch (next_next) { 140 default: break; 141 case 0: return true; // ends with "/." 142 case '/': 143 case '\\': 144 return true; // contains "/./" 145 case '.': { 146 const auto next_next_next = safeCharAtIndex(path, i+3); 147 switch (next_next_next) { 148 default: break; 149 case 0: return true; // ends with "/.." 150 case '/': 151 case '\\': 152 return true; // contains "/../" 153 } 154 break; 155 } 156 } 157 } 158 break; 159 160 default: 161 break; 162 } 163 } 164 return false; 165 } 166 167 168 } 169 170 void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); } 171 172 // Update the contents of this object with a new path. The path will be split 173 // up into a directory and filename and stored as uniqued string values for 174 // quick comparison and efficient memory usage. 175 void FileSpec::SetFile(llvm::StringRef pathname, Style style, 176 const Checksum &checksum) { 177 Clear(); 178 m_style = (style == Style::native) ? GetNativeStyle() : style; 179 m_checksum = checksum; 180 181 if (pathname.empty()) 182 return; 183 184 llvm::SmallString<128> resolved(pathname); 185 186 // Normalize the path by removing ".", ".." and other redundant components. 187 if (needsNormalization(resolved)) 188 llvm::sys::path::remove_dots(resolved, true, m_style); 189 190 // Normalize back slashes to forward slashes 191 if (m_style == Style::windows) 192 std::replace(resolved.begin(), resolved.end(), '\\', '/'); 193 194 if (resolved.empty()) { 195 // If we have no path after normalization set the path to the current 196 // directory. This matches what python does and also a few other path 197 // utilities. 198 m_filename.SetString("."); 199 return; 200 } 201 202 // Split path into filename and directory. We rely on the underlying char 203 // pointer to be nullptr when the components are empty. 204 llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style); 205 if(!filename.empty()) 206 m_filename.SetString(filename); 207 208 llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style); 209 if(!directory.empty()) 210 m_directory.SetString(directory); 211 } 212 213 void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) { 214 return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix); 215 } 216 217 // Convert to pointer operator. This allows code to check any FileSpec objects 218 // to see if they contain anything valid using code such as: 219 // 220 // if (file_spec) 221 // {} 222 FileSpec::operator bool() const { return m_filename || m_directory; } 223 224 // Logical NOT operator. This allows code to check any FileSpec objects to see 225 // if they are invalid using code such as: 226 // 227 // if (!file_spec) 228 // {} 229 bool FileSpec::operator!() const { return !m_directory && !m_filename; } 230 231 bool FileSpec::DirectoryEquals(const FileSpec &rhs) const { 232 const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive(); 233 return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive); 234 } 235 236 bool FileSpec::FileEquals(const FileSpec &rhs) const { 237 const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive(); 238 return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive); 239 } 240 241 // Equal to operator 242 bool FileSpec::operator==(const FileSpec &rhs) const { 243 return FileEquals(rhs) && DirectoryEquals(rhs); 244 } 245 246 // Not equal to operator 247 bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); } 248 249 // Less than operator 250 bool FileSpec::operator<(const FileSpec &rhs) const { 251 return FileSpec::Compare(*this, rhs, true) < 0; 252 } 253 254 // Dump a FileSpec object to a stream 255 Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) { 256 f.Dump(s.AsRawOstream()); 257 return s; 258 } 259 260 // Clear this object by releasing both the directory and filename string values 261 // and making them both the empty string. 262 void FileSpec::Clear() { 263 m_directory.Clear(); 264 m_filename.Clear(); 265 PathWasModified(); 266 } 267 268 // Compare two FileSpec objects. If "full" is true, then both the directory and 269 // the filename must match. If "full" is false, then the directory names for 270 // "a" and "b" are only compared if they are both non-empty. This allows a 271 // FileSpec object to only contain a filename and it can match FileSpec objects 272 // that have matching filenames with different paths. 273 // 274 // Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if 275 // "a" is greater than "b". 276 int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) { 277 int result = 0; 278 279 // case sensitivity of compare 280 const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive(); 281 282 // If full is true, then we must compare both the directory and filename. 283 284 // If full is false, then if either directory is empty, then we match on the 285 // basename only, and if both directories have valid values, we still do a 286 // full compare. This allows for matching when we just have a filename in one 287 // of the FileSpec objects. 288 289 if (full || (a.m_directory && b.m_directory)) { 290 result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive); 291 if (result) 292 return result; 293 } 294 return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive); 295 } 296 297 bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) { 298 if (full || (a.GetDirectory() && b.GetDirectory())) 299 return a == b; 300 301 return a.FileEquals(b); 302 } 303 304 bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) { 305 if (pattern.GetDirectory()) 306 return pattern == file; 307 if (pattern.GetFilename()) 308 return pattern.FileEquals(file); 309 return true; 310 } 311 312 std::optional<FileSpec::Style> 313 FileSpec::GuessPathStyle(llvm::StringRef absolute_path) { 314 if (absolute_path.starts_with("/")) 315 return Style::posix; 316 if (absolute_path.starts_with(R"(\\)")) 317 return Style::windows; 318 if (absolute_path.size() >= 3 && llvm::isAlpha(absolute_path[0]) && 319 (absolute_path.substr(1, 2) == R"(:\)" || 320 absolute_path.substr(1, 2) == R"(:/)")) 321 return Style::windows; 322 return std::nullopt; 323 } 324 325 // Dump the object to the supplied stream. If the object contains a valid 326 // directory name, it will be displayed followed by a directory delimiter, and 327 // the filename. 328 void FileSpec::Dump(llvm::raw_ostream &s) const { 329 std::string path{GetPath(true)}; 330 s << path; 331 char path_separator = GetPreferredPathSeparator(m_style); 332 if (!m_filename && !path.empty() && path.back() != path_separator) 333 s << path_separator; 334 } 335 336 FileSpec::Style FileSpec::GetPathStyle() const { return m_style; } 337 338 void FileSpec::SetDirectory(ConstString directory) { 339 m_directory = directory; 340 PathWasModified(); 341 } 342 343 void FileSpec::SetDirectory(llvm::StringRef directory) { 344 m_directory = ConstString(directory); 345 PathWasModified(); 346 } 347 348 void FileSpec::SetFilename(ConstString filename) { 349 m_filename = filename; 350 PathWasModified(); 351 } 352 353 void FileSpec::SetFilename(llvm::StringRef filename) { 354 m_filename = ConstString(filename); 355 PathWasModified(); 356 } 357 358 void FileSpec::ClearFilename() { 359 m_filename.Clear(); 360 PathWasModified(); 361 } 362 363 void FileSpec::ClearDirectory() { 364 m_directory.Clear(); 365 PathWasModified(); 366 } 367 368 // Extract the directory and path into a fixed buffer. This is needed as the 369 // directory and path are stored in separate string values. 370 size_t FileSpec::GetPath(char *path, size_t path_max_len, 371 bool denormalize) const { 372 if (!path) 373 return 0; 374 375 std::string result = GetPath(denormalize); 376 ::snprintf(path, path_max_len, "%s", result.c_str()); 377 return std::min(path_max_len - 1, result.length()); 378 } 379 380 std::string FileSpec::GetPath(bool denormalize) const { 381 llvm::SmallString<64> result; 382 GetPath(result, denormalize); 383 return static_cast<std::string>(result); 384 } 385 386 ConstString FileSpec::GetPathAsConstString(bool denormalize) const { 387 return ConstString{GetPath(denormalize)}; 388 } 389 390 void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path, 391 bool denormalize) const { 392 path.append(m_directory.GetStringRef().begin(), 393 m_directory.GetStringRef().end()); 394 // Since the path was normalized and all paths use '/' when stored in these 395 // objects, we don't need to look for the actual syntax specific path 396 // separator, we just look for and insert '/'. 397 if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' && 398 m_filename.GetStringRef().back() != '/') 399 path.insert(path.end(), '/'); 400 path.append(m_filename.GetStringRef().begin(), 401 m_filename.GetStringRef().end()); 402 if (denormalize && !path.empty()) 403 Denormalize(path, m_style); 404 } 405 406 llvm::StringRef FileSpec::GetFileNameExtension() const { 407 return llvm::sys::path::extension(m_filename.GetStringRef(), m_style); 408 } 409 410 ConstString FileSpec::GetFileNameStrippingExtension() const { 411 return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style)); 412 } 413 414 // Return the size in bytes that this object takes in memory. This returns the 415 // size in bytes of this object, not any shared string values it may refer to. 416 size_t FileSpec::MemorySize() const { 417 return m_filename.MemorySize() + m_directory.MemorySize(); 418 } 419 420 FileSpec 421 FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const { 422 FileSpec ret = *this; 423 ret.AppendPathComponent(component); 424 return ret; 425 } 426 427 FileSpec FileSpec::CopyByRemovingLastPathComponent() const { 428 llvm::SmallString<64> current_path; 429 GetPath(current_path, false); 430 if (llvm::sys::path::has_parent_path(current_path, m_style)) 431 return FileSpec(llvm::sys::path::parent_path(current_path, m_style), 432 m_style); 433 return *this; 434 } 435 436 void FileSpec::PrependPathComponent(llvm::StringRef component) { 437 llvm::SmallString<64> new_path(component); 438 llvm::SmallString<64> current_path; 439 GetPath(current_path, false); 440 llvm::sys::path::append(new_path, 441 llvm::sys::path::begin(current_path, m_style), 442 llvm::sys::path::end(current_path), m_style); 443 SetFile(new_path, m_style); 444 } 445 446 void FileSpec::PrependPathComponent(const FileSpec &new_path) { 447 return PrependPathComponent(new_path.GetPath(false)); 448 } 449 450 void FileSpec::AppendPathComponent(llvm::StringRef component) { 451 llvm::SmallString<64> current_path; 452 GetPath(current_path, false); 453 llvm::sys::path::append(current_path, m_style, component); 454 SetFile(current_path, m_style); 455 } 456 457 void FileSpec::AppendPathComponent(const FileSpec &new_path) { 458 return AppendPathComponent(new_path.GetPath(false)); 459 } 460 461 bool FileSpec::RemoveLastPathComponent() { 462 llvm::SmallString<64> current_path; 463 GetPath(current_path, false); 464 if (llvm::sys::path::has_parent_path(current_path, m_style)) { 465 SetFile(llvm::sys::path::parent_path(current_path, m_style)); 466 return true; 467 } 468 return false; 469 } 470 471 std::vector<llvm::StringRef> FileSpec::GetComponents() const { 472 std::vector<llvm::StringRef> components; 473 474 auto dir_begin = llvm::sys::path::begin(m_directory.GetStringRef(), m_style); 475 auto dir_end = llvm::sys::path::end(m_directory.GetStringRef()); 476 477 for (auto iter = dir_begin; iter != dir_end; ++iter) { 478 if (*iter == "/" || *iter == ".") 479 continue; 480 481 components.push_back(*iter); 482 } 483 484 if (!m_filename.IsEmpty() && m_filename != "/" && m_filename != ".") 485 components.push_back(m_filename.GetStringRef()); 486 487 return components; 488 } 489 490 /// Returns true if the filespec represents an implementation source 491 /// file (files with a ".c", ".cpp", ".m", ".mm" (many more) 492 /// extension). 493 /// 494 /// \return 495 /// \b true if the filespec represents an implementation source 496 /// file, \b false otherwise. 497 bool FileSpec::IsSourceImplementationFile() const { 498 llvm::StringRef extension = GetFileNameExtension(); 499 if (extension.empty()) 500 return false; 501 502 static RegularExpression g_source_file_regex(llvm::StringRef( 503 "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|[" 504 "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO][" 505 "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])" 506 "$")); 507 return g_source_file_regex.Execute(extension); 508 } 509 510 bool FileSpec::IsRelative() const { 511 return !IsAbsolute(); 512 } 513 514 bool FileSpec::IsAbsolute() const { 515 // Check if we have cached if this path is absolute to avoid recalculating. 516 if (m_absolute != Absolute::Calculate) 517 return m_absolute == Absolute::Yes; 518 519 m_absolute = Absolute::No; 520 521 llvm::SmallString<64> path; 522 GetPath(path, false); 523 524 if (!path.empty()) { 525 // We consider paths starting with ~ to be absolute. 526 if (path[0] == '~' || llvm::sys::path::is_absolute(path, m_style)) 527 m_absolute = Absolute::Yes; 528 } 529 530 return m_absolute == Absolute::Yes; 531 } 532 533 void FileSpec::MakeAbsolute(const FileSpec &dir) { 534 if (IsRelative()) 535 PrependPathComponent(dir); 536 } 537 538 void llvm::format_provider<FileSpec>::format(const FileSpec &F, 539 raw_ostream &Stream, 540 StringRef Style) { 541 assert((Style.empty() || Style.equals_insensitive("F") || 542 Style.equals_insensitive("D")) && 543 "Invalid FileSpec style!"); 544 545 StringRef dir = F.GetDirectory().GetStringRef(); 546 StringRef file = F.GetFilename().GetStringRef(); 547 548 if (dir.empty() && file.empty()) { 549 Stream << "(empty)"; 550 return; 551 } 552 553 if (Style.equals_insensitive("F")) { 554 Stream << (file.empty() ? "(empty)" : file); 555 return; 556 } 557 558 // Style is either D or empty, either way we need to print the directory. 559 if (!dir.empty()) { 560 // Directory is stored in normalized form, which might be different than 561 // preferred form. In order to handle this, we need to cut off the 562 // filename, then denormalize, then write the entire denorm'ed directory. 563 llvm::SmallString<64> denormalized_dir = dir; 564 Denormalize(denormalized_dir, F.GetPathStyle()); 565 Stream << denormalized_dir; 566 Stream << GetPreferredPathSeparator(F.GetPathStyle()); 567 } 568 569 if (Style.equals_insensitive("D")) { 570 // We only want to print the directory, so now just exit. 571 if (dir.empty()) 572 Stream << "(empty)"; 573 return; 574 } 575 576 if (!file.empty()) 577 Stream << file; 578 } 579