1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "SourceCode.h" 9 10 #include "FuzzyMatch.h" 11 #include "Preamble.h" 12 #include "Protocol.h" 13 #include "support/Context.h" 14 #include "support/Logger.h" 15 #include "clang/Basic/FileEntry.h" 16 #include "clang/Basic/LangOptions.h" 17 #include "clang/Basic/SourceLocation.h" 18 #include "clang/Basic/SourceManager.h" 19 #include "clang/Basic/TokenKinds.h" 20 #include "clang/Driver/Types.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/Lexer.h" 23 #include "clang/Lex/Preprocessor.h" 24 #include "clang/Lex/Token.h" 25 #include "clang/Tooling/Core/Replacement.h" 26 #include "clang/Tooling/Syntax/Tokens.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/BitVector.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/ADT/StringMap.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Compiler.h" 34 #include "llvm/Support/Errc.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/LineIterator.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/VirtualFileSystem.h" 41 #include "llvm/Support/xxhash.h" 42 #include <algorithm> 43 #include <cstddef> 44 #include <optional> 45 #include <string> 46 #include <vector> 47 48 namespace clang { 49 namespace clangd { 50 51 // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! 52 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. 53 54 // Iterates over unicode codepoints in the (UTF-8) string. 
For each, 55 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. 56 // Returns true if CB returned true, false if we hit the end of string. 57 // 58 // If the string is not valid UTF-8, we log this error and "decode" the 59 // text in some arbitrary way. This is pretty sad, but this tends to happen deep 60 // within indexing of headers where clang misdetected the encoding, and 61 // propagating the error all the way back up is (probably?) not be worth it. 62 template <typename Callback> 63 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { 64 bool LoggedInvalid = false; 65 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 66 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. 67 for (size_t I = 0; I < U8.size();) { 68 unsigned char C = static_cast<unsigned char>(U8[I]); 69 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. 70 if (CB(1, 1)) 71 return true; 72 ++I; 73 continue; 74 } 75 // This convenient property of UTF-8 holds for all non-ASCII characters. 76 size_t UTF8Length = llvm::countl_one(C); 77 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. 78 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. 79 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { 80 if (!LoggedInvalid) { 81 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); 82 LoggedInvalid = true; 83 } 84 // We can't give a correct result, but avoid returning something wild. 85 // Pretend this is a valid ASCII byte, for lack of better options. 86 // (Too late to get ISO-8859-* right, we've skipped some bytes already). 87 if (CB(1, 1)) 88 return true; 89 ++I; 90 continue; 91 } 92 I += UTF8Length; // Skip over all trailing bytes. 93 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 94 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) 95 if (CB(UTF8Length, UTF8Length == 4 ? 
2 : 1)) 96 return true; 97 } 98 return false; 99 } 100 101 // Returns the byte offset into the string that is an offset of \p Units in 102 // the specified encoding. 103 // Conceptually, this converts to the encoding, truncates to CodeUnits, 104 // converts back to UTF-8, and returns the length in bytes. 105 static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, 106 bool &Valid) { 107 Valid = Units >= 0; 108 if (Units <= 0) 109 return 0; 110 size_t Result = 0; 111 switch (Enc) { 112 case OffsetEncoding::UTF8: 113 Result = Units; 114 break; 115 case OffsetEncoding::UTF16: 116 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 117 Result += U8Len; 118 Units -= U16Len; 119 return Units <= 0; 120 }); 121 if (Units < 0) // Offset in the middle of a surrogate pair. 122 Valid = false; 123 break; 124 case OffsetEncoding::UTF32: 125 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 126 Result += U8Len; 127 Units--; 128 return Units <= 0; 129 }); 130 break; 131 case OffsetEncoding::UnsupportedEncoding: 132 llvm_unreachable("unsupported encoding"); 133 } 134 // Don't return an out-of-range index if we overran. 135 if (Result > U8.size()) { 136 Valid = false; 137 return U8.size(); 138 } 139 return Result; 140 } 141 142 Key<OffsetEncoding> kCurrentOffsetEncoding; 143 static OffsetEncoding lspEncoding() { 144 auto *Enc = Context::current().get(kCurrentOffsetEncoding); 145 return Enc ? *Enc : OffsetEncoding::UTF16; 146 } 147 148 // Like most strings in clangd, the input is UTF-8 encoded. 
149 size_t lspLength(llvm::StringRef Code) { 150 size_t Count = 0; 151 switch (lspEncoding()) { 152 case OffsetEncoding::UTF8: 153 Count = Code.size(); 154 break; 155 case OffsetEncoding::UTF16: 156 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 157 Count += U16Len; 158 return false; 159 }); 160 break; 161 case OffsetEncoding::UTF32: 162 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 163 ++Count; 164 return false; 165 }); 166 break; 167 case OffsetEncoding::UnsupportedEncoding: 168 llvm_unreachable("unsupported encoding"); 169 } 170 return Count; 171 } 172 173 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, 174 bool AllowColumnsBeyondLineLength) { 175 if (P.line < 0) 176 return error(llvm::errc::invalid_argument, 177 "Line value can't be negative ({0})", P.line); 178 if (P.character < 0) 179 return error(llvm::errc::invalid_argument, 180 "Character value can't be negative ({0})", P.character); 181 size_t StartOfLine = 0; 182 for (int I = 0; I != P.line; ++I) { 183 size_t NextNL = Code.find('\n', StartOfLine); 184 if (NextNL == llvm::StringRef::npos) 185 return error(llvm::errc::invalid_argument, 186 "Line value is out of range ({0})", P.line); 187 StartOfLine = NextNL + 1; 188 } 189 StringRef Line = 190 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); 191 192 // P.character may be in UTF-16, transcode if necessary. 
193 bool Valid; 194 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); 195 if (!Valid && !AllowColumnsBeyondLineLength) 196 return error(llvm::errc::invalid_argument, 197 "{0} offset {1} is invalid for line {2}", lspEncoding(), 198 P.character, P.line); 199 return StartOfLine + ByteInLine; 200 } 201 202 Position offsetToPosition(llvm::StringRef Code, size_t Offset) { 203 Offset = std::min(Code.size(), Offset); 204 llvm::StringRef Before = Code.substr(0, Offset); 205 int Lines = Before.count('\n'); 206 size_t PrevNL = Before.rfind('\n'); 207 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); 208 Position Pos; 209 Pos.line = Lines; 210 Pos.character = lspLength(Before.substr(StartOfLine)); 211 return Pos; 212 } 213 214 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) { 215 // We use the SourceManager's line tables, but its column number is in bytes. 216 FileID FID; 217 unsigned Offset; 218 std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc); 219 Position P; 220 P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1; 221 bool Invalid = false; 222 llvm::StringRef Code = SM.getBufferData(FID, &Invalid); 223 if (!Invalid) { 224 auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1; 225 auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes); 226 P.character = lspLength(LineSoFar); 227 } 228 return P; 229 } 230 231 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) { 232 if (Loc.isFileID()) 233 return true; 234 auto Spelling = SM.getDecomposedSpellingLoc(Loc); 235 bool InvalidSLocEntry = false; 236 const auto SLocEntry = SM.getSLocEntry(Spelling.first, &InvalidSLocEntry); 237 if (InvalidSLocEntry) 238 return false; 239 StringRef SpellingFile = SLocEntry.getFile().getName(); 240 if (SpellingFile == "<scratch space>") 241 return false; 242 if (SpellingFile == "<built-in>") 243 // __STDC__ etc are considered spelled, but BAR in arg -DFOO=BAR is not. 
244 return !SM.isWrittenInCommandLineFile( 245 SM.getComposedLoc(Spelling.first, Spelling.second)); 246 return true; 247 } 248 249 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { 250 if (!R.getBegin().isValid() || !R.getEnd().isValid()) 251 return false; 252 253 FileID BeginFID; 254 size_t BeginOffset = 0; 255 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); 256 257 FileID EndFID; 258 size_t EndOffset = 0; 259 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd()); 260 261 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; 262 } 263 264 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) { 265 assert(SM.getLocForEndOfFile(IncludedFile).isFileID()); 266 FileID IncludingFile; 267 unsigned Offset; 268 std::tie(IncludingFile, Offset) = 269 SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile)); 270 bool Invalid = false; 271 llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid); 272 if (Invalid) 273 return SourceLocation(); 274 // Now buf is "...\n#include <foo>\n..." 275 // and Offset points here: ^ 276 // Rewind to the preceding # on the line. 277 assert(Offset < Buf.size()); 278 for (;; --Offset) { 279 if (Buf[Offset] == '#') 280 return SM.getComposedLoc(IncludingFile, Offset); 281 if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on? 282 return SourceLocation(); 283 } 284 } 285 286 static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM, 287 const LangOptions &LangOpts) { 288 Token TheTok; 289 if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) 290 return 0; 291 // FIXME: Here we check whether the token at the location is a greatergreater 292 // (>>) token and consider it as a single greater (>). This is to get it 293 // working for templates but it isn't correct for the right shift operator. 
We 294 // can avoid this by using half open char ranges in getFileRange() but getting 295 // token ending is not well supported in macroIDs. 296 if (TheTok.is(tok::greatergreater)) 297 return 1; 298 return TheTok.getLength(); 299 } 300 301 // Returns location of the last character of the token at a given loc 302 static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, 303 const SourceManager &SM, 304 const LangOptions &LangOpts) { 305 unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); 306 return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); 307 } 308 309 // Returns location of the starting of the token at a given EndLoc 310 static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, 311 const SourceManager &SM, 312 const LangOptions &LangOpts) { 313 return EndLoc.getLocWithOffset( 314 -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); 315 } 316 317 // Converts a char source range to a token range. 318 static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM, 319 const LangOptions &LangOpts) { 320 if (!Range.isTokenRange()) 321 Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); 322 return Range.getAsRange(); 323 } 324 // Returns the union of two token ranges. 325 // To find the maximum of the Ends of the ranges, we compare the location of the 326 // last character of the token. 327 static SourceRange unionTokenRange(SourceRange R1, SourceRange R2, 328 const SourceManager &SM, 329 const LangOptions &LangOpts) { 330 SourceLocation Begin = 331 SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin()) 332 ? R1.getBegin() 333 : R2.getBegin(); 334 SourceLocation End = 335 SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts), 336 getLocForTokenEnd(R2.getEnd(), SM, LangOpts)) 337 ? 
R2.getEnd() 338 : R1.getEnd(); 339 return SourceRange(Begin, End); 340 } 341 342 // Given a range whose endpoints may be in different expansions or files, 343 // tries to find a range within a common file by following up the expansion and 344 // include location in each. 345 static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM, 346 const LangOptions &LangOpts) { 347 // Fast path for most common cases. 348 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd())) 349 return R; 350 // Record the stack of expansion locations for the beginning, keyed by FileID. 351 llvm::DenseMap<FileID, SourceLocation> BeginExpansions; 352 for (SourceLocation Begin = R.getBegin(); Begin.isValid(); 353 Begin = Begin.isFileID() 354 ? includeHashLoc(SM.getFileID(Begin), SM) 355 : SM.getImmediateExpansionRange(Begin).getBegin()) { 356 BeginExpansions[SM.getFileID(Begin)] = Begin; 357 } 358 // Move up the stack of expansion locations for the end until we find the 359 // location in BeginExpansions with that has the same file id. 360 for (SourceLocation End = R.getEnd(); End.isValid(); 361 End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM) 362 : toTokenRange(SM.getImmediateExpansionRange(End), 363 SM, LangOpts) 364 .getEnd()) { 365 auto It = BeginExpansions.find(SM.getFileID(End)); 366 if (It != BeginExpansions.end()) { 367 if (SM.getFileOffset(It->second) > SM.getFileOffset(End)) 368 return SourceLocation(); 369 return {It->second, End}; 370 } 371 } 372 return SourceRange(); 373 } 374 375 // Find an expansion range (not necessarily immediate) the ends of which are in 376 // the same file id. 
static SourceRange
getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM,
                                 const LangOptions &LangOpts) {
  return rangeInCommonFile(
      toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM,
      LangOpts);
}

// Returns the file range for a given Location as a Token Range
// This is quite similar to getFileLoc in SourceManager as both use
// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs).
// However:
// - We want to maintain the full range information as we move from one file to
//   the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
// - We want to split '>>' tokens as the lexer parses the '>>' in nested
//   template instantiations as a '>>' instead of two '>'s.
// There is also getExpansionRange but it simply calls
// getImmediateExpansionRange on the begin and ends separately which is wrong.
// Returns an invalid range if either end has no expansion range in a common
// file.
static SourceRange getTokenFileRange(SourceLocation Loc,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) {
  SourceRange FileRange = Loc;
  // Keep stepping out of macro locations until the begin is a file location.
  while (!FileRange.getBegin().isFileID()) {
    if (SM.isMacroArgExpansion(FileRange.getBegin())) {
      // Macro argument: follow the spelling of both ends.
      FileRange = unionTokenRange(
          SM.getImmediateSpellingLoc(FileRange.getBegin()),
          SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
      assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
    } else {
      // Otherwise follow the expansion ranges of both ends and merge them.
      SourceRange ExpansionRangeForBegin =
          getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
      SourceRange ExpansionRangeForEnd =
          getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
      if (ExpansionRangeForBegin.isInvalid() ||
          ExpansionRangeForEnd.isInvalid())
        return SourceRange();
      assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
                                    ExpansionRangeForEnd.getBegin()) &&
             "Both Expansion ranges should be in same file.");
      FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
                                  SM, LangOpts);
    }
  }
  return FileRange;
}

// Returns true if the expansion location of \p Loc is in the main file or in
// the preamble file. Invalid locations are never inside the main file.
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
  if (!Loc.isValid())
    return false;
  FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
  return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
}

// Maps \p R to a half-open character range within a single file.
// Returns std::nullopt if either end, or the combined range, does not form a
// valid file range (see isValidFileRange).
std::optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
                                               const LangOptions &LangOpts,
                                               SourceRange R) {
  SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
  if (!isValidFileRange(SM, R1))
    return std::nullopt;

  SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
  if (!isValidFileRange(SM, R2))
    return std::nullopt;

  SourceRange Result =
      rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
  unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
  // Convert from closed token range to half-open (char) range
  Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
  if (!isValidFileRange(SM, Result))
    return std::nullopt;

  return Result;
}

// Returns the text of the buffer between the begin and end offsets of \p R.
// \p R must be a valid file range and its buffer must be available; both are
// only checked by assertions.
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
  assert(isValidFileRange(SM, R));
  auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
  assert(Buf);

  size_t BeginOffset = SM.getFileOffset(R.getBegin());
  size_t EndOffset = SM.getFileOffset(R.getEnd());
  return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
}

// Converts the LSP position \p P to a SourceLocation in the main file.
// Columns beyond the end of the line are rejected; the error from
// positionToOffset is propagated.
llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
                                                        Position P) {
  llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
  auto Offset =
      positionToOffset(Code, P, /*AllowColumnsBeyondLineLength=*/false);
  if (!Offset)
    return Offset.takeError();
  return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
}

// Converts both ends of \p R to LSP positions.
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
  // Clang is 1-based, LSP uses 0-based indexes.
  Position Begin = sourceLocToPosition(SM, R.getBegin());
  Position End = sourceLocToPosition(SM, R.getEnd());

  return {Begin, End};
}

// Grows \p A in place so that it also covers \p B.
void unionRanges(Range &A, Range B) {
  if (B.start < A.start)
    A.start = B.start;
  if (A.end < B.end)
    A.end = B.end;
}

// Converts a byte offset into \p Code to a 1-based (line, column) pair, with
// the column counted in bytes. Offsets past the end are clamped to the end.
std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
                                                  size_t Offset) {
  Offset = std::min(Code.size(), Offset);
  llvm::StringRef Before = Code.substr(0, Offset);
  int Lines = Before.count('\n');
  size_t PrevNL = Before.rfind('\n');
  size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
  return {Lines + 1, Offset - StartOfLine + 1};
}

// Splits \p QName at its last "::" into (scope, unqualified name).
// The scope keeps its trailing "::"; it is empty if \p QName has no "::".
std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
  size_t Pos = QName.rfind("::");
  if (Pos == llvm::StringRef::npos)
    return {llvm::StringRef(), QName};
  return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
}

// Converts \p R, whose offset and length are bytes into \p Code, to a TextEdit
// with an LSP range.
TextEdit replacementToEdit(llvm::StringRef Code,
                           const tooling::Replacement &R) {
  Range ReplacementRange = {
      offsetToPosition(Code, R.getOffset()),
      offsetToPosition(Code, R.getOffset() + R.getLength())};
  return {ReplacementRange, std::string(R.getReplacementText())};
}

// Converts every replacement in \p Repls with replacementToEdit, preserving
// iteration order.
std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
                                          const tooling::Replacements &Repls) {
  std::vector<TextEdit> Edits;
  for (const auto &R : Repls)
    Edits.push_back(replacementToEdit(Code, R));
  return Edits;
}

// Returns an absolute path for \p F in which the parent directory has been
// replaced by its canonical name, when the FileManager knows that directory.
// Returns std::nullopt if a relative path cannot be made absolute.
std::optional<std::string> getCanonicalPath(const FileEntryRef F,
                                            FileManager &FileMgr) {
  llvm::SmallString<128> FilePath = F.getName();
  if (!llvm::sys::path::is_absolute(FilePath)) {
    if (auto EC =
            FileMgr.getVirtualFileSystem().makeAbsolute(
                FilePath)) {
      elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
           EC.message());
      return std::nullopt;
    }
  }

  // Handle the symbolic link path case where the current working directory
  // (getCurrentWorkingDirectory) is a symlink. We always want the real
  // file path (instead of the symlink path) for the C++ symbols.
  //
  // Consider the following example:
  //
  //   src dir: /project/src/foo.h
  //   current working directory (symlink): /tmp/build -> /project/src/
  //
  //  The file path of Symbol is "/project/src/foo.h" instead of
  //  "/tmp/build/foo.h"
  if (auto Dir = FileMgr.getOptionalDirectoryRef(
          llvm::sys::path::parent_path(FilePath))) {
    llvm::SmallString<128> RealPath;
    llvm::StringRef DirName = FileMgr.getCanonicalName(*Dir);
    llvm::sys::path::append(RealPath, DirName,
                            llvm::sys::path::filename(FilePath));
    return RealPath.str().str();
  }

  // Parent directory is unknown to the FileManager: use the absolute path.
  return FilePath.str().str();
}

// Converts \p FixIt to a TextEdit: the range is the file char range of the
// fix-it's RemoveRange, the new text is its CodeToInsert.
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
                    const LangOptions &L) {
  TextEdit Result;
  Result.range =
      halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
  Result.newText = FixIt.CodeToInsert;
  return Result;
}

// Computes a digest of \p Content from its 64-bit xxh3 hash, stored least
// significant byte first.
FileDigest digest(llvm::StringRef Content) {
  uint64_t Hash{llvm::xxh3_64bits(Content)};
  FileDigest Result;
  for (unsigned I = 0; I < Result.size(); ++I) {
    Result[I] = uint8_t(Hash);
    Hash >>= 8;
  }
  return Result;
}

// Returns the digest of the buffer of \p FID, or std::nullopt if the buffer
// cannot be read.
std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
  bool Invalid = false;
  llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
  if (Invalid)
    return std::nullopt;
  return digest(Content);
}

// Returns the format style that applies to \p File, falling back to LLVM style
// (and logging) if getStyle() fails.
// \p Content is only passed on to getStyle() when \p FormatFile is true.
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                          llvm::StringRef Content,
                                          const ThreadsafeFS &TFS,
                                          bool FormatFile) {
  // Unless we're formatting a substantial amount of code (the entire file
  // or an arbitrarily large range), skip libFormat's heuristic check for
  // .h files that tries to determine whether the file contains objective-c
  // code. (This is accomplished by passing empty code contents to getStyle().
  // The heuristic is the only thing that looks at the contents.)
  // This is a workaround for PR60151, a known issue in libFormat where this
  // heuristic can OOM on large files. If we *are* formatting the entire file,
  // there's no point in doing this because the actual format::reformat() call
  // will run into the same OOM; we'd just be risking inconsistencies between
  // clangd and clang-format on smaller .h files where they disagree on what
  // language is detected.
  if (!FormatFile)
    Content = {};
  auto Style = format::getStyle(format::DefaultFormatStyle, File,
                                format::DefaultFallbackStyle, Content,
                                TFS.view(/*CWD=*/std::nullopt).get());
  if (!Style) {
    log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
        Style.takeError());
    return format::getLLVMStyle();
  }
  return *Style;
}

// Runs cleanupAroundReplacements() on \p Replaces and then formats the result
// with formatReplacements(). A cleanup error is returned unchanged.
llvm::Expected<tooling::Replacements>
cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
                 const format::FormatStyle &Style) {
  auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
  if (!CleanReplaces)
    return CleanReplaces;
  return formatReplacements(Code, std::move(*CleanReplaces), Style);
}

// Tokenizes \p Code as a standalone file and invokes \p Action on each token,
// together with the SourceManager that owns the token locations.
static void
lex(llvm::StringRef Code, const LangOptions &LangOpts,
    llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
        Action) {
  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
  std::string NullTerminatedCode = Code.str();
  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
  auto &SM = FileSM.get();
  for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
    Action(Tok, SM);
}

// Counts the occurrences of each identifier, and of each keyword spelling, in
// \p Content, lexed with the language options derived from \p Style.
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style) {
  llvm::StringMap<unsigned> Identifiers;
  auto LangOpt = format::getFormattingLangOpts(Style);
  lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
    if (Tok.kind() == tok::identifier)
      ++Identifiers[Tok.text(SM)];
    // FIXME: Should this function really return keywords too ?
    else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
      ++Identifiers[Keyword];
  });
  return Identifiers;
}

// Returns the LSP range of every identifier token in \p Content whose text
// equals \p Identifier.
std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts) {
  std::vector<Range> Ranges;
  lex(Content, LangOpts,
      [&](const syntax::Token &Tok, const SourceManager &SM) {
        if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
          return;
        Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
      });
  return Ranges;
}

// Returns true if \p NewName is found in an IdentifierTable freshly
// constructed from \p LangOpts.
bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
  // Keywords are initialized in constructor.
  clang::IdentifierTable KeywordsTable(LangOpts);
  return KeywordsTable.find(NewName) != KeywordsTable.end();
}

namespace {
// A construct in the source that changes which namespaces are visible.
struct NamespaceEvent {
  enum {
    BeginNamespace, // namespace <ns> {.     Payload is resolved <ns>.
    EndNamespace,   // } // namespace <ns>.  Payload is resolved *outer*
                    // namespace.
    UsingDirective  // using namespace <ns>. Payload is unresolved <ns>.
  } Trigger;
  std::string Payload;
  // Position of the token that completed the event.
  Position Pos;
};
// Scans C++ source code for constructs that change the visible namespaces.
// Invokes \p Callback once per event, in token order. This is a token-level
// state machine rather than a parser.
void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
                          llvm::function_ref<void(NamespaceEvent)> Callback) {

  // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
  std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
  // Stack counts open braces. true if the brace opened a namespace.
  llvm::BitVector BraceStack;

  enum {
    Default,
    Namespace,          // just saw 'namespace'
    NamespaceName,      // just saw 'namespace' NSName
    Using,              // just saw 'using'
    UsingNamespace,     // just saw 'using namespace'
    UsingNamespaceName, // just saw 'using namespace' NSName
  } State = Default;
  // Name accumulated so far while in a *Name state.
  std::string NSName;

  // Reused for every callback; Pos is refreshed on each token.
  NamespaceEvent Event;
  lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
    Event.Pos = sourceLocToPosition(SM, Tok.location());
    switch (Tok.kind()) {
    case tok::kw_using:
      State = State == Default ? Using : Default;
      break;
    case tok::kw_namespace:
      switch (State) {
      case Using:
        State = UsingNamespace;
        break;
      case Default:
        State = Namespace;
        break;
      default:
        State = Default;
        break;
      }
      break;
    case tok::identifier:
      switch (State) {
      case UsingNamespace:
        // First component of the name: start from scratch.
        NSName.clear();
        [[fallthrough]];
      case UsingNamespaceName:
        NSName.append(Tok.text(SM).str());
        State = UsingNamespaceName;
        break;
      case Namespace:
        // First component of the name: start from scratch.
        NSName.clear();
        [[fallthrough]];
      case NamespaceName:
        NSName.append(Tok.text(SM).str());
        State = NamespaceName;
        break;
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::coloncolon:
      // This can come at the beginning or in the middle of a namespace
      // name.
      switch (State) {
      case UsingNamespace:
        NSName.clear();
        [[fallthrough]];
      case UsingNamespaceName:
        NSName.append("::");
        State = UsingNamespaceName;
        break;
      case NamespaceName:
        NSName.append("::");
        State = NamespaceName;
        break;
      case Namespace: // Not legal here.
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::l_brace:
      // Record which { started a namespace, so we know when } ends one.
      if (State == NamespaceName) {
        // Parsed: namespace <name> {
        BraceStack.push_back(true);
        Enclosing.push_back(NSName);
        Event.Trigger = NamespaceEvent::BeginNamespace;
        Event.Payload = llvm::join(Enclosing, "::");
        Callback(Event);
      } else {
        // This case includes anonymous namespaces (State = Namespace).
        // For our purposes, they're not namespaces and we ignore them.
        BraceStack.push_back(false);
      }
      State = Default;
      break;
    case tok::r_brace:
      // If braces are unmatched, we're going to be confused, but don't
      // crash.
      if (!BraceStack.empty()) {
        if (BraceStack.back()) {
          // Parsed: } // namespace
          Enclosing.pop_back();
          Event.Trigger = NamespaceEvent::EndNamespace;
          Event.Payload = llvm::join(Enclosing, "::");
          Callback(Event);
        }
        BraceStack.pop_back();
      }
      break;
    case tok::semi:
      if (State == UsingNamespaceName) {
        // Parsed: using namespace <name> ;
        Event.Trigger = NamespaceEvent::UsingDirective;
        // NSName is moved-from here; it is cleared before its next use
        // because every name starts from the Namespace/UsingNamespace state.
        Event.Payload = std::move(NSName);
        Callback(Event);
      }
      State = Default;
      break;
    default:
      State = Default;
      break;
    }
  });
}

// Returns the prefix namespaces of NS: {"" ... NS}.
// For example, "a::b" yields {"", "a", "a::b"}. The results are substrings of
// \p NS, so they are only valid while the underlying data is alive.
llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
  llvm::SmallVector<llvm::StringRef> Results;
  // The empty prefix (global namespace) always comes first.
  Results.push_back(NS.take_front(0));
  NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  // Widen each component to the prefix of NS that ends with that component.
  for (llvm::StringRef &R : Results)
    R = NS.take_front(R.end() - NS.begin());
  return Results;
}

// Checks whether \p FileName is a valid spelling of main file.
bool isMainFile(llvm::StringRef FileName, const SourceManager &SM) {
  auto FE = SM.getFileManager().getOptionalFileRef(FileName);
  return FE && FE == SM.getFileEntryRefForID(SM.getMainFileID());
}

} // namespace

// Returns the namespaces visible at the end of \p Code: the namespace that is
// open at that point, all of its ancestors, and the namespaces that using
// directives recorded for any of those make visible.
// The innermost open namespace comes first; the rest is sorted and
// deduplicated.
std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
                                           const LangOptions &LangOpts) {
  // Namespace that is open at the most recent event.
  std::string Current;
  // Map from namespace to (resolved) namespaces introduced via using directive.
  llvm::StringMap<llvm::StringSet<>> UsingDirectives;

  parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
    llvm::StringRef NS = Event.Payload;
    switch (Event.Trigger) {
    case NamespaceEvent::BeginNamespace:
    case NamespaceEvent::EndNamespace:
      Current = std::move(Event.Payload);
      break;
    case NamespaceEvent::UsingDirective:
      if (NS.consume_front("::"))
        // Explicitly global name: nothing to resolve.
        UsingDirectives[Current].insert(NS);
      else {
        // The name is unresolved, so record it relative to every enclosing
        // namespace, including the global one.
        for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
          if (Enclosing.empty())
            UsingDirectives[Current].insert(NS);
          else
            UsingDirectives[Current].insert((Enclosing + "::" + NS).str());
        }
      }
      break;
    }
  });

  std::vector<std::string> Found;
  for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
    Found.push_back(std::string(Enclosing));
    auto It = UsingDirectives.find(Enclosing);
    if (It != UsingDirectives.end())
      for (const auto &Used : It->second)
        Found.push_back(std::string(Used.getKey()));
  }

  // Current sorts before everything else; the rest is ordered by operator<.
  llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
    if (Current == RHS)
      return false;
    if (Current == LHS)
      return true;
    return LHS < RHS;
  });
  Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
  return Found;
}

// Splits \p Content into words using the character roles computed by
// calculateRoles() and returns the lowercased words that have at least
// MinWordLength characters.
llvm::StringSet<> collectWords(llvm::StringRef Content) {
  // We assume short words are not significant.
  // We may want to consider other stopwords, e.g. language keywords.
  // (A very naive implementation showed no benefit, but lexing might do better)
  static constexpr int MinWordLength = 4;

  std::vector<CharRole> Roles(Content.size());
  calculateRoles(Content, Roles);

  llvm::StringSet<> Result;
  llvm::SmallString<256> Word;
  // Emits the word collected so far (if long enough) and starts a new one.
  auto Flush = [&] {
    if (Word.size() >= MinWordLength) {
      for (char &C : Word)
        C = llvm::toLower(C);
      Result.insert(Word);
    }
    Word.clear();
  };
  for (unsigned I = 0; I < Content.size(); ++I) {
    switch (Roles[I]) {
    case Head:
      // A head character ends the previous word and begins the next one.
      Flush();
      [[fallthrough]];
    case Tail:
      Word.push_back(Content[I]);
      break;
    case Unknown:
    case Separator:
      Flush();
      break;
    }
  }
  Flush();

  return Result;
}

// Heuristically decides whether \p Word looks like a code identifier, based on
// its own spelling and on the text immediately \p Before and \p After it.
static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
                               llvm::StringRef After) {
  // `foo` is an identifier.
  if (Before.ends_with("`") && After.starts_with("`"))
    return true;
  // In foo::bar, both foo and bar are identifiers.
  if (Before.ends_with("::") || After.starts_with("::"))
    return true;
  // Doxygen tags like \c foo indicate identifiers.
  // This duplicates clang's doxygen parser, revisit if it gets complicated.
  Before = Before.take_back(100); // Don't search too far back.
  auto Pos = Before.find_last_of("\\@");
  if (Pos != llvm::StringRef::npos) {
    llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
    if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
        Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
        Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
        Tag == "throws" || Tag == "link")
      return true;
  }

  // Word contains underscore.
  // This handles things like snake_case and MACRO_CASE.
  if (Word.contains('_')) {
    return true;
  }
  // Word contains capital letter other than at beginning.
  // This handles things like lowerCamel and UpperCamel.
  // The check for also containing a lowercase letter is to rule out
  // initialisms like "HTTP".
  bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
  bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
  if (HasLower && HasUpper) {
    return true;
  }
  // FIXME: consider mid-sentence Capitalization?
  return false;
}

// Finds the word touching \p SpelledLoc.
// If a spelled identifier or keyword token touches the location, it is
// returned directly. Otherwise the run of ASCII identifier characters around
// the location in the raw buffer is used, and std::nullopt is returned if that
// run is empty or the buffer cannot be read.
std::optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
                                                 const syntax::TokenBuffer &TB,
                                                 const LangOptions &LangOpts) {
  const auto &SM = TB.sourceManager();
  auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
  for (const auto &T : Touching) {
    // If the token is an identifier or a keyword, don't use any heuristics.
    if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
      SpelledWord Result;
      Result.Location = T.location();
      Result.Text = T.text(SM);
      Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
      Result.PartOfSpelledToken = &T;
      Result.SpelledToken = &T;
      // Only set ExpandedToken when the spelled token maps to exactly one
      // expanded token with the same text.
      auto Expanded =
          TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
      if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
        Result.ExpandedToken = &Expanded.front();
      return Result;
    }
  }
  FileID File;
  unsigned Offset;
  std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
  bool Invalid = false;
  llvm::StringRef Code = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return std::nullopt;
  // Grow [B, E) in both directions over identifier characters.
  unsigned B = Offset, E = Offset;
  while (B > 0 && isAsciiIdentifierContinue(Code[B - 1]))
    --B;
  while (E < Code.size() && isAsciiIdentifierContinue(Code[E]))
    ++E;
  if (B == E)
    return std::nullopt;

  SpelledWord Result;
  Result.Location = SM.getComposedLoc(File, B);
  Result.Text = Code.slice(B, E);
  Result.LikelyIdentifier =
      isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
      // should not be a keyword
      tok::isAnyIdentifier(
          IdentifierTable(LangOpts).get(Result.Text).getTokenID());
  // Keep the last touching token that starts at or before the word.
  for (const auto &T : Touching)
    if (T.location() <= Result.Location)
      Result.PartOfSpelledToken = &T;
  return Result;
}

std::optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
                                          Preprocessor &PP) {
  if (SpelledTok.kind() != tok::identifier)
    return std::nullopt;
  SourceLocation Loc = SpelledTok.location();
  assert(Loc.isFileID());
  const auto &SM = PP.getSourceManager();
  IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
  if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
    return std::nullopt;

  // We need to take special case to handle #define
and #undef. 1011 // Preprocessor::getMacroDefinitionAtLoc() only considers a macro 1012 // definition to be in scope *after* the location of the macro name in a 1013 // #define that introduces it, and *before* the location of the macro name 1014 // in an #undef that undefines it. To handle these cases, we check for 1015 // the macro being in scope either just after or just before the location 1016 // of the token. In getting the location before, we also take care to check 1017 // for start-of-file. 1018 FileID FID = SM.getFileID(Loc); 1019 assert(Loc != SM.getLocForEndOfFile(FID)); 1020 SourceLocation JustAfterToken = Loc.getLocWithOffset(1); 1021 auto *MacroInfo = 1022 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); 1023 if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { 1024 SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); 1025 MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) 1026 .getMacroInfo(); 1027 } 1028 if (!MacroInfo) { 1029 return std::nullopt; 1030 } 1031 return DefinedMacro{ 1032 IdentifierInfo->getName(), MacroInfo, 1033 translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; 1034 } 1035 1036 llvm::Expected<std::string> Edit::apply() const { 1037 return tooling::applyAllReplacements(InitialCode, Replacements); 1038 } 1039 1040 std::vector<TextEdit> Edit::asTextEdits() const { 1041 return replacementsToEdits(InitialCode, Replacements); 1042 } 1043 1044 bool Edit::canApplyTo(llvm::StringRef Code) const { 1045 // Create line iterators, since line numbers are important while applying our 1046 // edit we cannot skip blank lines. 
1047 auto LHS = llvm::MemoryBuffer::getMemBuffer(Code); 1048 llvm::line_iterator LHSIt(*LHS, /*SkipBlanks=*/false); 1049 1050 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode); 1051 llvm::line_iterator RHSIt(*RHS, /*SkipBlanks=*/false); 1052 1053 // Compare the InitialCode we prepared the edit for with the Code we received 1054 // line by line to make sure there are no differences. 1055 // FIXME: This check is too conservative now, it should be enough to only 1056 // check lines around the replacements contained inside the Edit. 1057 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) { 1058 if (*LHSIt != *RHSIt) 1059 return false; 1060 ++LHSIt; 1061 ++RHSIt; 1062 } 1063 1064 // After we reach EOF for any of the files we make sure the other one doesn't 1065 // contain any additional content except empty lines, they should not 1066 // interfere with the edit we produced. 1067 while (!LHSIt.is_at_eof()) { 1068 if (!LHSIt->empty()) 1069 return false; 1070 ++LHSIt; 1071 } 1072 while (!RHSIt.is_at_eof()) { 1073 if (!RHSIt->empty()) 1074 return false; 1075 ++RHSIt; 1076 } 1077 return true; 1078 } 1079 1080 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { 1081 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) 1082 E.Replacements = std::move(*NewEdits); 1083 else 1084 return NewEdits.takeError(); 1085 return llvm::Error::success(); 1086 } 1087 1088 // Workaround for editors that have buggy handling of newlines at end of file. 1089 // 1090 // The editor is supposed to expose document contents over LSP as an exact 1091 // string, with whitespace and newlines well-defined. But internally many 1092 // editors treat text as an array of lines, and there can be ambiguity over 1093 // whether the last line ends with a newline or not. 1094 // 1095 // This confusion can lead to incorrect edits being sent. Failing to apply them 1096 // is catastrophic: we're desynced, LSP has no mechanism to get back in sync. 
1097 // We apply a heuristic to avoid this state. 1098 // 1099 // If our current view of an N-line file does *not* end in a newline, but the 1100 // editor refers to the start of the next line (an impossible location), then 1101 // we silently add a newline to make this valid. 1102 // We will still validate that the rangeLength is correct, *including* the 1103 // inferred newline. 1104 // 1105 // See https://github.com/neovim/neovim/issues/17085 1106 static void inferFinalNewline(llvm::Expected<size_t> &Err, 1107 std::string &Contents, const Position &Pos) { 1108 if (Err) 1109 return; 1110 if (!Contents.empty() && Contents.back() == '\n') 1111 return; 1112 if (Pos.character != 0) 1113 return; 1114 if (Pos.line != llvm::count(Contents, '\n') + 1) 1115 return; 1116 log("Editor sent invalid change coordinates, inferring newline at EOF"); 1117 Contents.push_back('\n'); 1118 consumeError(Err.takeError()); 1119 Err = Contents.size(); 1120 } 1121 1122 llvm::Error applyChange(std::string &Contents, 1123 const TextDocumentContentChangeEvent &Change) { 1124 if (!Change.range) { 1125 Contents = Change.text; 1126 return llvm::Error::success(); 1127 } 1128 1129 const Position &Start = Change.range->start; 1130 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); 1131 inferFinalNewline(StartIndex, Contents, Start); 1132 if (!StartIndex) 1133 return StartIndex.takeError(); 1134 1135 const Position &End = Change.range->end; 1136 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); 1137 inferFinalNewline(EndIndex, Contents, End); 1138 if (!EndIndex) 1139 return EndIndex.takeError(); 1140 1141 if (*EndIndex < *StartIndex) 1142 return error(llvm::errc::invalid_argument, 1143 "Range's end position ({0}) is before start position ({1})", 1144 End, Start); 1145 1146 // Since the range length between two LSP positions is dependent on the 1147 // contents of the buffer we compute the range length between the start and 1148 // end position 
ourselves and compare it to the range length of the LSP 1149 // message to verify the buffers of the client and server are in sync. 1150 1151 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 1152 // code units. 1153 ssize_t ComputedRangeLength = 1154 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); 1155 1156 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) 1157 return error(llvm::errc::invalid_argument, 1158 "Change's rangeLength ({0}) doesn't match the " 1159 "computed range length ({1}).", 1160 *Change.rangeLength, ComputedRangeLength); 1161 1162 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); 1163 1164 return llvm::Error::success(); 1165 } 1166 1167 EligibleRegion getEligiblePoints(llvm::StringRef Code, 1168 llvm::StringRef FullyQualifiedName, 1169 const LangOptions &LangOpts) { 1170 EligibleRegion ER; 1171 // Start with global namespace. 1172 std::vector<std::string> Enclosing = {""}; 1173 // FIXME: In addition to namespaces try to generate events for function 1174 // definitions as well. One might use a closing parantheses(")" followed by an 1175 // opening brace "{" to trigger the start. 1176 parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) { 1177 // Using Directives only introduces declarations to current scope, they do 1178 // not change the current namespace, so skip them. 1179 if (Event.Trigger == NamespaceEvent::UsingDirective) 1180 return; 1181 // Do not qualify the global namespace. 1182 if (!Event.Payload.empty()) 1183 Event.Payload.append("::"); 1184 1185 std::string CurrentNamespace; 1186 if (Event.Trigger == NamespaceEvent::BeginNamespace) { 1187 Enclosing.emplace_back(std::move(Event.Payload)); 1188 CurrentNamespace = Enclosing.back(); 1189 // parseNameSpaceEvents reports the beginning position of a token; we want 1190 // to insert after '{', so increment by one. 
1191 ++Event.Pos.character; 1192 } else { 1193 // Event.Payload points to outer namespace when exiting a scope, so use 1194 // the namespace we've last entered instead. 1195 CurrentNamespace = std::move(Enclosing.back()); 1196 Enclosing.pop_back(); 1197 assert(Enclosing.back() == Event.Payload); 1198 } 1199 1200 // Ignore namespaces that are not a prefix of the target. 1201 if (!FullyQualifiedName.starts_with(CurrentNamespace)) 1202 return; 1203 1204 // Prefer the namespace that shares the longest prefix with target. 1205 if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) { 1206 ER.EligiblePoints.clear(); 1207 ER.EnclosingNamespace = CurrentNamespace; 1208 } 1209 if (CurrentNamespace.size() == ER.EnclosingNamespace.size()) 1210 ER.EligiblePoints.emplace_back(std::move(Event.Pos)); 1211 }); 1212 // If there were no shared namespaces just return EOF. 1213 if (ER.EligiblePoints.empty()) { 1214 assert(ER.EnclosingNamespace.empty()); 1215 ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size())); 1216 } 1217 return ER; 1218 } 1219 1220 bool isHeaderFile(llvm::StringRef FileName, 1221 std::optional<LangOptions> LangOpts) { 1222 // Respect the langOpts, for non-file-extension cases, e.g. standard library 1223 // files. 1224 if (LangOpts && LangOpts->IsHeaderFile) 1225 return true; 1226 namespace types = clang::driver::types; 1227 auto Lang = types::lookupTypeForExtension( 1228 llvm::sys::path::extension(FileName).substr(1)); 1229 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang); 1230 } 1231 1232 bool isProtoFile(SourceLocation Loc, const SourceManager &SM) { 1233 auto FileName = SM.getFilename(Loc); 1234 if (!FileName.ends_with(".proto.h") && !FileName.ends_with(".pb.h")) 1235 return false; 1236 auto FID = SM.getFileID(Loc); 1237 // All proto generated headers should start with this line. 1238 static const char *ProtoHeaderComment = 1239 "// Generated by the protocol buffer compiler. 
DO NOT EDIT!"; 1240 // Double check that this is an actual protobuf header. 1241 return SM.getBufferData(FID).starts_with(ProtoHeaderComment); 1242 } 1243 1244 SourceLocation translatePreamblePatchLocation(SourceLocation Loc, 1245 const SourceManager &SM) { 1246 auto DefFile = SM.getFileID(Loc); 1247 if (auto FE = SM.getFileEntryRefForID(DefFile)) { 1248 auto IncludeLoc = SM.getIncludeLoc(DefFile); 1249 // Preamble patch is included inside the builtin file. 1250 if (IncludeLoc.isValid() && SM.isWrittenInBuiltinFile(IncludeLoc) && 1251 FE->getName().ends_with(PreamblePatch::HeaderName)) { 1252 auto Presumed = SM.getPresumedLoc(Loc); 1253 // Check that line directive is pointing at main file. 1254 if (Presumed.isValid() && Presumed.getFileID().isInvalid() && 1255 isMainFile(Presumed.getFilename(), SM)) { 1256 Loc = SM.translateLineCol(SM.getMainFileID(), Presumed.getLine(), 1257 Presumed.getColumn()); 1258 } 1259 } 1260 } 1261 return Loc; 1262 } 1263 1264 clangd::Range rangeTillEOL(llvm::StringRef Code, unsigned HashOffset) { 1265 clangd::Range Result; 1266 Result.end = Result.start = offsetToPosition(Code, HashOffset); 1267 1268 // Span the warning until the EOL or EOF. 1269 Result.end.character += 1270 lspLength(Code.drop_front(HashOffset).take_until([](char C) { 1271 return C == '\n' || C == '\r'; 1272 })); 1273 return Result; 1274 } 1275 } // namespace clangd 1276 } // namespace clang 1277