//===--- SourceCode.cpp - Manipulating source code as strings ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SourceCode.h"

#include "FuzzyMatch.h"
#include "Preamble.h"
#include "Protocol.h"
#include "refactor/Tweak.h"
#include "support/Context.h"
#include "support/Logger.h"
#include "support/Threading.h"
#include "clang/AST/ASTContext.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Driver/Types.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

namespace clang {
namespace clangd {

// Here be dragons. LSP positions use columns measured in *UTF-16 code units*!
55 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. 56 57 // Iterates over unicode codepoints in the (UTF-8) string. For each, 58 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. 59 // Returns true if CB returned true, false if we hit the end of string. 60 // 61 // If the string is not valid UTF-8, we log this error and "decode" the 62 // text in some arbitrary way. This is pretty sad, but this tends to happen deep 63 // within indexing of headers where clang misdetected the encoding, and 64 // propagating the error all the way back up is (probably?) not be worth it. 65 template <typename Callback> 66 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { 67 bool LoggedInvalid = false; 68 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 69 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. 70 for (size_t I = 0; I < U8.size();) { 71 unsigned char C = static_cast<unsigned char>(U8[I]); 72 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. 73 if (CB(1, 1)) 74 return true; 75 ++I; 76 continue; 77 } 78 // This convenient property of UTF-8 holds for all non-ASCII characters. 79 size_t UTF8Length = llvm::countLeadingOnes(C); 80 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. 81 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. 82 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { 83 if (!LoggedInvalid) { 84 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); 85 LoggedInvalid = true; 86 } 87 // We can't give a correct result, but avoid returning something wild. 88 // Pretend this is a valid ASCII byte, for lack of better options. 89 // (Too late to get ISO-8859-* right, we've skipped some bytes already). 90 if (CB(1, 1)) 91 return true; 92 ++I; 93 continue; 94 } 95 I += UTF8Length; // Skip over all trailing bytes. 96 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 
97 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) 98 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) 99 return true; 100 } 101 return false; 102 } 103 104 // Returns the byte offset into the string that is an offset of \p Units in 105 // the specified encoding. 106 // Conceptually, this converts to the encoding, truncates to CodeUnits, 107 // converts back to UTF-8, and returns the length in bytes. 108 static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, 109 bool &Valid) { 110 Valid = Units >= 0; 111 if (Units <= 0) 112 return 0; 113 size_t Result = 0; 114 switch (Enc) { 115 case OffsetEncoding::UTF8: 116 Result = Units; 117 break; 118 case OffsetEncoding::UTF16: 119 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 120 Result += U8Len; 121 Units -= U16Len; 122 return Units <= 0; 123 }); 124 if (Units < 0) // Offset in the middle of a surrogate pair. 125 Valid = false; 126 break; 127 case OffsetEncoding::UTF32: 128 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 129 Result += U8Len; 130 Units--; 131 return Units <= 0; 132 }); 133 break; 134 case OffsetEncoding::UnsupportedEncoding: 135 llvm_unreachable("unsupported encoding"); 136 } 137 // Don't return an out-of-range index if we overran. 138 if (Result > U8.size()) { 139 Valid = false; 140 return U8.size(); 141 } 142 return Result; 143 } 144 145 Key<OffsetEncoding> kCurrentOffsetEncoding; 146 static OffsetEncoding lspEncoding() { 147 auto *Enc = Context::current().get(kCurrentOffsetEncoding); 148 return Enc ? *Enc : OffsetEncoding::UTF16; 149 } 150 151 // Like most strings in clangd, the input is UTF-8 encoded. 
152 size_t lspLength(llvm::StringRef Code) { 153 size_t Count = 0; 154 switch (lspEncoding()) { 155 case OffsetEncoding::UTF8: 156 Count = Code.size(); 157 break; 158 case OffsetEncoding::UTF16: 159 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 160 Count += U16Len; 161 return false; 162 }); 163 break; 164 case OffsetEncoding::UTF32: 165 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 166 ++Count; 167 return false; 168 }); 169 break; 170 case OffsetEncoding::UnsupportedEncoding: 171 llvm_unreachable("unsupported encoding"); 172 } 173 return Count; 174 } 175 176 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, 177 bool AllowColumnsBeyondLineLength) { 178 if (P.line < 0) 179 return error(llvm::errc::invalid_argument, 180 "Line value can't be negative ({0})", P.line); 181 if (P.character < 0) 182 return error(llvm::errc::invalid_argument, 183 "Character value can't be negative ({0})", P.character); 184 size_t StartOfLine = 0; 185 for (int I = 0; I != P.line; ++I) { 186 size_t NextNL = Code.find('\n', StartOfLine); 187 if (NextNL == llvm::StringRef::npos) 188 return error(llvm::errc::invalid_argument, 189 "Line value is out of range ({0})", P.line); 190 StartOfLine = NextNL + 1; 191 } 192 StringRef Line = 193 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); 194 195 // P.character may be in UTF-16, transcode if necessary. 
196 bool Valid; 197 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); 198 if (!Valid && !AllowColumnsBeyondLineLength) 199 return error(llvm::errc::invalid_argument, 200 "{0} offset {1} is invalid for line {2}", lspEncoding(), 201 P.character, P.line); 202 return StartOfLine + ByteInLine; 203 } 204 205 Position offsetToPosition(llvm::StringRef Code, size_t Offset) { 206 Offset = std::min(Code.size(), Offset); 207 llvm::StringRef Before = Code.substr(0, Offset); 208 int Lines = Before.count('\n'); 209 size_t PrevNL = Before.rfind('\n'); 210 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); 211 Position Pos; 212 Pos.line = Lines; 213 Pos.character = lspLength(Before.substr(StartOfLine)); 214 return Pos; 215 } 216 217 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) { 218 // We use the SourceManager's line tables, but its column number is in bytes. 219 FileID FID; 220 unsigned Offset; 221 std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc); 222 Position P; 223 P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1; 224 bool Invalid = false; 225 llvm::StringRef Code = SM.getBufferData(FID, &Invalid); 226 if (!Invalid) { 227 auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1; 228 auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes); 229 P.character = lspLength(LineSoFar); 230 } 231 return P; 232 } 233 234 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) { 235 if (Loc.isMacroID()) { 236 std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM); 237 if (llvm::StringRef(PrintLoc).startswith("<scratch") || 238 llvm::StringRef(PrintLoc).startswith("<command line>")) 239 return false; 240 } 241 return true; 242 } 243 244 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { 245 if (!R.getBegin().isValid() || !R.getEnd().isValid()) 246 return false; 247 248 FileID BeginFID; 249 size_t BeginOffset = 0; 250 std::tie(BeginFID, BeginOffset) 
= Mgr.getDecomposedLoc(R.getBegin()); 251 252 FileID EndFID; 253 size_t EndOffset = 0; 254 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd()); 255 256 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; 257 } 258 259 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) { 260 assert(SM.getLocForEndOfFile(IncludedFile).isFileID()); 261 FileID IncludingFile; 262 unsigned Offset; 263 std::tie(IncludingFile, Offset) = 264 SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile)); 265 bool Invalid = false; 266 llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid); 267 if (Invalid) 268 return SourceLocation(); 269 // Now buf is "...\n#include <foo>\n..." 270 // and Offset points here: ^ 271 // Rewind to the preceding # on the line. 272 assert(Offset < Buf.size()); 273 for (;; --Offset) { 274 if (Buf[Offset] == '#') 275 return SM.getComposedLoc(IncludingFile, Offset); 276 if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on? 277 return SourceLocation(); 278 } 279 } 280 281 static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM, 282 const LangOptions &LangOpts) { 283 Token TheTok; 284 if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) 285 return 0; 286 // FIXME: Here we check whether the token at the location is a greatergreater 287 // (>>) token and consider it as a single greater (>). This is to get it 288 // working for templates but it isn't correct for the right shift operator. We 289 // can avoid this by using half open char ranges in getFileRange() but getting 290 // token ending is not well supported in macroIDs. 
291 if (TheTok.is(tok::greatergreater)) 292 return 1; 293 return TheTok.getLength(); 294 } 295 296 // Returns location of the last character of the token at a given loc 297 static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, 298 const SourceManager &SM, 299 const LangOptions &LangOpts) { 300 unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); 301 return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); 302 } 303 304 // Returns location of the starting of the token at a given EndLoc 305 static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, 306 const SourceManager &SM, 307 const LangOptions &LangOpts) { 308 return EndLoc.getLocWithOffset( 309 -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); 310 } 311 312 // Converts a char source range to a token range. 313 static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM, 314 const LangOptions &LangOpts) { 315 if (!Range.isTokenRange()) 316 Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); 317 return Range.getAsRange(); 318 } 319 // Returns the union of two token ranges. 320 // To find the maximum of the Ends of the ranges, we compare the location of the 321 // last character of the token. 322 static SourceRange unionTokenRange(SourceRange R1, SourceRange R2, 323 const SourceManager &SM, 324 const LangOptions &LangOpts) { 325 SourceLocation Begin = 326 SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin()) 327 ? R1.getBegin() 328 : R2.getBegin(); 329 SourceLocation End = 330 SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts), 331 getLocForTokenEnd(R2.getEnd(), SM, LangOpts)) 332 ? R2.getEnd() 333 : R1.getEnd(); 334 return SourceRange(Begin, End); 335 } 336 337 // Given a range whose endpoints may be in different expansions or files, 338 // tries to find a range within a common file by following up the expansion and 339 // include location in each. 
340 static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM, 341 const LangOptions &LangOpts) { 342 // Fast path for most common cases. 343 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd())) 344 return R; 345 // Record the stack of expansion locations for the beginning, keyed by FileID. 346 llvm::DenseMap<FileID, SourceLocation> BeginExpansions; 347 for (SourceLocation Begin = R.getBegin(); Begin.isValid(); 348 Begin = Begin.isFileID() 349 ? includeHashLoc(SM.getFileID(Begin), SM) 350 : SM.getImmediateExpansionRange(Begin).getBegin()) { 351 BeginExpansions[SM.getFileID(Begin)] = Begin; 352 } 353 // Move up the stack of expansion locations for the end until we find the 354 // location in BeginExpansions with that has the same file id. 355 for (SourceLocation End = R.getEnd(); End.isValid(); 356 End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM) 357 : toTokenRange(SM.getImmediateExpansionRange(End), 358 SM, LangOpts) 359 .getEnd()) { 360 auto It = BeginExpansions.find(SM.getFileID(End)); 361 if (It != BeginExpansions.end()) { 362 if (SM.getFileOffset(It->second) > SM.getFileOffset(End)) 363 return SourceLocation(); 364 return {It->second, End}; 365 } 366 } 367 return SourceRange(); 368 } 369 370 // Find an expansion range (not necessarily immediate) the ends of which are in 371 // the same file id. 372 static SourceRange 373 getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM, 374 const LangOptions &LangOpts) { 375 return rangeInCommonFile( 376 toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM, 377 LangOpts); 378 } 379 380 // Returns the file range for a given Location as a Token Range 381 // This is quite similar to getFileLoc in SourceManager as both use 382 // getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). 383 // However: 384 // - We want to maintain the full range information as we move from one file to 385 // the next. 
//   getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
// - We want to split '>>' tokens as the lexer parses the '>>' in nested
//   template instantiations as a '>>' instead of two '>'s.
// There is also getExpansionRange but it simply calls
// getImmediateExpansionRange on the begin and ends separately which is wrong.
static SourceRange getTokenFileRange(SourceLocation Loc,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) {
  SourceRange FileRange = Loc;
  // Keep unwrapping one macro level at a time until the begin is a file
  // location.
  while (!FileRange.getBegin().isFileID()) {
    if (SM.isMacroArgExpansion(FileRange.getBegin())) {
      // Macro argument: follow the spelling locations of both ends.
      FileRange = unionTokenRange(
          SM.getImmediateSpellingLoc(FileRange.getBegin()),
          SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
      assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
    } else {
      // Macro body: follow the expansion ranges of both ends and merge them.
      SourceRange ExpansionRangeForBegin =
          getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
      SourceRange ExpansionRangeForEnd =
          getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
      if (ExpansionRangeForBegin.isInvalid() ||
          ExpansionRangeForEnd.isInvalid())
        return SourceRange();
      assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
                                    ExpansionRangeForEnd.getBegin()) &&
             "Both Expansion ranges should be in same file.");
      FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
                                  SM, LangOpts);
    }
  }
  return FileRange;
}

// Returns true if Loc is valid and its expansion location lies in the main
// file or in the preamble file.
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
  if (!Loc.isValid())
    return false;
  FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
  return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
}

// Maps R to a half-open range within a single file: each endpoint is turned
// into a token file range, the two are unioned and brought into a common file,
// and the end is advanced past its last token.
// Returns None if any intermediate range fails isValidFileRange().
llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
                                                const LangOptions &LangOpts,
                                                SourceRange R) {
  SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
  if (!isValidFileRange(SM, R1))
    return llvm::None;

  SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
  if (!isValidFileRange(SM, R2))
    return llvm::None;

  SourceRange Result =
      rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
  unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
  // Convert from closed token range to half-open (char) range
  Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
  if (!isValidFileRange(SM, Result))
    return llvm::None;

  return Result;
}

// Returns the buffer text between the file offsets of R's begin and end.
// R must satisfy isValidFileRange() and its buffer must be available; both
// conditions are only checked by assertions.
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
  assert(isValidFileRange(SM, R));
  auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
  assert(Buf);

  size_t BeginOffset = SM.getFileOffset(R.getBegin());
  size_t EndOffset = SM.getFileOffset(R.getEnd());
  return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
}

// Converts an LSP position into a SourceLocation in the main file.
// Columns beyond the end of the line are rejected; the error from
// positionToOffset() is forwarded to the caller.
llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
                                                        Position P) {
  llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
  auto Offset =
      positionToOffset(Code, P, /*AllowColumnsBeyondLineLength=*/false);
  if (!Offset)
    return Offset.takeError();
  return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
}

// Converts both ends of R to LSP positions and returns them as a Range.
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
  // Clang is 1-based, LSP uses 0-based indexes.
  Position Begin = sourceLocToPosition(SM, R.getBegin());
  Position End = sourceLocToPosition(SM, R.getEnd());

  return {Begin, End};
}

// Grows A in place so that it also covers B.
void unionRanges(Range &A, Range B) {
  if (B.start < A.start)
    A.start = B.start;
  if (A.end < B.end)
    A.end = B.end;
}

// Converts a byte offset within Code into a 1-based (line, column) pair, with
// the column counted in bytes. Offsets past the end of Code are clamped.
std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
                                                  size_t Offset) {
  Offset = std::min(Code.size(), Offset);
  llvm::StringRef Before = Code.substr(0, Offset);
  int Lines = Before.count('\n');
  size_t PrevNL = Before.rfind('\n');
  size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
  return {Lines + 1, Offset - StartOfLine + 1};
}

// Splits QName at its last "::" into (scope, unqualified name). The scope
// keeps its trailing "::"; it is empty when QName contains no "::".
std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
  size_t Pos = QName.rfind("::");
  if (Pos == llvm::StringRef::npos)
    return {llvm::StringRef(), QName};
  return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
}

// Converts a byte-offset based Replacement on Code into a TextEdit whose range
// is expressed as LSP positions.
TextEdit replacementToEdit(llvm::StringRef Code,
                           const tooling::Replacement &R) {
  Range ReplacementRange = {
      offsetToPosition(Code, R.getOffset()),
      offsetToPosition(Code, R.getOffset() + R.getLength())};
  return {ReplacementRange, std::string(R.getReplacementText())};
}

// Converts every replacement in Repls into a TextEdit, preserving order.
std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
                                          const tooling::Replacements &Repls) {
  std::vector<TextEdit> Edits;
  for (const auto &R : Repls)
    Edits.push_back(replacementToEdit(Code, R));
  return Edits;
}

// Returns an absolute path for F in which the directory part has been replaced
// by the FileManager's canonical name for that directory.
// Returns None if F is null or a relative path cannot be made absolute. If the
// parent directory cannot be looked up, the absolute path is returned as-is.
llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
                                             const SourceManager &SourceMgr) {
  if (!F)
    return None;

  llvm::SmallString<128> FilePath = F->getName();
  if (!llvm::sys::path::is_absolute(FilePath)) {
    if (auto EC =
            SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
                FilePath)) {
      elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
           EC.message());
      return None;
    }
  }

  // Handle the symbolic link path case where the current working directory
  // (getCurrentWorkingDirectory) is a symlink. We always want the real
  // file path (instead of the symlink path) for the C++ symbols.
  //
  // Consider the following example:
  //
  //   src dir: /project/src/foo.h
  //   current working directory (symlink): /tmp/build -> /project/src/
  //
  //  The file path of Symbol is "/project/src/foo.h" instead of
  //  "/tmp/build/foo.h"
  if (auto Dir = SourceMgr.getFileManager().getDirectory(
          llvm::sys::path::parent_path(FilePath))) {
    llvm::SmallString<128> RealPath;
    llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir);
    llvm::sys::path::append(RealPath, DirName,
                            llvm::sys::path::filename(FilePath));
    return RealPath.str().str();
  }

  return FilePath.str().str();
}

// Converts a FixItHint into a TextEdit: the range is the file char range of
// the hint's RemoveRange and the new text is the hint's CodeToInsert.
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
                    const LangOptions &L) {
  TextEdit Result;
  Result.range =
      halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
  Result.newText = FixIt.CodeToInsert;
  return Result;
}

// Computes a digest of Content from its 64-bit xxHash, storing one byte of the
// hash per element of the result, lowest byte first.
FileDigest digest(llvm::StringRef Content) {
  uint64_t Hash{llvm::xxHash64(Content)};
  FileDigest Result;
  for (unsigned I = 0; I < Result.size(); ++I) {
    Result[I] = uint8_t(Hash);
    Hash >>= 8;
  }
  return Result;
}

// Digests the buffer contents of FID, or returns None if the buffer cannot be
// read.
llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
  bool Invalid = false;
  llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
  if (Invalid)
    return None;
  return digest(Content);
}

// Looks up the format style for File via format::getStyle(), using a view of
// TFS. If that fails, the failure is logged and the LLVM style is returned.
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                          llvm::StringRef Content,
                                          const ThreadsafeFS &TFS) {
  auto Style = format::getStyle(format::DefaultFormatStyle, File,
                                format::DefaultFallbackStyle, Content,
                                TFS.view(/*CWD=*/llvm::None).get());
  if (!Style) {
    log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
        Style.takeError());
    return format::getLLVMStyle();
  }
  return *Style;
}

// Runs cleanupAroundReplacements() followed by formatReplacements() on
// Replaces. An error from the cleanup step is returned unchanged.
llvm::Expected<tooling::Replacements>
cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
                 const format::FormatStyle &Style) {
  auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
  if (!CleanReplaces)
    return CleanReplaces;
  return formatReplacements(Code, std::move(*CleanReplaces), Style);
}

// Tokenizes Code as a standalone in-memory file and invokes Action on each
// token, together with the SourceManager that owns the token locations.
static void
lex(llvm::StringRef Code, const LangOptions &LangOpts,
    llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
        Action) {
  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
  std::string NullTerminatedCode = Code.str();
  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
  auto &SM = FileSM.get();
  for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
    Action(Tok, SM);
}

// Counts occurrences of each identifier (and, currently, each keyword) found
// by lexing Content with the language options implied by Style.
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style) {
  llvm::StringMap<unsigned> Identifiers;
  auto LangOpt = format::getFormattingLangOpts(Style);
  lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
    if (Tok.kind() == tok::identifier)
      ++Identifiers[Tok.text(SM)];
    // FIXME: Should this function really return keywords too ?
    else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
      ++Identifiers[Keyword];
  });
  return Identifiers;
}

// Returns the LSP range of every identifier token in Content whose text is
// exactly Identifier, in lexing order.
std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts) {
  std::vector<Range> Ranges;
  lex(Content, LangOpts,
      [&](const syntax::Token &Tok, const SourceManager &SM) {
        if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
          return;
        Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
      });
  return Ranges;
}

// Returns true if NewName is present in an IdentifierTable freshly built for
// LangOpts, i.e. if it is a keyword under those options.
bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
  // Keywords are initialized in constructor.
  clang::IdentifierTable KeywordsTable(LangOpts);
  return KeywordsTable.find(NewName) != KeywordsTable.end();
}

namespace {
// One change to the set of visible namespaces, as reported by
// parseNamespaceEvents().
struct NamespaceEvent {
  enum {
    BeginNamespace, // namespace <ns> {.     Payload is resolved <ns>.
    EndNamespace,   // } // namespace <ns>.  Payload is resolved *outer*
                    // namespace.
    UsingDirective  // using namespace <ns>. Payload is unresolved <ns>.
  } Trigger;
  std::string Payload;
  // Position of the token that completed the construct.
  Position Pos;
};
// Scans C++ source code for constructs that change the visible namespaces.
// Works purely on the token stream: a small state machine recognizes
// "namespace <name> {", the matching "}", and "using namespace <name>;", and
// invokes Callback for each of them.
void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
                          llvm::function_ref<void(NamespaceEvent)> Callback) {

  // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
  std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
  // Stack counts open braces. true if the brace opened a namespace.
  llvm::BitVector BraceStack;

  enum {
    Default,
    Namespace,          // just saw 'namespace'
    NamespaceName,      // just saw 'namespace' NSName
    Using,              // just saw 'using'
    UsingNamespace,     // just saw 'using namespace'
    UsingNamespaceName, // just saw 'using namespace' NSName
  } State = Default;
  // Name accumulated so far for the construct currently being parsed.
  std::string NSName;

  NamespaceEvent Event;
  lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
    Event.Pos = sourceLocToPosition(SM, Tok.location());
    switch (Tok.kind()) {
    case tok::kw_using:
      State = State == Default ? Using : Default;
      break;
    case tok::kw_namespace:
      switch (State) {
      case Using:
        State = UsingNamespace;
        break;
      case Default:
        State = Namespace;
        break;
      default:
        State = Default;
        break;
      }
      break;
    case tok::identifier:
      switch (State) {
      case UsingNamespace:
        // First component of the name: discard any stale text.
        NSName.clear();
        LLVM_FALLTHROUGH;
      case UsingNamespaceName:
        NSName.append(Tok.text(SM).str());
        State = UsingNamespaceName;
        break;
      case Namespace:
        // First component of the name: discard any stale text.
        NSName.clear();
        LLVM_FALLTHROUGH;
      case NamespaceName:
        NSName.append(Tok.text(SM).str());
        State = NamespaceName;
        break;
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::coloncolon:
      // This can come at the beginning or in the middle of a namespace
      // name.
      switch (State) {
      case UsingNamespace:
        NSName.clear();
        LLVM_FALLTHROUGH;
      case UsingNamespaceName:
        NSName.append("::");
        State = UsingNamespaceName;
        break;
      case NamespaceName:
        NSName.append("::");
        State = NamespaceName;
        break;
      case Namespace: // Not legal here.
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::l_brace:
      // Record which { started a namespace, so we know when } ends one.
      if (State == NamespaceName) {
        // Parsed: namespace <name> {
        BraceStack.push_back(true);
        Enclosing.push_back(NSName);
        Event.Trigger = NamespaceEvent::BeginNamespace;
        Event.Payload = llvm::join(Enclosing, "::");
        Callback(Event);
      } else {
        // This case includes anonymous namespaces (State = Namespace).
        // For our purposes, they're not namespaces and we ignore them.
        BraceStack.push_back(false);
      }
      State = Default;
      break;
    case tok::r_brace:
      // If braces are unmatched, we're going to be confused, but don't
      // crash.
      if (!BraceStack.empty()) {
        if (BraceStack.back()) {
          // Parsed: } // namespace
          Enclosing.pop_back();
          Event.Trigger = NamespaceEvent::EndNamespace;
          Event.Payload = llvm::join(Enclosing, "::");
          Callback(Event);
        }
        BraceStack.pop_back();
      }
      break;
    case tok::semi:
      if (State == UsingNamespaceName) {
        // Parsed: using namespace <name> ;
        Event.Trigger = NamespaceEvent::UsingDirective;
        // NSName is cleared before it is appended to again, so moving out of
        // it here is fine.
        Event.Payload = std::move(NSName);
        Callback(Event);
      }
      State = Default;
      break;
    default:
      State = Default;
      break;
    }
  });
}

// Returns the prefix namespaces of NS: {"" ... NS}.
llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
  llvm::SmallVector<llvm::StringRef> Results;
  Results.push_back(NS.take_front(0));
  NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  // Each entry is currently a single component pointing into NS; widen it to
  // the prefix of NS that ends at that component.
  for (llvm::StringRef &R : Results)
    R = NS.take_front(R.end() - NS.begin());
  return Results;
}

} // namespace

// Returns the namespaces visible at the end of Code: every ancestor of the
// namespace that is open at the end (including the global namespace ""), plus
// the candidate targets of using-directives recorded for those ancestors.
// The innermost open namespace comes first; the rest are sorted, with
// duplicates removed.
std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
                                           const LangOptions &LangOpts) {
  // Namespace that is open at the most recent event.
  std::string Current;
  // Map from namespace to (resolved) namespaces introduced via using directive.
  llvm::StringMap<llvm::StringSet<>> UsingDirectives;

  parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
    llvm::StringRef NS = Event.Payload;
    switch (Event.Trigger) {
    case NamespaceEvent::BeginNamespace:
    case NamespaceEvent::EndNamespace:
      Current = std::move(Event.Payload);
      break;
    case NamespaceEvent::UsingDirective:
      if (NS.consume_front("::"))
        // Explicitly global name: nothing to resolve.
        UsingDirectives[Current].insert(NS);
      else {
        // The name could be relative to any enclosing namespace; record every
        // candidate.
        for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
          if (Enclosing.empty())
            UsingDirectives[Current].insert(NS);
          else
            UsingDirectives[Current].insert((Enclosing + "::" + NS).str());
        }
      }
      break;
    }
  });

  std::vector<std::string> Found;
  for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
    Found.push_back(std::string(Enclosing));
    auto It = UsingDirectives.find(Enclosing);
    if (It != UsingDirectives.end())
      for (const auto &Used : It->second)
        Found.push_back(std::string(Used.getKey()));
  }

  // Sort so that Current comes first and everything else is in lexicographic
  // order; this also makes duplicates adjacent for std::unique.
  llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
    if (Current == RHS)
      return false;
    if (Current == LHS)
      return true;
    return LHS < RHS;
  });
  Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
  return Found;
}

// Splits Content into words using the character roles computed by
// calculateRoles(), and returns the lower-cased set of words that have at
// least MinWordLength characters.
llvm::StringSet<> collectWords(llvm::StringRef Content) {
  // We assume short words are not significant.
  // We may want to consider other stopwords, e.g. language keywords.
  // (A very naive implementation showed no benefit, but lexing might do better)
  static constexpr int MinWordLength = 4;

  std::vector<CharRole> Roles(Content.size());
  calculateRoles(Content, Roles);

  llvm::StringSet<> Result;
  llvm::SmallString<256> Word;
  // Records the word accumulated so far (if long enough) and starts a new one.
  auto Flush = [&] {
    if (Word.size() >= MinWordLength) {
      for (char &C : Word)
        C = llvm::toLower(C);
      Result.insert(Word);
    }
    Word.clear();
  };
  for (unsigned I = 0; I < Content.size(); ++I) {
    switch (Roles[I]) {
    case Head:
      // A head character starts a new word.
      Flush();
      LLVM_FALLTHROUGH;
    case Tail:
      Word.push_back(Content[I]);
      break;
    case Unknown:
    case Separator:
      Flush();
      break;
    }
  }
  Flush();

  return Result;
}

// Heuristically decides whether Word, found in free text between Before and
// After, is likely to name a code entity.
static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
                               llvm::StringRef After) {
  // `foo` is an identifier.
  if (Before.endswith("`") && After.startswith("`"))
    return true;
  // In foo::bar, both foo and bar are identifiers.
  if (Before.endswith("::") || After.startswith("::"))
    return true;
  // Doxygen tags like \c foo indicate identifiers.
  // This duplicates clang's doxygen parser, revisit if it gets complicated.
  Before = Before.take_back(100); // Don't search too far back.
  auto Pos = Before.find_last_of("\\@");
  if (Pos != llvm::StringRef::npos) {
    llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
    if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
        Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
        Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
        Tag == "throws" || Tag == "link")
      return true;
  }

  // Word contains underscore.
  // This handles things like snake_case and MACRO_CASE.
  if (Word.contains('_')) {
    return true;
  }
  // Word contains capital letter other than at beginning.
  // This handles things like lowerCamel and UpperCamel.
  // The check for also containing a lowercase letter is to rule out
  // initialisms like "HTTP".
  bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
  bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
  if (HasLower && HasUpper) {
    return true;
  }
  // FIXME: consider mid-sentence Capitalization?
  return false;
}

// Finds the word touching SpelledLoc.
// If a spelled identifier or keyword token touches the location, that token is
// the word. Otherwise the surrounding run of ASCII identifier characters in
// the raw buffer is used (e.g. a word inside a comment or string), and
// LikelyIdentifier is decided heuristically.
// Returns None if the buffer is unavailable or no identifier character touches
// the location.
llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
                                                  const syntax::TokenBuffer &TB,
                                                  const LangOptions &LangOpts) {
  const auto &SM = TB.sourceManager();
  auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
  for (const auto &T : Touching) {
    // If the token is an identifier or a keyword, don't use any heuristics.
    if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
      SpelledWord Result;
      Result.Location = T.location();
      Result.Text = T.text(SM);
      Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
      Result.PartOfSpelledToken = &T;
      Result.SpelledToken = &T;
      // Only report an expanded token when the spelled token maps to exactly
      // one expanded token with the same text.
      auto Expanded =
          TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
      if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
        Result.ExpandedToken = &Expanded.front();
      return Result;
    }
  }
  FileID File;
  unsigned Offset;
  std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
  bool Invalid = false;
  llvm::StringRef Code = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return llvm::None;
  // Extend [B, E) in both directions over identifier characters.
  unsigned B = Offset, E = Offset;
  while (B > 0 && isAsciiIdentifierContinue(Code[B - 1]))
    --B;
  while (E < Code.size() && isAsciiIdentifierContinue(Code[E]))
    ++E;
  if (B == E)
    return llvm::None;

  SpelledWord Result;
  Result.Location = SM.getComposedLoc(File, B);
  Result.Text = Code.slice(B, E);
  Result.LikelyIdentifier =
      isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
      // should not be a keyword
      tok::isAnyIdentifier(
          IdentifierTable(LangOpts).get(Result.Text).getTokenID());
  // Remember the last touching token that starts at or before the word.
  for (const auto &T : Touching)
    if (T.location() <= Result.Location)
      Result.PartOfSpelledToken = &T;
  return Result;
}

llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
                                           Preprocessor &PP) {
  if (SpelledTok.kind() != tok::identifier)
    return None;
  SourceLocation Loc = SpelledTok.location();
  assert(Loc.isFileID());
  const auto &SM = PP.getSourceManager();
  IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
  if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
    return None;

  // We need to take special case to handle #define and #undef.
  // Preprocessor::getMacroDefinitionAtLoc() only considers a macro
  // definition to be in scope *after* the location of the macro name in a
  // #define that introduces it, and *before* the location of the macro name
  // in an #undef that undefines it. To handle these cases, we check for
  // the macro being in scope either just after or just before the location
  // of the token. In getting the location before, we also take care to check
  // for start-of-file.
996 FileID FID = SM.getFileID(Loc); 997 assert(Loc != SM.getLocForEndOfFile(FID)); 998 SourceLocation JustAfterToken = Loc.getLocWithOffset(1); 999 auto *MacroInfo = 1000 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); 1001 if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { 1002 SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); 1003 MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) 1004 .getMacroInfo(); 1005 } 1006 if (!MacroInfo) { 1007 return None; 1008 } 1009 return DefinedMacro{ 1010 IdentifierInfo->getName(), MacroInfo, 1011 translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; 1012 } 1013 1014 llvm::Expected<std::string> Edit::apply() const { 1015 return tooling::applyAllReplacements(InitialCode, Replacements); 1016 } 1017 1018 std::vector<TextEdit> Edit::asTextEdits() const { 1019 return replacementsToEdits(InitialCode, Replacements); 1020 } 1021 1022 bool Edit::canApplyTo(llvm::StringRef Code) const { 1023 // Create line iterators, since line numbers are important while applying our 1024 // edit we cannot skip blank lines. 1025 auto LHS = llvm::MemoryBuffer::getMemBuffer(Code); 1026 llvm::line_iterator LHSIt(*LHS, /*SkipBlanks=*/false); 1027 1028 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode); 1029 llvm::line_iterator RHSIt(*RHS, /*SkipBlanks=*/false); 1030 1031 // Compare the InitialCode we prepared the edit for with the Code we received 1032 // line by line to make sure there are no differences. 1033 // FIXME: This check is too conservative now, it should be enough to only 1034 // check lines around the replacements contained inside the Edit. 
1035 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) { 1036 if (*LHSIt != *RHSIt) 1037 return false; 1038 ++LHSIt; 1039 ++RHSIt; 1040 } 1041 1042 // After we reach EOF for any of the files we make sure the other one doesn't 1043 // contain any additional content except empty lines, they should not 1044 // interfere with the edit we produced. 1045 while (!LHSIt.is_at_eof()) { 1046 if (!LHSIt->empty()) 1047 return false; 1048 ++LHSIt; 1049 } 1050 while (!RHSIt.is_at_eof()) { 1051 if (!RHSIt->empty()) 1052 return false; 1053 ++RHSIt; 1054 } 1055 return true; 1056 } 1057 1058 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { 1059 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) 1060 E.Replacements = std::move(*NewEdits); 1061 else 1062 return NewEdits.takeError(); 1063 return llvm::Error::success(); 1064 } 1065 1066 llvm::Error applyChange(std::string &Contents, 1067 const TextDocumentContentChangeEvent &Change) { 1068 if (!Change.range) { 1069 Contents = Change.text; 1070 return llvm::Error::success(); 1071 } 1072 1073 const Position &Start = Change.range->start; 1074 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); 1075 if (!StartIndex) 1076 return StartIndex.takeError(); 1077 1078 const Position &End = Change.range->end; 1079 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); 1080 if (!EndIndex) 1081 return EndIndex.takeError(); 1082 1083 if (*EndIndex < *StartIndex) 1084 return error(llvm::errc::invalid_argument, 1085 "Range's end position ({0}) is before start position ({1})", 1086 End, Start); 1087 1088 // Since the range length between two LSP positions is dependent on the 1089 // contents of the buffer we compute the range length between the start and 1090 // end position ourselves and compare it to the range length of the LSP 1091 // message to verify the buffers of the client and server are in sync. 
1092 1093 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 1094 // code units. 1095 ssize_t ComputedRangeLength = 1096 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); 1097 1098 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) 1099 return error(llvm::errc::invalid_argument, 1100 "Change's rangeLength ({0}) doesn't match the " 1101 "computed range length ({1}).", 1102 *Change.rangeLength, ComputedRangeLength); 1103 1104 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); 1105 1106 return llvm::Error::success(); 1107 } 1108 1109 EligibleRegion getEligiblePoints(llvm::StringRef Code, 1110 llvm::StringRef FullyQualifiedName, 1111 const LangOptions &LangOpts) { 1112 EligibleRegion ER; 1113 // Start with global namespace. 1114 std::vector<std::string> Enclosing = {""}; 1115 // FIXME: In addition to namespaces try to generate events for function 1116 // definitions as well. One might use a closing parantheses(")" followed by an 1117 // opening brace "{" to trigger the start. 1118 parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) { 1119 // Using Directives only introduces declarations to current scope, they do 1120 // not change the current namespace, so skip them. 1121 if (Event.Trigger == NamespaceEvent::UsingDirective) 1122 return; 1123 // Do not qualify the global namespace. 1124 if (!Event.Payload.empty()) 1125 Event.Payload.append("::"); 1126 1127 std::string CurrentNamespace; 1128 if (Event.Trigger == NamespaceEvent::BeginNamespace) { 1129 Enclosing.emplace_back(std::move(Event.Payload)); 1130 CurrentNamespace = Enclosing.back(); 1131 // parseNameSpaceEvents reports the beginning position of a token; we want 1132 // to insert after '{', so increment by one. 1133 ++Event.Pos.character; 1134 } else { 1135 // Event.Payload points to outer namespace when exiting a scope, so use 1136 // the namespace we've last entered instead. 
1137 CurrentNamespace = std::move(Enclosing.back()); 1138 Enclosing.pop_back(); 1139 assert(Enclosing.back() == Event.Payload); 1140 } 1141 1142 // Ignore namespaces that are not a prefix of the target. 1143 if (!FullyQualifiedName.startswith(CurrentNamespace)) 1144 return; 1145 1146 // Prefer the namespace that shares the longest prefix with target. 1147 if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) { 1148 ER.EligiblePoints.clear(); 1149 ER.EnclosingNamespace = CurrentNamespace; 1150 } 1151 if (CurrentNamespace.size() == ER.EnclosingNamespace.size()) 1152 ER.EligiblePoints.emplace_back(std::move(Event.Pos)); 1153 }); 1154 // If there were no shared namespaces just return EOF. 1155 if (ER.EligiblePoints.empty()) { 1156 assert(ER.EnclosingNamespace.empty()); 1157 ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size())); 1158 } 1159 return ER; 1160 } 1161 1162 bool isHeaderFile(llvm::StringRef FileName, 1163 llvm::Optional<LangOptions> LangOpts) { 1164 // Respect the langOpts, for non-file-extension cases, e.g. standard library 1165 // files. 1166 if (LangOpts && LangOpts->IsHeaderFile) 1167 return true; 1168 namespace types = clang::driver::types; 1169 auto Lang = types::lookupTypeForExtension( 1170 llvm::sys::path::extension(FileName).substr(1)); 1171 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang); 1172 } 1173 1174 bool isProtoFile(SourceLocation Loc, const SourceManager &SM) { 1175 auto FileName = SM.getFilename(Loc); 1176 if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h")) 1177 return false; 1178 auto FID = SM.getFileID(Loc); 1179 // All proto generated headers should start with this line. 1180 static const char *PROTO_HEADER_COMMENT = 1181 "// Generated by the protocol buffer compiler. DO NOT EDIT!"; 1182 // Double check that this is an actual protobuf header. 
1183 return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT); 1184 } 1185 1186 namespace { 1187 1188 // Is Line an #if or #ifdef directive? 1189 // FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non 1190 // self-contained and is probably not what we want. 1191 bool isIf(llvm::StringRef Line) { 1192 Line = Line.ltrim(); 1193 if (!Line.consume_front("#")) 1194 return false; 1195 Line = Line.ltrim(); 1196 return Line.startswith("if"); 1197 } 1198 1199 // Is Line an #error directive mentioning includes? 1200 bool isErrorAboutInclude(llvm::StringRef Line) { 1201 Line = Line.ltrim(); 1202 if (!Line.consume_front("#")) 1203 return false; 1204 Line = Line.ltrim(); 1205 if (!Line.startswith("error")) 1206 return false; 1207 return Line.contains_insensitive( 1208 "includ"); // Matches "include" or "including". 1209 } 1210 1211 // Heuristically headers that only want to be included via an umbrella. 1212 bool isDontIncludeMeHeader(llvm::StringRef Content) { 1213 llvm::StringRef Line; 1214 // Only sniff up to 100 lines or 10KB. 1215 Content = Content.take_front(100 * 100); 1216 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { 1217 std::tie(Line, Content) = Content.split('\n'); 1218 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) 1219 return true; 1220 } 1221 return false; 1222 } 1223 1224 } // namespace 1225 1226 bool isSelfContainedHeader(const FileEntry *FE, FileID FID, 1227 const SourceManager &SM, HeaderSearch &HeaderInfo) { 1228 // FIXME: Should files that have been #import'd be considered 1229 // self-contained? That's really a property of the includer, 1230 // not of the file. 1231 if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && 1232 !HeaderInfo.hasFileBeenImported(FE)) 1233 return false; 1234 // This pattern indicates that a header can't be used without 1235 // particular preprocessor state, usually set up by another header. 
1236 return !isDontIncludeMeHeader(SM.getBufferData(FID)); 1237 } 1238 1239 } // namespace clangd 1240 } // namespace clang 1241