1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "SourceCode.h" 9 10 #include "FuzzyMatch.h" 11 #include "Preamble.h" 12 #include "Protocol.h" 13 #include "support/Context.h" 14 #include "support/Logger.h" 15 #include "clang/Basic/LangOptions.h" 16 #include "clang/Basic/SourceLocation.h" 17 #include "clang/Basic/SourceManager.h" 18 #include "clang/Basic/TokenKinds.h" 19 #include "clang/Driver/Types.h" 20 #include "clang/Format/Format.h" 21 #include "clang/Lex/Lexer.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "clang/Lex/Token.h" 24 #include "clang/Tooling/Core/Replacement.h" 25 #include "clang/Tooling/Syntax/Tokens.h" 26 #include "llvm/ADT/ArrayRef.h" 27 #include "llvm/ADT/BitVector.h" 28 #include "llvm/ADT/None.h" 29 #include "llvm/ADT/STLExtras.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/ADT/StringMap.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/Support/Compiler.h" 34 #include "llvm/Support/Errc.h" 35 #include "llvm/Support/Error.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/LineIterator.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/Path.h" 40 #include "llvm/Support/VirtualFileSystem.h" 41 #include "llvm/Support/xxhash.h" 42 #include <algorithm> 43 #include <cstddef> 44 #include <string> 45 #include <vector> 46 47 namespace clang { 48 namespace clangd { 49 50 // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! 51 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. 52 53 // Iterates over unicode codepoints in the (UTF-8) string. For each, 54 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. 55 // Returns true if CB returned true, false if we hit the end of string. 56 // 57 // If the string is not valid UTF-8, we log this error and "decode" the 58 // text in some arbitrary way. This is pretty sad, but this tends to happen deep 59 // within indexing of headers where clang misdetected the encoding, and 60 // propagating the error all the way back up is (probably?) not be worth it. 61 template <typename Callback> 62 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { 63 bool LoggedInvalid = false; 64 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 65 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. 66 for (size_t I = 0; I < U8.size();) { 67 unsigned char C = static_cast<unsigned char>(U8[I]); 68 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. 69 if (CB(1, 1)) 70 return true; 71 ++I; 72 continue; 73 } 74 // This convenient property of UTF-8 holds for all non-ASCII characters. 75 size_t UTF8Length = llvm::countLeadingOnes(C); 76 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. 77 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. 78 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { 79 if (!LoggedInvalid) { 80 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); 81 LoggedInvalid = true; 82 } 83 // We can't give a correct result, but avoid returning something wild. 84 // Pretend this is a valid ASCII byte, for lack of better options. 85 // (Too late to get ISO-8859-* right, we've skipped some bytes already). 86 if (CB(1, 1)) 87 return true; 88 ++I; 89 continue; 90 } 91 I += UTF8Length; // Skip over all trailing bytes. 92 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 93 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) 94 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) 95 return true; 96 } 97 return false; 98 } 99 100 // Returns the byte offset into the string that is an offset of \p Units in 101 // the specified encoding. 102 // Conceptually, this converts to the encoding, truncates to CodeUnits, 103 // converts back to UTF-8, and returns the length in bytes. 104 static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, 105 bool &Valid) { 106 Valid = Units >= 0; 107 if (Units <= 0) 108 return 0; 109 size_t Result = 0; 110 switch (Enc) { 111 case OffsetEncoding::UTF8: 112 Result = Units; 113 break; 114 case OffsetEncoding::UTF16: 115 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 116 Result += U8Len; 117 Units -= U16Len; 118 return Units <= 0; 119 }); 120 if (Units < 0) // Offset in the middle of a surrogate pair. 121 Valid = false; 122 break; 123 case OffsetEncoding::UTF32: 124 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 125 Result += U8Len; 126 Units--; 127 return Units <= 0; 128 }); 129 break; 130 case OffsetEncoding::UnsupportedEncoding: 131 llvm_unreachable("unsupported encoding"); 132 } 133 // Don't return an out-of-range index if we overran. 134 if (Result > U8.size()) { 135 Valid = false; 136 return U8.size(); 137 } 138 return Result; 139 } 140 141 Key<OffsetEncoding> kCurrentOffsetEncoding; 142 static OffsetEncoding lspEncoding() { 143 auto *Enc = Context::current().get(kCurrentOffsetEncoding); 144 return Enc ? *Enc : OffsetEncoding::UTF16; 145 } 146 147 // Like most strings in clangd, the input is UTF-8 encoded. 148 size_t lspLength(llvm::StringRef Code) { 149 size_t Count = 0; 150 switch (lspEncoding()) { 151 case OffsetEncoding::UTF8: 152 Count = Code.size(); 153 break; 154 case OffsetEncoding::UTF16: 155 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 156 Count += U16Len; 157 return false; 158 }); 159 break; 160 case OffsetEncoding::UTF32: 161 iterateCodepoints(Code, [&](int U8Len, int U16Len) { 162 ++Count; 163 return false; 164 }); 165 break; 166 case OffsetEncoding::UnsupportedEncoding: 167 llvm_unreachable("unsupported encoding"); 168 } 169 return Count; 170 } 171 172 llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P, 173 bool AllowColumnsBeyondLineLength) { 174 if (P.line < 0) 175 return error(llvm::errc::invalid_argument, 176 "Line value can't be negative ({0})", P.line); 177 if (P.character < 0) 178 return error(llvm::errc::invalid_argument, 179 "Character value can't be negative ({0})", P.character); 180 size_t StartOfLine = 0; 181 for (int I = 0; I != P.line; ++I) { 182 size_t NextNL = Code.find('\n', StartOfLine); 183 if (NextNL == llvm::StringRef::npos) 184 return error(llvm::errc::invalid_argument, 185 "Line value is out of range ({0})", P.line); 186 StartOfLine = NextNL + 1; 187 } 188 StringRef Line = 189 Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); 190 191 // P.character may be in UTF-16, transcode if necessary. 192 bool Valid; 193 size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); 194 if (!Valid && !AllowColumnsBeyondLineLength) 195 return error(llvm::errc::invalid_argument, 196 "{0} offset {1} is invalid for line {2}", lspEncoding(), 197 P.character, P.line); 198 return StartOfLine + ByteInLine; 199 } 200 201 Position offsetToPosition(llvm::StringRef Code, size_t Offset) { 202 Offset = std::min(Code.size(), Offset); 203 llvm::StringRef Before = Code.substr(0, Offset); 204 int Lines = Before.count('\n'); 205 size_t PrevNL = Before.rfind('\n'); 206 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); 207 Position Pos; 208 Pos.line = Lines; 209 Pos.character = lspLength(Before.substr(StartOfLine)); 210 return Pos; 211 } 212 213 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) { 214 // We use the SourceManager's line tables, but its column number is in bytes. 215 FileID FID; 216 unsigned Offset; 217 std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc); 218 Position P; 219 P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1; 220 bool Invalid = false; 221 llvm::StringRef Code = SM.getBufferData(FID, &Invalid); 222 if (!Invalid) { 223 auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1; 224 auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes); 225 P.character = lspLength(LineSoFar); 226 } 227 return P; 228 } 229 230 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) { 231 if (Loc.isMacroID()) { 232 std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM); 233 if (llvm::StringRef(PrintLoc).startswith("<scratch") || 234 llvm::StringRef(PrintLoc).startswith("<command line>")) 235 return false; 236 } 237 return true; 238 } 239 240 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { 241 if (!R.getBegin().isValid() || !R.getEnd().isValid()) 242 return false; 243 244 FileID BeginFID; 245 size_t BeginOffset = 0; 246 std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); 247 248 FileID EndFID; 249 size_t EndOffset = 0; 250 std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd()); 251 252 return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; 253 } 254 255 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) { 256 assert(SM.getLocForEndOfFile(IncludedFile).isFileID()); 257 FileID IncludingFile; 258 unsigned Offset; 259 std::tie(IncludingFile, Offset) = 260 SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile)); 261 bool Invalid = false; 262 llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid); 263 if (Invalid) 264 return SourceLocation(); 265 // Now buf is "...\n#include <foo>\n..." 266 // and Offset points here: ^ 267 // Rewind to the preceding # on the line. 268 assert(Offset < Buf.size()); 269 for (;; --Offset) { 270 if (Buf[Offset] == '#') 271 return SM.getComposedLoc(IncludingFile, Offset); 272 if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on? 273 return SourceLocation(); 274 } 275 } 276 277 static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM, 278 const LangOptions &LangOpts) { 279 Token TheTok; 280 if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) 281 return 0; 282 // FIXME: Here we check whether the token at the location is a greatergreater 283 // (>>) token and consider it as a single greater (>). This is to get it 284 // working for templates but it isn't correct for the right shift operator. We 285 // can avoid this by using half open char ranges in getFileRange() but getting 286 // token ending is not well supported in macroIDs. 287 if (TheTok.is(tok::greatergreater)) 288 return 1; 289 return TheTok.getLength(); 290 } 291 292 // Returns location of the last character of the token at a given loc 293 static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, 294 const SourceManager &SM, 295 const LangOptions &LangOpts) { 296 unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); 297 return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); 298 } 299 300 // Returns location of the starting of the token at a given EndLoc 301 static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, 302 const SourceManager &SM, 303 const LangOptions &LangOpts) { 304 return EndLoc.getLocWithOffset( 305 -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); 306 } 307 308 // Converts a char source range to a token range. 309 static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM, 310 const LangOptions &LangOpts) { 311 if (!Range.isTokenRange()) 312 Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); 313 return Range.getAsRange(); 314 } 315 // Returns the union of two token ranges. 316 // To find the maximum of the Ends of the ranges, we compare the location of the 317 // last character of the token. 318 static SourceRange unionTokenRange(SourceRange R1, SourceRange R2, 319 const SourceManager &SM, 320 const LangOptions &LangOpts) { 321 SourceLocation Begin = 322 SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin()) 323 ? R1.getBegin() 324 : R2.getBegin(); 325 SourceLocation End = 326 SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts), 327 getLocForTokenEnd(R2.getEnd(), SM, LangOpts)) 328 ? R2.getEnd() 329 : R1.getEnd(); 330 return SourceRange(Begin, End); 331 } 332 333 // Given a range whose endpoints may be in different expansions or files, 334 // tries to find a range within a common file by following up the expansion and 335 // include location in each. 336 static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM, 337 const LangOptions &LangOpts) { 338 // Fast path for most common cases. 339 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd())) 340 return R; 341 // Record the stack of expansion locations for the beginning, keyed by FileID. 342 llvm::DenseMap<FileID, SourceLocation> BeginExpansions; 343 for (SourceLocation Begin = R.getBegin(); Begin.isValid(); 344 Begin = Begin.isFileID() 345 ? includeHashLoc(SM.getFileID(Begin), SM) 346 : SM.getImmediateExpansionRange(Begin).getBegin()) { 347 BeginExpansions[SM.getFileID(Begin)] = Begin; 348 } 349 // Move up the stack of expansion locations for the end until we find the 350 // location in BeginExpansions with that has the same file id. 351 for (SourceLocation End = R.getEnd(); End.isValid(); 352 End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM) 353 : toTokenRange(SM.getImmediateExpansionRange(End), 354 SM, LangOpts) 355 .getEnd()) { 356 auto It = BeginExpansions.find(SM.getFileID(End)); 357 if (It != BeginExpansions.end()) { 358 if (SM.getFileOffset(It->second) > SM.getFileOffset(End)) 359 return SourceLocation(); 360 return {It->second, End}; 361 } 362 } 363 return SourceRange(); 364 } 365 366 // Find an expansion range (not necessarily immediate) the ends of which are in 367 // the same file id. 368 static SourceRange 369 getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM, 370 const LangOptions &LangOpts) { 371 return rangeInCommonFile( 372 toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM, 373 LangOpts); 374 } 375 376 // Returns the file range for a given Location as a Token Range 377 // This is quite similar to getFileLoc in SourceManager as both use 378 // getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). 379 // However: 380 // - We want to maintain the full range information as we move from one file to 381 // the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange. 382 // - We want to split '>>' tokens as the lexer parses the '>>' in nested 383 // template instantiations as a '>>' instead of two '>'s. 384 // There is also getExpansionRange but it simply calls 385 // getImmediateExpansionRange on the begin and ends separately which is wrong. 386 static SourceRange getTokenFileRange(SourceLocation Loc, 387 const SourceManager &SM, 388 const LangOptions &LangOpts) { 389 SourceRange FileRange = Loc; 390 while (!FileRange.getBegin().isFileID()) { 391 if (SM.isMacroArgExpansion(FileRange.getBegin())) { 392 FileRange = unionTokenRange( 393 SM.getImmediateSpellingLoc(FileRange.getBegin()), 394 SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts); 395 assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd())); 396 } else { 397 SourceRange ExpansionRangeForBegin = 398 getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts); 399 SourceRange ExpansionRangeForEnd = 400 getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts); 401 if (ExpansionRangeForBegin.isInvalid() || 402 ExpansionRangeForEnd.isInvalid()) 403 return SourceRange(); 404 assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(), 405 ExpansionRangeForEnd.getBegin()) && 406 "Both Expansion ranges should be in same file."); 407 FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd, 408 SM, LangOpts); 409 } 410 } 411 return FileRange; 412 } 413 414 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) { 415 if (!Loc.isValid()) 416 return false; 417 FileID FID = SM.getFileID(SM.getExpansionLoc(Loc)); 418 return FID == SM.getMainFileID() || FID == SM.getPreambleFileID(); 419 } 420 421 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM, 422 const LangOptions &LangOpts, 423 SourceRange R) { 424 SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts); 425 if (!isValidFileRange(SM, R1)) 426 return llvm::None; 427 428 SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts); 429 if (!isValidFileRange(SM, R2)) 430 return llvm::None; 431 432 SourceRange Result = 433 rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts); 434 unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts); 435 // Convert from closed token range to half-open (char) range 436 Result.setEnd(Result.getEnd().getLocWithOffset(TokLen)); 437 if (!isValidFileRange(SM, Result)) 438 return llvm::None; 439 440 return Result; 441 } 442 443 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) { 444 assert(isValidFileRange(SM, R)); 445 auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin())); 446 assert(Buf); 447 448 size_t BeginOffset = SM.getFileOffset(R.getBegin()); 449 size_t EndOffset = SM.getFileOffset(R.getEnd()); 450 return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset); 451 } 452 453 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, 454 Position P) { 455 llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer(); 456 auto Offset = 457 positionToOffset(Code, P, /*AllowColumnsBeyondLineLength=*/false); 458 if (!Offset) 459 return Offset.takeError(); 460 return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset); 461 } 462 463 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) { 464 // Clang is 1-based, LSP uses 0-based indexes. 465 Position Begin = sourceLocToPosition(SM, R.getBegin()); 466 Position End = sourceLocToPosition(SM, R.getEnd()); 467 468 return {Begin, End}; 469 } 470 471 void unionRanges(Range &A, Range B) { 472 if (B.start < A.start) 473 A.start = B.start; 474 if (A.end < B.end) 475 A.end = B.end; 476 } 477 478 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code, 479 size_t Offset) { 480 Offset = std::min(Code.size(), Offset); 481 llvm::StringRef Before = Code.substr(0, Offset); 482 int Lines = Before.count('\n'); 483 size_t PrevNL = Before.rfind('\n'); 484 size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1); 485 return {Lines + 1, Offset - StartOfLine + 1}; 486 } 487 488 std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) { 489 size_t Pos = QName.rfind("::"); 490 if (Pos == llvm::StringRef::npos) 491 return {llvm::StringRef(), QName}; 492 return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)}; 493 } 494 495 TextEdit replacementToEdit(llvm::StringRef Code, 496 const tooling::Replacement &R) { 497 Range ReplacementRange = { 498 offsetToPosition(Code, R.getOffset()), 499 offsetToPosition(Code, R.getOffset() + R.getLength())}; 500 return {ReplacementRange, std::string(R.getReplacementText())}; 501 } 502 503 std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code, 504 const tooling::Replacements &Repls) { 505 std::vector<TextEdit> Edits; 506 for (const auto &R : Repls) 507 Edits.push_back(replacementToEdit(Code, R)); 508 return Edits; 509 } 510 511 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F, 512 const SourceManager &SourceMgr) { 513 if (!F) 514 return None; 515 516 llvm::SmallString<128> FilePath = F->getName(); 517 if (!llvm::sys::path::is_absolute(FilePath)) { 518 if (auto EC = 519 SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute( 520 FilePath)) { 521 elog("Could not turn relative path '{0}' to absolute: {1}", FilePath, 522 EC.message()); 523 return None; 524 } 525 } 526 527 // Handle the symbolic link path case where the current working directory 528 // (getCurrentWorkingDirectory) is a symlink. We always want to the real 529 // file path (instead of the symlink path) for the C++ symbols. 530 // 531 // Consider the following example: 532 // 533 // src dir: /project/src/foo.h 534 // current working directory (symlink): /tmp/build -> /project/src/ 535 // 536 // The file path of Symbol is "/project/src/foo.h" instead of 537 // "/tmp/build/foo.h" 538 if (auto Dir = SourceMgr.getFileManager().getDirectory( 539 llvm::sys::path::parent_path(FilePath))) { 540 llvm::SmallString<128> RealPath; 541 llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir); 542 llvm::sys::path::append(RealPath, DirName, 543 llvm::sys::path::filename(FilePath)); 544 return RealPath.str().str(); 545 } 546 547 return FilePath.str().str(); 548 } 549 550 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, 551 const LangOptions &L) { 552 TextEdit Result; 553 Result.range = 554 halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L)); 555 Result.newText = FixIt.CodeToInsert; 556 return Result; 557 } 558 559 FileDigest digest(llvm::StringRef Content) { 560 uint64_t Hash{llvm::xxHash64(Content)}; 561 FileDigest Result; 562 for (unsigned I = 0; I < Result.size(); ++I) { 563 Result[I] = uint8_t(Hash); 564 Hash >>= 8; 565 } 566 return Result; 567 } 568 569 llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) { 570 bool Invalid = false; 571 llvm::StringRef Content = SM.getBufferData(FID, &Invalid); 572 if (Invalid) 573 return None; 574 return digest(Content); 575 } 576 577 format::FormatStyle getFormatStyleForFile(llvm::StringRef File, 578 llvm::StringRef Content, 579 const ThreadsafeFS &TFS) { 580 auto Style = format::getStyle(format::DefaultFormatStyle, File, 581 format::DefaultFallbackStyle, Content, 582 TFS.view(/*CWD=*/llvm::None).get()); 583 if (!Style) { 584 log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File, 585 Style.takeError()); 586 return format::getLLVMStyle(); 587 } 588 return *Style; 589 } 590 591 llvm::Expected<tooling::Replacements> 592 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, 593 const format::FormatStyle &Style) { 594 auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style); 595 if (!CleanReplaces) 596 return CleanReplaces; 597 return formatReplacements(Code, std::move(*CleanReplaces), Style); 598 } 599 600 static void 601 lex(llvm::StringRef Code, const LangOptions &LangOpts, 602 llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)> 603 Action) { 604 // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! 605 std::string NullTerminatedCode = Code.str(); 606 SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode); 607 auto &SM = FileSM.get(); 608 for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts)) 609 Action(Tok, SM); 610 } 611 612 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, 613 const format::FormatStyle &Style) { 614 llvm::StringMap<unsigned> Identifiers; 615 auto LangOpt = format::getFormattingLangOpts(Style); 616 lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) { 617 if (Tok.kind() == tok::identifier) 618 ++Identifiers[Tok.text(SM)]; 619 // FIXME: Should this function really return keywords too ? 620 else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind())) 621 ++Identifiers[Keyword]; 622 }); 623 return Identifiers; 624 } 625 626 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier, 627 llvm::StringRef Content, 628 const LangOptions &LangOpts) { 629 std::vector<Range> Ranges; 630 lex(Content, LangOpts, 631 [&](const syntax::Token &Tok, const SourceManager &SM) { 632 if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier) 633 return; 634 Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM))); 635 }); 636 return Ranges; 637 } 638 639 bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) { 640 // Keywords are initialized in constructor. 641 clang::IdentifierTable KeywordsTable(LangOpts); 642 return KeywordsTable.find(NewName) != KeywordsTable.end(); 643 } 644 645 namespace { 646 struct NamespaceEvent { 647 enum { 648 BeginNamespace, // namespace <ns> {. Payload is resolved <ns>. 649 EndNamespace, // } // namespace <ns>. Payload is resolved *outer* 650 // namespace. 651 UsingDirective // using namespace <ns>. Payload is unresolved <ns>. 652 } Trigger; 653 std::string Payload; 654 Position Pos; 655 }; 656 // Scans C++ source code for constructs that change the visible namespaces. 657 void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts, 658 llvm::function_ref<void(NamespaceEvent)> Callback) { 659 660 // Stack of enclosing namespaces, e.g. {"clang", "clangd"} 661 std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd" 662 // Stack counts open braces. true if the brace opened a namespace. 663 llvm::BitVector BraceStack; 664 665 enum { 666 Default, 667 Namespace, // just saw 'namespace' 668 NamespaceName, // just saw 'namespace' NSName 669 Using, // just saw 'using' 670 UsingNamespace, // just saw 'using namespace' 671 UsingNamespaceName, // just saw 'using namespace' NSName 672 } State = Default; 673 std::string NSName; 674 675 NamespaceEvent Event; 676 lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) { 677 Event.Pos = sourceLocToPosition(SM, Tok.location()); 678 switch (Tok.kind()) { 679 case tok::kw_using: 680 State = State == Default ? Using : Default; 681 break; 682 case tok::kw_namespace: 683 switch (State) { 684 case Using: 685 State = UsingNamespace; 686 break; 687 case Default: 688 State = Namespace; 689 break; 690 default: 691 State = Default; 692 break; 693 } 694 break; 695 case tok::identifier: 696 switch (State) { 697 case UsingNamespace: 698 NSName.clear(); 699 [[fallthrough]]; 700 case UsingNamespaceName: 701 NSName.append(Tok.text(SM).str()); 702 State = UsingNamespaceName; 703 break; 704 case Namespace: 705 NSName.clear(); 706 [[fallthrough]]; 707 case NamespaceName: 708 NSName.append(Tok.text(SM).str()); 709 State = NamespaceName; 710 break; 711 case Using: 712 case Default: 713 State = Default; 714 break; 715 } 716 break; 717 case tok::coloncolon: 718 // This can come at the beginning or in the middle of a namespace 719 // name. 720 switch (State) { 721 case UsingNamespace: 722 NSName.clear(); 723 [[fallthrough]]; 724 case UsingNamespaceName: 725 NSName.append("::"); 726 State = UsingNamespaceName; 727 break; 728 case NamespaceName: 729 NSName.append("::"); 730 State = NamespaceName; 731 break; 732 case Namespace: // Not legal here. 733 case Using: 734 case Default: 735 State = Default; 736 break; 737 } 738 break; 739 case tok::l_brace: 740 // Record which { started a namespace, so we know when } ends one. 741 if (State == NamespaceName) { 742 // Parsed: namespace <name> { 743 BraceStack.push_back(true); 744 Enclosing.push_back(NSName); 745 Event.Trigger = NamespaceEvent::BeginNamespace; 746 Event.Payload = llvm::join(Enclosing, "::"); 747 Callback(Event); 748 } else { 749 // This case includes anonymous namespaces (State = Namespace). 750 // For our purposes, they're not namespaces and we ignore them. 751 BraceStack.push_back(false); 752 } 753 State = Default; 754 break; 755 case tok::r_brace: 756 // If braces are unmatched, we're going to be confused, but don't 757 // crash. 758 if (!BraceStack.empty()) { 759 if (BraceStack.back()) { 760 // Parsed: } // namespace 761 Enclosing.pop_back(); 762 Event.Trigger = NamespaceEvent::EndNamespace; 763 Event.Payload = llvm::join(Enclosing, "::"); 764 Callback(Event); 765 } 766 BraceStack.pop_back(); 767 } 768 break; 769 case tok::semi: 770 if (State == UsingNamespaceName) { 771 // Parsed: using namespace <name> ; 772 Event.Trigger = NamespaceEvent::UsingDirective; 773 Event.Payload = std::move(NSName); 774 Callback(Event); 775 } 776 State = Default; 777 break; 778 default: 779 State = Default; 780 break; 781 } 782 }); 783 } 784 785 // Returns the prefix namespaces of NS: {"" ... NS}. 786 llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) { 787 llvm::SmallVector<llvm::StringRef> Results; 788 Results.push_back(NS.take_front(0)); 789 NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 790 for (llvm::StringRef &R : Results) 791 R = NS.take_front(R.end() - NS.begin()); 792 return Results; 793 } 794 795 } // namespace 796 797 std::vector<std::string> visibleNamespaces(llvm::StringRef Code, 798 const LangOptions &LangOpts) { 799 std::string Current; 800 // Map from namespace to (resolved) namespaces introduced via using directive. 801 llvm::StringMap<llvm::StringSet<>> UsingDirectives; 802 803 parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) { 804 llvm::StringRef NS = Event.Payload; 805 switch (Event.Trigger) { 806 case NamespaceEvent::BeginNamespace: 807 case NamespaceEvent::EndNamespace: 808 Current = std::move(Event.Payload); 809 break; 810 case NamespaceEvent::UsingDirective: 811 if (NS.consume_front("::")) 812 UsingDirectives[Current].insert(NS); 813 else { 814 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) { 815 if (Enclosing.empty()) 816 UsingDirectives[Current].insert(NS); 817 else 818 UsingDirectives[Current].insert((Enclosing + "::" + NS).str()); 819 } 820 } 821 break; 822 } 823 }); 824 825 std::vector<std::string> Found; 826 for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) { 827 Found.push_back(std::string(Enclosing)); 828 auto It = UsingDirectives.find(Enclosing); 829 if (It != UsingDirectives.end()) 830 for (const auto &Used : It->second) 831 Found.push_back(std::string(Used.getKey())); 832 } 833 834 llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) { 835 if (Current == RHS) 836 return false; 837 if (Current == LHS) 838 return true; 839 return LHS < RHS; 840 }); 841 Found.erase(std::unique(Found.begin(), Found.end()), Found.end()); 842 return Found; 843 } 844 845 llvm::StringSet<> collectWords(llvm::StringRef Content) { 846 // We assume short words are not significant. 847 // We may want to consider other stopwords, e.g. language keywords. 848 // (A very naive implementation showed no benefit, but lexing might do better) 849 static constexpr int MinWordLength = 4; 850 851 std::vector<CharRole> Roles(Content.size()); 852 calculateRoles(Content, Roles); 853 854 llvm::StringSet<> Result; 855 llvm::SmallString<256> Word; 856 auto Flush = [&] { 857 if (Word.size() >= MinWordLength) { 858 for (char &C : Word) 859 C = llvm::toLower(C); 860 Result.insert(Word); 861 } 862 Word.clear(); 863 }; 864 for (unsigned I = 0; I < Content.size(); ++I) { 865 switch (Roles[I]) { 866 case Head: 867 Flush(); 868 [[fallthrough]]; 869 case Tail: 870 Word.push_back(Content[I]); 871 break; 872 case Unknown: 873 case Separator: 874 Flush(); 875 break; 876 } 877 } 878 Flush(); 879 880 return Result; 881 } 882 883 static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before, 884 llvm::StringRef After) { 885 // `foo` is an identifier. 886 if (Before.endswith("`") && After.startswith("`")) 887 return true; 888 // In foo::bar, both foo and bar are identifiers. 889 if (Before.endswith("::") || After.startswith("::")) 890 return true; 891 // Doxygen tags like \c foo indicate identifiers. 892 // Don't search too far back. 893 // This duplicates clang's doxygen parser, revisit if it gets complicated. 894 Before = Before.take_back(100); // Don't search too far back. 895 auto Pos = Before.find_last_of("\\@"); 896 if (Pos != llvm::StringRef::npos) { 897 llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' '); 898 if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" || 899 Tag == "param" || Tag == "param[in]" || Tag == "param[out]" || 900 Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" || 901 Tag == "throws" || Tag == "link") 902 return true; 903 } 904 905 // Word contains underscore. 906 // This handles things like snake_case and MACRO_CASE. 907 if (Word.contains('_')) { 908 return true; 909 } 910 // Word contains capital letter other than at beginning. 911 // This handles things like lowerCamel and UpperCamel. 912 // The check for also containing a lowercase letter is to rule out 913 // initialisms like "HTTP". 914 bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos; 915 bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos; 916 if (HasLower && HasUpper) { 917 return true; 918 } 919 // FIXME: consider mid-sentence Capitalization? 920 return false; 921 } 922 923 llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc, 924 const syntax::TokenBuffer &TB, 925 const LangOptions &LangOpts) { 926 const auto &SM = TB.sourceManager(); 927 auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB); 928 for (const auto &T : Touching) { 929 // If the token is an identifier or a keyword, don't use any heuristics. 930 if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) { 931 SpelledWord Result; 932 Result.Location = T.location(); 933 Result.Text = T.text(SM); 934 Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind()); 935 Result.PartOfSpelledToken = &T; 936 Result.SpelledToken = &T; 937 auto Expanded = 938 TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location())); 939 if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text) 940 Result.ExpandedToken = &Expanded.front(); 941 return Result; 942 } 943 } 944 FileID File; 945 unsigned Offset; 946 std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc); 947 bool Invalid = false; 948 llvm::StringRef Code = SM.getBufferData(File, &Invalid); 949 if (Invalid) 950 return llvm::None; 951 unsigned B = Offset, E = Offset; 952 while (B > 0 && isAsciiIdentifierContinue(Code[B - 1])) 953 --B; 954 while (E < Code.size() && isAsciiIdentifierContinue(Code[E])) 955 ++E; 956 if (B == E) 957 return llvm::None; 958 959 SpelledWord Result; 960 Result.Location = SM.getComposedLoc(File, B); 961 Result.Text = Code.slice(B, E); 962 Result.LikelyIdentifier = 963 isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) && 964 // should not be a keyword 965 tok::isAnyIdentifier( 966 IdentifierTable(LangOpts).get(Result.Text).getTokenID()); 967 for (const auto &T : Touching) 968 if (T.location() <= Result.Location) 969 Result.PartOfSpelledToken = &T; 970 return Result; 971 } 972 973 llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok, 974 Preprocessor &PP) { 975 if (SpelledTok.kind() != tok::identifier) 976 return None; 977 SourceLocation Loc = SpelledTok.location(); 978 assert(Loc.isFileID()); 979 const auto &SM = PP.getSourceManager(); 980 IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM)); 981 if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition()) 982 return None; 983 984 // We need to take special case to handle #define and #undef. 985 // Preprocessor::getMacroDefinitionAtLoc() only considers a macro 986 // definition to be in scope *after* the location of the macro name in a 987 // #define that introduces it, and *before* the location of the macro name 988 // in an #undef that undefines it. To handle these cases, we check for 989 // the macro being in scope either just after or just before the location 990 // of the token. In getting the location before, we also take care to check 991 // for start-of-file. 992 FileID FID = SM.getFileID(Loc); 993 assert(Loc != SM.getLocForEndOfFile(FID)); 994 SourceLocation JustAfterToken = Loc.getLocWithOffset(1); 995 auto *MacroInfo = 996 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); 997 if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { 998 SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); 999 MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) 1000 .getMacroInfo(); 1001 } 1002 if (!MacroInfo) { 1003 return None; 1004 } 1005 return DefinedMacro{ 1006 IdentifierInfo->getName(), MacroInfo, 1007 translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; 1008 } 1009 1010 llvm::Expected<std::string> Edit::apply() const { 1011 return tooling::applyAllReplacements(InitialCode, Replacements); 1012 } 1013 1014 std::vector<TextEdit> Edit::asTextEdits() const { 1015 return replacementsToEdits(InitialCode, Replacements); 1016 } 1017 1018 bool Edit::canApplyTo(llvm::StringRef Code) const { 1019 // Create line iterators, since line numbers are important while applying our 1020 // edit we cannot skip blank lines. 1021 auto LHS = llvm::MemoryBuffer::getMemBuffer(Code); 1022 llvm::line_iterator LHSIt(*LHS, /*SkipBlanks=*/false); 1023 1024 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode); 1025 llvm::line_iterator RHSIt(*RHS, /*SkipBlanks=*/false); 1026 1027 // Compare the InitialCode we prepared the edit for with the Code we received 1028 // line by line to make sure there are no differences. 1029 // FIXME: This check is too conservative now, it should be enough to only 1030 // check lines around the replacements contained inside the Edit. 1031 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) { 1032 if (*LHSIt != *RHSIt) 1033 return false; 1034 ++LHSIt; 1035 ++RHSIt; 1036 } 1037 1038 // After we reach EOF for any of the files we make sure the other one doesn't 1039 // contain any additional content except empty lines, they should not 1040 // interfere with the edit we produced. 1041 while (!LHSIt.is_at_eof()) { 1042 if (!LHSIt->empty()) 1043 return false; 1044 ++LHSIt; 1045 } 1046 while (!RHSIt.is_at_eof()) { 1047 if (!RHSIt->empty()) 1048 return false; 1049 ++RHSIt; 1050 } 1051 return true; 1052 } 1053 1054 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { 1055 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) 1056 E.Replacements = std::move(*NewEdits); 1057 else 1058 return NewEdits.takeError(); 1059 return llvm::Error::success(); 1060 } 1061 1062 llvm::Error applyChange(std::string &Contents, 1063 const TextDocumentContentChangeEvent &Change) { 1064 if (!Change.range) { 1065 Contents = Change.text; 1066 return llvm::Error::success(); 1067 } 1068 1069 const Position &Start = Change.range->start; 1070 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); 1071 if (!StartIndex) 1072 return StartIndex.takeError(); 1073 1074 const Position &End = Change.range->end; 1075 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); 1076 if (!EndIndex) 1077 return EndIndex.takeError(); 1078 1079 if (*EndIndex < *StartIndex) 1080 return error(llvm::errc::invalid_argument, 1081 "Range's end position ({0}) is before start position ({1})", 1082 End, Start); 1083 1084 // Since the range length between two LSP positions is dependent on the 1085 // contents of the buffer we compute the range length between the start and 1086 // end position ourselves and compare it to the range length of the LSP 1087 // message to verify the buffers of the client and server are in sync. 1088 1089 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 1090 // code units. 1091 ssize_t ComputedRangeLength = 1092 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); 1093 1094 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) 1095 return error(llvm::errc::invalid_argument, 1096 "Change's rangeLength ({0}) doesn't match the " 1097 "computed range length ({1}).", 1098 *Change.rangeLength, ComputedRangeLength); 1099 1100 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); 1101 1102 return llvm::Error::success(); 1103 } 1104 1105 EligibleRegion getEligiblePoints(llvm::StringRef Code, 1106 llvm::StringRef FullyQualifiedName, 1107 const LangOptions &LangOpts) { 1108 EligibleRegion ER; 1109 // Start with global namespace. 1110 std::vector<std::string> Enclosing = {""}; 1111 // FIXME: In addition to namespaces try to generate events for function 1112 // definitions as well. One might use a closing parantheses(")" followed by an 1113 // opening brace "{" to trigger the start. 1114 parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) { 1115 // Using Directives only introduces declarations to current scope, they do 1116 // not change the current namespace, so skip them. 1117 if (Event.Trigger == NamespaceEvent::UsingDirective) 1118 return; 1119 // Do not qualify the global namespace. 1120 if (!Event.Payload.empty()) 1121 Event.Payload.append("::"); 1122 1123 std::string CurrentNamespace; 1124 if (Event.Trigger == NamespaceEvent::BeginNamespace) { 1125 Enclosing.emplace_back(std::move(Event.Payload)); 1126 CurrentNamespace = Enclosing.back(); 1127 // parseNameSpaceEvents reports the beginning position of a token; we want 1128 // to insert after '{', so increment by one. 1129 ++Event.Pos.character; 1130 } else { 1131 // Event.Payload points to outer namespace when exiting a scope, so use 1132 // the namespace we've last entered instead. 1133 CurrentNamespace = std::move(Enclosing.back()); 1134 Enclosing.pop_back(); 1135 assert(Enclosing.back() == Event.Payload); 1136 } 1137 1138 // Ignore namespaces that are not a prefix of the target. 1139 if (!FullyQualifiedName.startswith(CurrentNamespace)) 1140 return; 1141 1142 // Prefer the namespace that shares the longest prefix with target. 1143 if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) { 1144 ER.EligiblePoints.clear(); 1145 ER.EnclosingNamespace = CurrentNamespace; 1146 } 1147 if (CurrentNamespace.size() == ER.EnclosingNamespace.size()) 1148 ER.EligiblePoints.emplace_back(std::move(Event.Pos)); 1149 }); 1150 // If there were no shared namespaces just return EOF. 1151 if (ER.EligiblePoints.empty()) { 1152 assert(ER.EnclosingNamespace.empty()); 1153 ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size())); 1154 } 1155 return ER; 1156 } 1157 1158 bool isHeaderFile(llvm::StringRef FileName, 1159 llvm::Optional<LangOptions> LangOpts) { 1160 // Respect the langOpts, for non-file-extension cases, e.g. standard library 1161 // files. 1162 if (LangOpts && LangOpts->IsHeaderFile) 1163 return true; 1164 namespace types = clang::driver::types; 1165 auto Lang = types::lookupTypeForExtension( 1166 llvm::sys::path::extension(FileName).substr(1)); 1167 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang); 1168 } 1169 1170 bool isProtoFile(SourceLocation Loc, const SourceManager &SM) { 1171 auto FileName = SM.getFilename(Loc); 1172 if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h")) 1173 return false; 1174 auto FID = SM.getFileID(Loc); 1175 // All proto generated headers should start with this line. 1176 static const char *ProtoHeaderComment = 1177 "// Generated by the protocol buffer compiler. DO NOT EDIT!"; 1178 // Double check that this is an actual protobuf header. 1179 return SM.getBufferData(FID).startswith(ProtoHeaderComment); 1180 } 1181 1182 namespace { 1183 1184 // Is Line an #if or #ifdef directive? 1185 // FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non 1186 // self-contained and is probably not what we want. 1187 bool isIf(llvm::StringRef Line) { 1188 Line = Line.ltrim(); 1189 if (!Line.consume_front("#")) 1190 return false; 1191 Line = Line.ltrim(); 1192 return Line.startswith("if"); 1193 } 1194 1195 // Is Line an #error directive mentioning includes? 1196 bool isErrorAboutInclude(llvm::StringRef Line) { 1197 Line = Line.ltrim(); 1198 if (!Line.consume_front("#")) 1199 return false; 1200 Line = Line.ltrim(); 1201 if (!Line.startswith("error")) 1202 return false; 1203 return Line.contains_insensitive( 1204 "includ"); // Matches "include" or "including". 1205 } 1206 1207 // Heuristically headers that only want to be included via an umbrella. 1208 bool isDontIncludeMeHeader(llvm::StringRef Content) { 1209 llvm::StringRef Line; 1210 // Only sniff up to 100 lines or 10KB. 1211 Content = Content.take_front(100 * 100); 1212 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { 1213 std::tie(Line, Content) = Content.split('\n'); 1214 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) 1215 return true; 1216 } 1217 return false; 1218 } 1219 1220 } // namespace 1221 1222 bool isSelfContainedHeader(const FileEntry *FE, FileID FID, 1223 const SourceManager &SM, HeaderSearch &HeaderInfo) { 1224 // FIXME: Should files that have been #import'd be considered 1225 // self-contained? That's really a property of the includer, 1226 // not of the file. 1227 if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && 1228 !HeaderInfo.hasFileBeenImported(FE)) 1229 return false; 1230 // This pattern indicates that a header can't be used without 1231 // particular preprocessor state, usually set up by another header. 1232 return !isDontIncludeMeHeader(SM.getBufferData(FID)); 1233 } 1234 1235 } // namespace clangd 1236 } // namespace clang 1237