1bdd1243dSDimitry Andric //===--- HeaderAnalysis.cpp -------------------------------------*- C++ -*-===//
2bdd1243dSDimitry Andric //
3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bdd1243dSDimitry Andric //
7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===//
8bdd1243dSDimitry Andric
9bdd1243dSDimitry Andric #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
10bdd1243dSDimitry Andric #include "clang/Basic/SourceLocation.h"
11bdd1243dSDimitry Andric #include "clang/Lex/HeaderSearch.h"
12bdd1243dSDimitry Andric
13bdd1243dSDimitry Andric namespace clang::tooling {
14bdd1243dSDimitry Andric namespace {
15bdd1243dSDimitry Andric
16bdd1243dSDimitry Andric // Is Line an #if or #ifdef directive?
17bdd1243dSDimitry Andric // FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non
18bdd1243dSDimitry Andric // self-contained and is probably not what we want.
isIf(llvm::StringRef Line)19bdd1243dSDimitry Andric bool isIf(llvm::StringRef Line) {
20bdd1243dSDimitry Andric Line = Line.ltrim();
21bdd1243dSDimitry Andric if (!Line.consume_front("#"))
22bdd1243dSDimitry Andric return false;
23bdd1243dSDimitry Andric Line = Line.ltrim();
24*5f757f3fSDimitry Andric return Line.starts_with("if");
25bdd1243dSDimitry Andric }
26bdd1243dSDimitry Andric
27bdd1243dSDimitry Andric // Is Line an #error directive mentioning includes?
isErrorAboutInclude(llvm::StringRef Line)28bdd1243dSDimitry Andric bool isErrorAboutInclude(llvm::StringRef Line) {
29bdd1243dSDimitry Andric Line = Line.ltrim();
30bdd1243dSDimitry Andric if (!Line.consume_front("#"))
31bdd1243dSDimitry Andric return false;
32bdd1243dSDimitry Andric Line = Line.ltrim();
33*5f757f3fSDimitry Andric if (!Line.starts_with("error"))
34bdd1243dSDimitry Andric return false;
35bdd1243dSDimitry Andric return Line.contains_insensitive(
36bdd1243dSDimitry Andric "includ"); // Matches "include" or "including".
37bdd1243dSDimitry Andric }
38bdd1243dSDimitry Andric
39bdd1243dSDimitry Andric // Heuristically headers that only want to be included via an umbrella.
isDontIncludeMeHeader(StringRef Content)40bdd1243dSDimitry Andric bool isDontIncludeMeHeader(StringRef Content) {
41bdd1243dSDimitry Andric llvm::StringRef Line;
42bdd1243dSDimitry Andric // Only sniff up to 100 lines or 10KB.
43bdd1243dSDimitry Andric Content = Content.take_front(100 * 100);
44bdd1243dSDimitry Andric for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
45bdd1243dSDimitry Andric std::tie(Line, Content) = Content.split('\n');
46bdd1243dSDimitry Andric if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
47bdd1243dSDimitry Andric return true;
48bdd1243dSDimitry Andric }
49bdd1243dSDimitry Andric return false;
50bdd1243dSDimitry Andric }
51bdd1243dSDimitry Andric
isImportLine(llvm::StringRef Line)52bdd1243dSDimitry Andric bool isImportLine(llvm::StringRef Line) {
53bdd1243dSDimitry Andric Line = Line.ltrim();
54bdd1243dSDimitry Andric if (!Line.consume_front("#"))
55bdd1243dSDimitry Andric return false;
56bdd1243dSDimitry Andric Line = Line.ltrim();
57*5f757f3fSDimitry Andric return Line.starts_with("import");
58bdd1243dSDimitry Andric }
59bdd1243dSDimitry Andric
getFileContents(FileEntryRef FE,const SourceManager & SM)60*5f757f3fSDimitry Andric llvm::StringRef getFileContents(FileEntryRef FE, const SourceManager &SM) {
61bdd1243dSDimitry Andric return const_cast<SourceManager &>(SM)
62bdd1243dSDimitry Andric .getMemoryBufferForFileOrNone(FE)
63bdd1243dSDimitry Andric .value_or(llvm::MemoryBufferRef())
64bdd1243dSDimitry Andric .getBuffer();
65bdd1243dSDimitry Andric }
66bdd1243dSDimitry Andric
67bdd1243dSDimitry Andric } // namespace
68bdd1243dSDimitry Andric
isSelfContainedHeader(FileEntryRef FE,const SourceManager & SM,const HeaderSearch & HeaderInfo)69*5f757f3fSDimitry Andric bool isSelfContainedHeader(FileEntryRef FE, const SourceManager &SM,
7006c3fb27SDimitry Andric const HeaderSearch &HeaderInfo) {
71bdd1243dSDimitry Andric if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) &&
72bdd1243dSDimitry Andric !HeaderInfo.hasFileBeenImported(FE) &&
73bdd1243dSDimitry Andric // Any header that contains #imports is supposed to be #import'd so no
74bdd1243dSDimitry Andric // need to check for anything but the main-file.
75bdd1243dSDimitry Andric (SM.getFileEntryForID(SM.getMainFileID()) != FE ||
76bdd1243dSDimitry Andric !codeContainsImports(getFileContents(FE, SM))))
77bdd1243dSDimitry Andric return false;
78bdd1243dSDimitry Andric // This pattern indicates that a header can't be used without
79bdd1243dSDimitry Andric // particular preprocessor state, usually set up by another header.
80bdd1243dSDimitry Andric return !isDontIncludeMeHeader(getFileContents(FE, SM));
81bdd1243dSDimitry Andric }
82bdd1243dSDimitry Andric
codeContainsImports(llvm::StringRef Code)83bdd1243dSDimitry Andric bool codeContainsImports(llvm::StringRef Code) {
84bdd1243dSDimitry Andric // Only sniff up to 100 lines or 10KB.
85bdd1243dSDimitry Andric Code = Code.take_front(100 * 100);
86bdd1243dSDimitry Andric llvm::StringRef Line;
87bdd1243dSDimitry Andric for (unsigned I = 0; I < 100 && !Code.empty(); ++I) {
88bdd1243dSDimitry Andric std::tie(Line, Code) = Code.split('\n');
89bdd1243dSDimitry Andric if (isImportLine(Line))
90bdd1243dSDimitry Andric return true;
91bdd1243dSDimitry Andric }
92bdd1243dSDimitry Andric return false;
93bdd1243dSDimitry Andric }
94bdd1243dSDimitry Andric
parseIWYUPragma(const char * Text)95bdd1243dSDimitry Andric std::optional<StringRef> parseIWYUPragma(const char *Text) {
96bdd1243dSDimitry Andric // Skip the comment start, // or /*.
97bdd1243dSDimitry Andric if (Text[0] != '/' || (Text[1] != '/' && Text[1] != '*'))
98bdd1243dSDimitry Andric return std::nullopt;
99bdd1243dSDimitry Andric bool BlockComment = Text[1] == '*';
100bdd1243dSDimitry Andric Text += 2;
101bdd1243dSDimitry Andric
102bdd1243dSDimitry Andric // Per spec, direcitves are whitespace- and case-sensitive.
103bdd1243dSDimitry Andric constexpr llvm::StringLiteral IWYUPragma = " IWYU pragma: ";
104bdd1243dSDimitry Andric if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size()))
105bdd1243dSDimitry Andric return std::nullopt;
106bdd1243dSDimitry Andric Text += IWYUPragma.size();
107bdd1243dSDimitry Andric const char *End = Text;
108bdd1243dSDimitry Andric while (*End != 0 && *End != '\n')
109bdd1243dSDimitry Andric ++End;
110bdd1243dSDimitry Andric StringRef Rest(Text, End - Text);
111bdd1243dSDimitry Andric // Strip off whitespace and comment markers to avoid confusion. This isn't
112bdd1243dSDimitry Andric // fully-compatible with IWYU, which splits into whitespace-delimited tokens.
113bdd1243dSDimitry Andric if (BlockComment)
114bdd1243dSDimitry Andric Rest.consume_back("*/");
115bdd1243dSDimitry Andric return Rest.trim();
116bdd1243dSDimitry Andric }
117bdd1243dSDimitry Andric
118bdd1243dSDimitry Andric } // namespace clang::tooling
119