1 //===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DirectiveTree.h" 10 #include "clang/Basic/IdentifierTable.h" 11 #include "clang/Basic/TokenKinds.h" 12 #include "llvm/Support/FormatVariadic.h" 13 #include <optional> 14 #include <variant> 15 16 namespace clang { 17 namespace clangd { 18 namespace { 19 20 class DirectiveParser { 21 public: 22 explicit DirectiveParser(const TokenStream &Code) 23 : Code(Code), Tok(&Code.front()) {} 24 void parse(DirectiveTree *Result) { parse(Result, /*TopLevel=*/true); } 25 26 private: 27 // Roles that a directive might take within a conditional block. 28 enum class Cond { None, If, Else, End }; 29 static Cond classifyDirective(tok::PPKeywordKind K) { 30 switch (K) { 31 case clang::tok::pp_if: 32 case clang::tok::pp_ifdef: 33 case clang::tok::pp_ifndef: 34 return Cond::If; 35 case clang::tok::pp_elif: 36 case clang::tok::pp_elifdef: 37 case clang::tok::pp_elifndef: 38 case clang::tok::pp_else: 39 return Cond::Else; 40 case clang::tok::pp_endif: 41 return Cond::End; 42 default: 43 return Cond::None; 44 } 45 } 46 47 // Parses tokens starting at Tok into Tree. 48 // If we reach an End or Else directive that ends Tree, returns it. 49 // If TopLevel is true, then we do not expect End and always return 50 // std::nullopt. 51 std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree, 52 bool TopLevel) { 53 auto StartsDirective = 54 [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { 55 if (Tok->flag(LexFlags::StartsPPLine)) { 56 // If we considered a comment at the start of a PP-line, it doesn't 57 // start a directive but the directive can still start after it. 58 if (Tok->Kind == tok::comment) 59 AllowDirectiveAt = Tok + 1; 60 return Tok->Kind == tok::hash; 61 } 62 return Tok->Kind == tok::hash && AllowDirectiveAt == Tok; 63 }; 64 // Each iteration adds one chunk (or returns, if we see #endif). 65 while (Tok->Kind != tok::eof) { 66 // If there's no directive here, we have a code chunk. 67 if (!StartsDirective()) { 68 const Token *Start = Tok; 69 do 70 ++Tok; 71 while (Tok->Kind != tok::eof && !StartsDirective()); 72 Tree->Chunks.push_back(DirectiveTree::Code{ 73 Token::Range{Code.index(*Start), Code.index(*Tok)}}); 74 continue; 75 } 76 77 // We have some kind of directive. 78 DirectiveTree::Directive Directive; 79 parseDirective(&Directive); 80 Cond Kind = classifyDirective(Directive.Kind); 81 if (Kind == Cond::If) { 82 // #if or similar, starting a nested conditional block. 83 DirectiveTree::Conditional Conditional; 84 Conditional.Branches.emplace_back(); 85 Conditional.Branches.back().first = std::move(Directive); 86 parseConditional(&Conditional); 87 Tree->Chunks.push_back(std::move(Conditional)); 88 } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { 89 // #endif or similar, ending this PStructure scope. 90 // (#endif is unexpected at the top level, treat as simple directive). 91 return std::move(Directive); 92 } else { 93 // #define or similar, a simple directive at the current scope. 94 Tree->Chunks.push_back(std::move(Directive)); 95 } 96 } 97 return std::nullopt; 98 } 99 100 // Parse the rest of a conditional section, after seeing the If directive. 101 // Returns after consuming the End directive. 102 void parseConditional(DirectiveTree::Conditional *C) { 103 assert(C->Branches.size() == 1 && 104 C->Branches.front().second.Chunks.empty() && 105 "Should be ready to parse first branch body"); 106 while (Tok->Kind != tok::eof) { 107 auto Terminator = parse(&C->Branches.back().second, /*TopLevel=*/false); 108 if (!Terminator) { 109 assert(Tok->Kind == tok::eof && "gave up parsing before eof?"); 110 C->End.Tokens = Token::Range::emptyAt(Code.index(*Tok)); 111 return; 112 } 113 if (classifyDirective(Terminator->Kind) == Cond::End) { 114 C->End = std::move(*Terminator); 115 return; 116 } 117 assert(classifyDirective(Terminator->Kind) == Cond::Else && 118 "ended branch unexpectedly"); 119 C->Branches.emplace_back(); 120 C->Branches.back().first = std::move(*Terminator); 121 } 122 } 123 124 // Parse a directive. Tok is the hash. 125 void parseDirective(DirectiveTree::Directive *D) { 126 assert(Tok->Kind == tok::hash); 127 128 // Directive spans from the hash until the end of line or file. 129 const Token *Begin = Tok++; 130 while (Tok->Kind != tok::eof && !Tok->flag(LexFlags::StartsPPLine)) 131 ++Tok; 132 ArrayRef<Token> Tokens{Begin, Tok}; 133 D->Tokens = {Code.index(*Tokens.begin()), Code.index(*Tokens.end())}; 134 135 // Directive name is the first non-comment token after the hash. 136 Tokens = Tokens.drop_front().drop_while( 137 [](const Token &T) { return T.Kind == tok::comment; }); 138 if (!Tokens.empty()) 139 D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID(); 140 } 141 142 const TokenStream &Code; 143 const Token *Tok; 144 clang::IdentifierTable PPKeywords; 145 }; 146 147 struct Dumper { 148 llvm::raw_ostream &OS; 149 unsigned Indent = 0; 150 151 Dumper(llvm::raw_ostream& OS) : OS(OS) {} 152 void operator()(const DirectiveTree& Tree) { 153 for (const auto& Chunk : Tree.Chunks) 154 std::visit(*this, Chunk); 155 } 156 void operator()(const DirectiveTree::Conditional &Conditional) { 157 for (unsigned I = 0; I < Conditional.Branches.size(); ++I) { 158 const auto &Branch = Conditional.Branches[I]; 159 (*this)(Branch.first, Conditional.Taken == I); 160 Indent += 2; 161 (*this)(Branch.second); 162 Indent -= 2; 163 } 164 (*this)(Conditional.End); 165 } 166 void operator()(const DirectiveTree::Directive &Directive, 167 bool Taken = false) { 168 OS.indent(Indent) << llvm::formatv( 169 "#{0} ({1} tokens){2}\n", tok::getPPKeywordSpelling(Directive.Kind), 170 Directive.Tokens.size(), Taken ? " TAKEN" : ""); 171 } 172 void operator()(const DirectiveTree::Code &Code) { 173 OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", 174 Code.Tokens.size()); 175 } 176 }; 177 } // namespace 178 179 DirectiveTree DirectiveTree::parse(const TokenStream &Code) { 180 DirectiveTree Result; 181 DirectiveParser(Code).parse(&Result); 182 return Result; 183 } 184 185 // Define operator<< in terms of dump() functions above. 186 #define OSTREAM_DUMP(Type) \ 187 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \ 188 Dumper{OS}(T); \ 189 return OS; \ 190 } 191 OSTREAM_DUMP(DirectiveTree) 192 OSTREAM_DUMP(DirectiveTree::Directive) 193 OSTREAM_DUMP(DirectiveTree::Conditional) 194 OSTREAM_DUMP(DirectiveTree::Code) 195 #undef OSTREAM_DUMP 196 197 namespace { 198 // Makes choices about conditional branches. 199 // 200 // Generally it tries to maximize the amount of useful code we see. 201 // 202 // Caveat: each conditional is evaluated independently. Consider this code: 203 // #ifdef WINDOWS 204 // bool isWindows = true; 205 // #endif 206 // #ifndef WINDOWS 207 // bool isWindows = false; 208 // #endif 209 // We take both branches and define isWindows twice. We could track more state 210 // in order to produce a consistent view, but this is complex. 211 class BranchChooser { 212 public: 213 BranchChooser(const TokenStream &Code) : Code(Code) {} 214 215 // Describes code seen by making particular branch choices. Higher is better. 216 struct Score { 217 int Tokens = 0; // excluding comments and directives 218 int Directives = 0; 219 int Errors = 0; // #error directives 220 221 bool operator>(const Score &Other) const { 222 // Seeing errors is bad, other things are good. 223 return std::make_tuple(-Errors, Tokens, Directives) > 224 std::make_tuple(-Other.Errors, Other.Tokens, Other.Directives); 225 } 226 227 Score &operator+=(const Score &Other) { 228 Tokens += Other.Tokens; 229 Directives += Other.Directives; 230 Errors += Other.Errors; 231 return *this; 232 } 233 }; 234 235 Score operator()(DirectiveTree::Code &C) { 236 Score S; 237 for (const Token &T : Code.tokens(C.Tokens)) 238 if (T.Kind != tok::comment) 239 ++S.Tokens; 240 return S; 241 } 242 243 Score operator()(DirectiveTree::Directive &D) { 244 Score S; 245 S.Directives = 1; 246 S.Errors = D.Kind == tok::pp_error; 247 return S; 248 } 249 250 Score operator()(DirectiveTree::Conditional &C) { 251 Score Best; 252 bool MayTakeTrivial = true; 253 bool TookTrivial = false; 254 255 for (unsigned I = 0; I < C.Branches.size(); ++I) { 256 // Walk the branch to make its nested choices in any case. 257 Score BranchScore = walk(C.Branches[I].second); 258 // If we already took an #if 1, don't consider any other branches. 259 if (TookTrivial) 260 continue; 261 // Is this a trivial #if 0 or #if 1? 262 if (auto TriviallyTaken = isTakenWhenReached(C.Branches[I].first)) { 263 if (!*TriviallyTaken) 264 continue; // Don't consider #if 0 even if it scores well. 265 if (MayTakeTrivial) 266 TookTrivial = true; 267 } else { 268 // After a nontrivial condition, #elif 1 isn't guaranteed taken. 269 MayTakeTrivial = false; 270 } 271 // Is this the best branch so far? (Including if it's #if 1). 272 if (TookTrivial || !C.Taken || BranchScore > Best) { 273 Best = BranchScore; 274 C.Taken = I; 275 } 276 } 277 return Best; 278 } 279 Score walk(DirectiveTree &M) { 280 Score S; 281 for (auto &C : M.Chunks) 282 S += std::visit(*this, C); 283 return S; 284 } 285 286 private: 287 // Return true if the directive starts an always-taken conditional branch, 288 // false if the branch is never taken, and std::nullopt otherwise. 289 std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) { 290 switch (Dir.Kind) { 291 case clang::tok::pp_if: 292 case clang::tok::pp_elif: 293 break; // handled below 294 case clang::tok::pp_else: 295 return true; 296 default: // #ifdef etc 297 return std::nullopt; 298 } 299 300 const auto &Tokens = Code.tokens(Dir.Tokens); 301 assert(!Tokens.empty() && Tokens.front().Kind == tok::hash); 302 const Token &Name = Tokens.front().nextNC(); 303 const Token &Value = Name.nextNC(); 304 // Does the condition consist of exactly one token? 305 if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end()) 306 return std::nullopt; 307 return llvm::StringSwitch<std::optional<bool>>(Value.text()) 308 .Cases("true", "1", true) 309 .Cases("false", "0", false) 310 .Default(std::nullopt); 311 } 312 313 const TokenStream &Code; 314 }; 315 316 } // namespace 317 318 void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) { 319 BranchChooser{Code}.walk(Tree); 320 } 321 322 namespace { 323 class Preprocessor { 324 const TokenStream &In; 325 TokenStream &Out; 326 327 public: 328 Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {} 329 ~Preprocessor() { Out.finalize(); } 330 331 Preprocessor(const Preprocessor &other) = delete; 332 Preprocessor &operator=(const Preprocessor &other) = delete; 333 334 void walk(const DirectiveTree &T) { 335 for (const auto &C : T.Chunks) 336 std::visit(*this, C); 337 } 338 339 void operator()(const DirectiveTree::Code &C) { 340 for (const auto &Tok : In.tokens(C.Tokens)) 341 Out.push(Tok); 342 } 343 344 void operator()(const DirectiveTree::Directive &) {} 345 346 void operator()(const DirectiveTree::Conditional &C) { 347 if (C.Taken) 348 walk(C.Branches[*C.Taken].second); 349 } 350 }; 351 } // namespace 352 353 TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const { 354 TokenStream Out; 355 Preprocessor(In, Out).walk(*this); 356 return Out; 357 } 358 359 } // namespace clangd 360 } // namespace clang 361