1 //===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The pseudoparser tries to match a token stream to the C++ grammar. 10 // Preprocessor #defines and other directives are not part of this grammar, and 11 // should be removed before the file can be parsed. 12 // 13 // Conditional blocks like #if...#else...#endif are particularly tricky, as 14 // simply stripping the directives may not produce a grammatical result: 15 // 16 // return 17 // #ifndef DEBUG 18 // 1 19 // #else 20 // 0 21 // #endif 22 // ; 23 // 24 // This header supports analyzing and removing the directives in a source file. 25 // 26 //===----------------------------------------------------------------------===// 27 28 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 29 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 30 31 #include "Token.h" 32 #include "clang/Basic/TokenKinds.h" 33 #include <optional> 34 #include <variant> 35 #include <vector> 36 37 namespace clang { 38 namespace clangd { 39 40 /// Describes the structure of a source file, as seen by the preprocessor. 41 /// 42 /// The structure is a tree, whose leaves are plain source code and directives, 43 /// and whose internal nodes are #if...#endif sections. 44 /// 45 /// (root) 46 /// |-+ Directive #include <stdio.h> 47 /// |-+ Code int main() { 48 /// | ` printf("hello, "); 49 /// |-+ Conditional -+ Directive #ifndef NDEBUG 50 /// | |-+ Code printf("debug\n"); 51 /// | |-+ Directive #else 52 /// | |-+ Code printf("production\n"); 53 /// | `-+ Directive #endif 54 /// |-+ Code return 0; 55 /// ` } 56 /// 57 /// Unlike the clang preprocessor, we model the full tree explicitly. 58 /// This class does not recognize macro usage, only directives. 59 struct DirectiveTree { 60 /// A range of code (and possibly comments) containing no directives. 61 struct Code { 62 Token::Range Tokens; 63 }; 64 /// A preprocessor directive. 65 struct Directive { 66 /// Raw tokens making up the directive, starting with `#`. 67 Token::Range Tokens; 68 clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword; 69 }; 70 /// A preprocessor conditional section. 71 /// 72 /// This starts with an #if, #ifdef, #ifndef etc directive. 73 /// It covers all #else branches, and spans until the matching #endif. 74 struct Conditional { 75 /// The sequence of directives that introduce top-level alternative parses. 76 /// 77 /// The first branch will have an #if type directive. 78 /// Subsequent branches will have #else type directives. 79 std::vector<std::pair<Directive, DirectiveTree>> Branches; 80 /// The directive terminating the conditional, should be #endif. 81 Directive End; 82 /// The index of the conditional branch we chose as active. 83 /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif). 84 /// The initial tree from `parse()` has no branches marked as taken. 85 /// See `chooseConditionalBranches()`. 86 std::optional<unsigned> Taken; 87 }; 88 89 /// Some piece of the file. {One of Code, Directive, Conditional}. 90 using Chunk = std::variant<Code, Directive, Conditional>; 91 std::vector<Chunk> Chunks; 92 93 /// Extract preprocessor structure by examining the raw tokens. 94 static DirectiveTree parse(const TokenStream &); 95 96 /// Produce a parseable token stream by stripping all directive tokens. 97 /// 98 /// Conditional sections are replaced by the taken branch, if any. 99 /// This tree must describe the provided token stream. 100 TokenStream stripDirectives(const TokenStream &) const; 101 }; 102 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &); 103 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &); 104 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 105 const DirectiveTree::Directive &); 106 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 107 const DirectiveTree::Conditional &); 108 109 /// Selects a "taken" branch for each conditional directive in the file. 110 /// 111 /// The choice is somewhat arbitrary, but aims to produce a useful parse: 112 /// - idioms like `#if 0` are respected 113 /// - we avoid paths that reach `#error` 114 /// - we try to maximize the amount of code seen 115 /// The choice may also be "no branch taken". 116 /// 117 /// Choices are also made for conditionals themselves inside not-taken branches: 118 /// #if 1 // taken! 119 /// #else // not taken 120 /// #if 1 // taken! 121 /// #endif 122 /// #endif 123 /// 124 /// The choices are stored in Conditional::Taken nodes. 125 void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code); 126 127 } // namespace clangd 128 } // namespace clang 129 130 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 131