1*ed8f7882SAaron Ballman //===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===// 2*ed8f7882SAaron Ballman // 3*ed8f7882SAaron Ballman // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*ed8f7882SAaron Ballman // See https://llvm.org/LICENSE.txt for license information. 5*ed8f7882SAaron Ballman // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*ed8f7882SAaron Ballman // 7*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===// 8*ed8f7882SAaron Ballman // 9*ed8f7882SAaron Ballman // The pseudoparser tries to match a token stream to the C++ grammar. 10*ed8f7882SAaron Ballman // Preprocessor #defines and other directives are not part of this grammar, and 11*ed8f7882SAaron Ballman // should be removed before the file can be parsed. 12*ed8f7882SAaron Ballman // 13*ed8f7882SAaron Ballman // Conditional blocks like #if...#else...#endif are particularly tricky, as 14*ed8f7882SAaron Ballman // simply stripping the directives may not produce a grammatical result: 15*ed8f7882SAaron Ballman // 16*ed8f7882SAaron Ballman // return 17*ed8f7882SAaron Ballman // #ifndef DEBUG 18*ed8f7882SAaron Ballman // 1 19*ed8f7882SAaron Ballman // #else 20*ed8f7882SAaron Ballman // 0 21*ed8f7882SAaron Ballman // #endif 22*ed8f7882SAaron Ballman // ; 23*ed8f7882SAaron Ballman // 24*ed8f7882SAaron Ballman // This header supports analyzing and removing the directives in a source file. 25*ed8f7882SAaron Ballman // 26*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===// 27*ed8f7882SAaron Ballman 28*ed8f7882SAaron Ballman #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 29*ed8f7882SAaron Ballman #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 30*ed8f7882SAaron Ballman 31*ed8f7882SAaron Ballman #include "Token.h" 32*ed8f7882SAaron Ballman #include "clang/Basic/TokenKinds.h" 33*ed8f7882SAaron Ballman #include <optional> 34*ed8f7882SAaron Ballman #include <variant> 35*ed8f7882SAaron Ballman #include <vector> 36*ed8f7882SAaron Ballman 37*ed8f7882SAaron Ballman namespace clang { 38*ed8f7882SAaron Ballman namespace clangd { 39*ed8f7882SAaron Ballman 40*ed8f7882SAaron Ballman /// Describes the structure of a source file, as seen by the preprocessor. 41*ed8f7882SAaron Ballman /// 42*ed8f7882SAaron Ballman /// The structure is a tree, whose leaves are plain source code and directives, 43*ed8f7882SAaron Ballman /// and whose internal nodes are #if...#endif sections. 44*ed8f7882SAaron Ballman /// 45*ed8f7882SAaron Ballman /// (root) 46*ed8f7882SAaron Ballman /// |-+ Directive #include <stdio.h> 47*ed8f7882SAaron Ballman /// |-+ Code int main() { 48*ed8f7882SAaron Ballman /// | ` printf("hello, "); 49*ed8f7882SAaron Ballman /// |-+ Conditional -+ Directive #ifndef NDEBUG 50*ed8f7882SAaron Ballman /// | |-+ Code printf("debug\n"); 51*ed8f7882SAaron Ballman /// | |-+ Directive #else 52*ed8f7882SAaron Ballman /// | |-+ Code printf("production\n"); 53*ed8f7882SAaron Ballman /// | `-+ Directive #endif 54*ed8f7882SAaron Ballman /// |-+ Code return 0; 55*ed8f7882SAaron Ballman /// ` } 56*ed8f7882SAaron Ballman /// 57*ed8f7882SAaron Ballman /// Unlike the clang preprocessor, we model the full tree explicitly. 58*ed8f7882SAaron Ballman /// This class does not recognize macro usage, only directives. 59*ed8f7882SAaron Ballman struct DirectiveTree { 60*ed8f7882SAaron Ballman /// A range of code (and possibly comments) containing no directives. 61*ed8f7882SAaron Ballman struct Code { 62*ed8f7882SAaron Ballman Token::Range Tokens; 63*ed8f7882SAaron Ballman }; 64*ed8f7882SAaron Ballman /// A preprocessor directive. 65*ed8f7882SAaron Ballman struct Directive { 66*ed8f7882SAaron Ballman /// Raw tokens making up the directive, starting with `#`. 67*ed8f7882SAaron Ballman Token::Range Tokens; 68*ed8f7882SAaron Ballman clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword; 69*ed8f7882SAaron Ballman }; 70*ed8f7882SAaron Ballman /// A preprocessor conditional section. 71*ed8f7882SAaron Ballman /// 72*ed8f7882SAaron Ballman /// This starts with an #if, #ifdef, #ifndef etc directive. 73*ed8f7882SAaron Ballman /// It covers all #else branches, and spans until the matching #endif. 74*ed8f7882SAaron Ballman struct Conditional { 75*ed8f7882SAaron Ballman /// The sequence of directives that introduce top-level alternative parses. 76*ed8f7882SAaron Ballman /// 77*ed8f7882SAaron Ballman /// The first branch will have an #if type directive. 78*ed8f7882SAaron Ballman /// Subsequent branches will have #else type directives. 79*ed8f7882SAaron Ballman std::vector<std::pair<Directive, DirectiveTree>> Branches; 80*ed8f7882SAaron Ballman /// The directive terminating the conditional, should be #endif. 81*ed8f7882SAaron Ballman Directive End; 82*ed8f7882SAaron Ballman /// The index of the conditional branch we chose as active. 83*ed8f7882SAaron Ballman /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif). 84*ed8f7882SAaron Ballman /// The initial tree from `parse()` has no branches marked as taken. 85*ed8f7882SAaron Ballman /// See `chooseConditionalBranches()`. 86*ed8f7882SAaron Ballman std::optional<unsigned> Taken; 87*ed8f7882SAaron Ballman }; 88*ed8f7882SAaron Ballman 89*ed8f7882SAaron Ballman /// Some piece of the file. {One of Code, Directive, Conditional}. 90*ed8f7882SAaron Ballman using Chunk = std::variant<Code, Directive, Conditional>; 91*ed8f7882SAaron Ballman std::vector<Chunk> Chunks; 92*ed8f7882SAaron Ballman 93*ed8f7882SAaron Ballman /// Extract preprocessor structure by examining the raw tokens. 94*ed8f7882SAaron Ballman static DirectiveTree parse(const TokenStream &); 95*ed8f7882SAaron Ballman 96*ed8f7882SAaron Ballman /// Produce a parseable token stream by stripping all directive tokens. 97*ed8f7882SAaron Ballman /// 98*ed8f7882SAaron Ballman /// Conditional sections are replaced by the taken branch, if any. 99*ed8f7882SAaron Ballman /// This tree must describe the provided token stream. 100*ed8f7882SAaron Ballman TokenStream stripDirectives(const TokenStream &) const; 101*ed8f7882SAaron Ballman }; 102*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &); 103*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &); 104*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, 105*ed8f7882SAaron Ballman const DirectiveTree::Directive &); 106*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, 107*ed8f7882SAaron Ballman const DirectiveTree::Conditional &); 108*ed8f7882SAaron Ballman 109*ed8f7882SAaron Ballman /// Selects a "taken" branch for each conditional directive in the file. 110*ed8f7882SAaron Ballman /// 111*ed8f7882SAaron Ballman /// The choice is somewhat arbitrary, but aims to produce a useful parse: 112*ed8f7882SAaron Ballman /// - idioms like `#if 0` are respected 113*ed8f7882SAaron Ballman /// - we avoid paths that reach `#error` 114*ed8f7882SAaron Ballman /// - we try to maximize the amount of code seen 115*ed8f7882SAaron Ballman /// The choice may also be "no branch taken". 116*ed8f7882SAaron Ballman /// 117*ed8f7882SAaron Ballman /// Choices are also made for conditionals themselves inside not-taken branches: 118*ed8f7882SAaron Ballman /// #if 1 // taken! 119*ed8f7882SAaron Ballman /// #else // not taken 120*ed8f7882SAaron Ballman /// #if 1 // taken! 121*ed8f7882SAaron Ballman /// #endif 122*ed8f7882SAaron Ballman /// #endif 123*ed8f7882SAaron Ballman /// 124*ed8f7882SAaron Ballman /// The choices are stored in Conditional::Taken nodes. 125*ed8f7882SAaron Ballman void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code); 126*ed8f7882SAaron Ballman 127*ed8f7882SAaron Ballman } // namespace clangd 128*ed8f7882SAaron Ballman } // namespace clang 129*ed8f7882SAaron Ballman 130*ed8f7882SAaron Ballman #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H 131