xref: /llvm-project/clang-tools-extra/clangd/support/DirectiveTree.h (revision ed8f78827895050442f544edef2933a60d4a7935)
//===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The pseudoparser tries to match a token stream to the C++ grammar.
// Preprocessor #defines and other directives are not part of this grammar, and
// should be removed before the file can be parsed.
//
// Conditional blocks like #if...#else...#endif are particularly tricky, as
// simply stripping the directives may not produce a grammatical result:
//
//   return
//     #ifndef DEBUG
//       1
//     #else
//       0
//     #endif
//       ;
//
// This header supports analyzing and removing the directives in a source file.
//
//===----------------------------------------------------------------------===//
27*ed8f7882SAaron Ballman 
28*ed8f7882SAaron Ballman #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
29*ed8f7882SAaron Ballman #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
30*ed8f7882SAaron Ballman 
31*ed8f7882SAaron Ballman #include "Token.h"
32*ed8f7882SAaron Ballman #include "clang/Basic/TokenKinds.h"
33*ed8f7882SAaron Ballman #include <optional>
34*ed8f7882SAaron Ballman #include <variant>
35*ed8f7882SAaron Ballman #include <vector>
36*ed8f7882SAaron Ballman 
37*ed8f7882SAaron Ballman namespace clang {
38*ed8f7882SAaron Ballman namespace clangd {
39*ed8f7882SAaron Ballman 
40*ed8f7882SAaron Ballman /// Describes the structure of a source file, as seen by the preprocessor.
41*ed8f7882SAaron Ballman ///
42*ed8f7882SAaron Ballman /// The structure is a tree, whose leaves are plain source code and directives,
43*ed8f7882SAaron Ballman /// and whose internal nodes are #if...#endif sections.
44*ed8f7882SAaron Ballman ///
45*ed8f7882SAaron Ballman /// (root)
46*ed8f7882SAaron Ballman /// |-+ Directive                    #include <stdio.h>
47*ed8f7882SAaron Ballman /// |-+ Code                         int main() {
48*ed8f7882SAaron Ballman /// | `                                printf("hello, ");
49*ed8f7882SAaron Ballman /// |-+ Conditional -+ Directive     #ifndef NDEBUG
50*ed8f7882SAaron Ballman /// | |-+ Code                         printf("debug\n");
51*ed8f7882SAaron Ballman /// | |-+ Directive                  #else
52*ed8f7882SAaron Ballman /// | |-+ Code                         printf("production\n");
53*ed8f7882SAaron Ballman /// | `-+ Directive                  #endif
54*ed8f7882SAaron Ballman /// |-+ Code                           return 0;
55*ed8f7882SAaron Ballman ///   `                              }
56*ed8f7882SAaron Ballman ///
57*ed8f7882SAaron Ballman /// Unlike the clang preprocessor, we model the full tree explicitly.
58*ed8f7882SAaron Ballman /// This class does not recognize macro usage, only directives.
59*ed8f7882SAaron Ballman struct DirectiveTree {
60*ed8f7882SAaron Ballman   /// A range of code (and possibly comments) containing no directives.
61*ed8f7882SAaron Ballman   struct Code {
62*ed8f7882SAaron Ballman     Token::Range Tokens;
63*ed8f7882SAaron Ballman   };
64*ed8f7882SAaron Ballman   /// A preprocessor directive.
65*ed8f7882SAaron Ballman   struct Directive {
66*ed8f7882SAaron Ballman     /// Raw tokens making up the directive, starting with `#`.
67*ed8f7882SAaron Ballman     Token::Range Tokens;
68*ed8f7882SAaron Ballman     clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword;
69*ed8f7882SAaron Ballman   };
70*ed8f7882SAaron Ballman   /// A preprocessor conditional section.
71*ed8f7882SAaron Ballman   ///
72*ed8f7882SAaron Ballman   /// This starts with an #if, #ifdef, #ifndef etc directive.
73*ed8f7882SAaron Ballman   /// It covers all #else branches, and spans until the matching #endif.
74*ed8f7882SAaron Ballman   struct Conditional {
75*ed8f7882SAaron Ballman     /// The sequence of directives that introduce top-level alternative parses.
76*ed8f7882SAaron Ballman     ///
77*ed8f7882SAaron Ballman     /// The first branch will have an #if type directive.
78*ed8f7882SAaron Ballman     /// Subsequent branches will have #else type directives.
79*ed8f7882SAaron Ballman     std::vector<std::pair<Directive, DirectiveTree>> Branches;
80*ed8f7882SAaron Ballman     /// The directive terminating the conditional, should be #endif.
81*ed8f7882SAaron Ballman     Directive End;
82*ed8f7882SAaron Ballman     /// The index of the conditional branch we chose as active.
83*ed8f7882SAaron Ballman     /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif).
84*ed8f7882SAaron Ballman     /// The initial tree from `parse()` has no branches marked as taken.
85*ed8f7882SAaron Ballman     /// See `chooseConditionalBranches()`.
86*ed8f7882SAaron Ballman     std::optional<unsigned> Taken;
87*ed8f7882SAaron Ballman   };
88*ed8f7882SAaron Ballman 
89*ed8f7882SAaron Ballman   /// Some piece of the file. {One of Code, Directive, Conditional}.
90*ed8f7882SAaron Ballman   using Chunk = std::variant<Code, Directive, Conditional>;
91*ed8f7882SAaron Ballman   std::vector<Chunk> Chunks;
92*ed8f7882SAaron Ballman 
93*ed8f7882SAaron Ballman   /// Extract preprocessor structure by examining the raw tokens.
94*ed8f7882SAaron Ballman   static DirectiveTree parse(const TokenStream &);
95*ed8f7882SAaron Ballman 
96*ed8f7882SAaron Ballman   /// Produce a parseable token stream by stripping all directive tokens.
97*ed8f7882SAaron Ballman   ///
98*ed8f7882SAaron Ballman   /// Conditional sections are replaced by the taken branch, if any.
99*ed8f7882SAaron Ballman   /// This tree must describe the provided token stream.
100*ed8f7882SAaron Ballman   TokenStream stripDirectives(const TokenStream &) const;
101*ed8f7882SAaron Ballman };
102*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
103*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &);
104*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &,
105*ed8f7882SAaron Ballman                               const DirectiveTree::Directive &);
106*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &,
107*ed8f7882SAaron Ballman                               const DirectiveTree::Conditional &);
108*ed8f7882SAaron Ballman 
109*ed8f7882SAaron Ballman /// Selects a "taken" branch for each conditional directive in the file.
110*ed8f7882SAaron Ballman ///
111*ed8f7882SAaron Ballman /// The choice is somewhat arbitrary, but aims to produce a useful parse:
112*ed8f7882SAaron Ballman ///  - idioms like `#if 0` are respected
113*ed8f7882SAaron Ballman ///  - we avoid paths that reach `#error`
114*ed8f7882SAaron Ballman ///  - we try to maximize the amount of code seen
115*ed8f7882SAaron Ballman /// The choice may also be "no branch taken".
116*ed8f7882SAaron Ballman ///
117*ed8f7882SAaron Ballman /// Choices are also made for conditionals themselves inside not-taken branches:
118*ed8f7882SAaron Ballman ///   #if 1 // taken!
119*ed8f7882SAaron Ballman ///   #else // not taken
120*ed8f7882SAaron Ballman ///      #if 1 // taken!
121*ed8f7882SAaron Ballman ///      #endif
122*ed8f7882SAaron Ballman ///   #endif
123*ed8f7882SAaron Ballman ///
124*ed8f7882SAaron Ballman /// The choices are stored in Conditional::Taken nodes.
125*ed8f7882SAaron Ballman void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code);
126*ed8f7882SAaron Ballman 
127*ed8f7882SAaron Ballman } // namespace clangd
128*ed8f7882SAaron Ballman } // namespace clang
129*ed8f7882SAaron Ballman 
130*ed8f7882SAaron Ballman #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
131