xref: /llvm-project/clang-tools-extra/clangd/support/DirectiveTree.h (revision ed8f78827895050442f544edef2933a60d4a7935)
1 //===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The pseudoparser tries to match a token stream to the C++ grammar.
10 // Preprocessor #defines and other directives are not part of this grammar, and
11 // should be removed before the file can be parsed.
12 //
13 // Conditional blocks like #if...#else...#endif are particularly tricky, as
14 // simply stripping the directives may not produce a grammatical result:
15 //
16 //   return
17 //     #ifndef DEBUG
18 //       1
19 //     #else
20 //       0
21 //     #endif
22 //       ;
23 //
24 // This header supports analyzing and removing the directives in a source file.
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
29 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
30 
31 #include "Token.h"
32 #include "clang/Basic/TokenKinds.h"
33 #include <optional>
34 #include <variant>
35 #include <vector>
36 
37 namespace clang {
38 namespace clangd {
39 
40 /// Describes the structure of a source file, as seen by the preprocessor.
41 ///
42 /// The structure is a tree, whose leaves are plain source code and directives,
43 /// and whose internal nodes are #if...#endif sections.
44 ///
45 /// (root)
46 /// |-+ Directive                    #include <stdio.h>
47 /// |-+ Code                         int main() {
48 /// | `                                printf("hello, ");
49 /// |-+ Conditional -+ Directive     #ifndef NDEBUG
50 /// | |-+ Code                         printf("debug\n");
51 /// | |-+ Directive                  #else
52 /// | |-+ Code                         printf("production\n");
53 /// | `-+ Directive                  #endif
54 /// |-+ Code                           return 0;
55 ///   `                              }
56 ///
57 /// Unlike the clang preprocessor, we model the full tree explicitly.
58 /// This class does not recognize macro usage, only directives.
59 struct DirectiveTree {
60   /// A range of code (and possibly comments) containing no directives.
61   struct Code {
62     Token::Range Tokens;
63   };
64   /// A preprocessor directive.
65   struct Directive {
66     /// Raw tokens making up the directive, starting with `#`.
67     Token::Range Tokens;
68     clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword;
69   };
70   /// A preprocessor conditional section.
71   ///
72   /// This starts with an #if, #ifdef, #ifndef etc directive.
73   /// It covers all #else branches, and spans until the matching #endif.
74   struct Conditional {
75     /// The sequence of directives that introduce top-level alternative parses.
76     ///
77     /// The first branch will have an #if type directive.
78     /// Subsequent branches will have #else type directives.
79     std::vector<std::pair<Directive, DirectiveTree>> Branches;
80     /// The directive terminating the conditional, should be #endif.
81     Directive End;
82     /// The index of the conditional branch we chose as active.
83     /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif).
84     /// The initial tree from `parse()` has no branches marked as taken.
85     /// See `chooseConditionalBranches()`.
86     std::optional<unsigned> Taken;
87   };
88 
89   /// Some piece of the file. {One of Code, Directive, Conditional}.
90   using Chunk = std::variant<Code, Directive, Conditional>;
91   std::vector<Chunk> Chunks;
92 
93   /// Extract preprocessor structure by examining the raw tokens.
94   static DirectiveTree parse(const TokenStream &);
95 
96   /// Produce a parseable token stream by stripping all directive tokens.
97   ///
98   /// Conditional sections are replaced by the taken branch, if any.
99   /// This tree must describe the provided token stream.
100   TokenStream stripDirectives(const TokenStream &) const;
101 };
102 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &);
103 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &);
104 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
105                               const DirectiveTree::Directive &);
106 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
107                               const DirectiveTree::Conditional &);
108 
109 /// Selects a "taken" branch for each conditional directive in the file.
110 ///
111 /// The choice is somewhat arbitrary, but aims to produce a useful parse:
112 ///  - idioms like `#if 0` are respected
113 ///  - we avoid paths that reach `#error`
114 ///  - we try to maximize the amount of code seen
115 /// The choice may also be "no branch taken".
116 ///
117 /// Choices are also made for conditionals themselves inside not-taken branches:
118 ///   #if 1 // taken!
119 ///   #else // not taken
120 ///      #if 1 // taken!
121 ///      #endif
122 ///   #endif
123 ///
124 /// The choices are stored in Conditional::Taken nodes.
125 void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code);
126 
127 } // namespace clangd
128 } // namespace clang
129 
130 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H
131