xref: /llvm-project/clang/lib/Format/TokenAnnotator.h (revision 106c483a102e1328f11e2b1d9398f4ad2826b59f)
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 
20 namespace clang {
21 namespace format {
22 
/// Classification assigned to an annotated line (stored in
/// \c AnnotatedLine::Type, which starts out as \c LT_Other).
///
/// The enumerators are unscoped and implicitly numbered from 0 in declaration
/// order, so new values should only be appended.
enum LineType {
  LT_Invalid,
  /// Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  /// An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,
  LT_ObjCMethodDecl,
  /// An @property line.
  LT_ObjCProperty,
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
  LT_RequiresExpression,
  LT_SimpleRequirement,
};
39 
/// Kind of enclosing scope; \c TokenAnnotator keeps vectors of these values
/// (\c Scopes and \c MacroBodyScopes).
enum ScopeType {
  /// Contained in class declaration/definition.
  ST_Class,
  /// Contained in compound requirement.
  ST_CompoundRequirement,
  /// Contained in other blocks (function, lambda, loop, if/else, child, etc).
  ST_Other,
};
48 
49 class AnnotatedLine {
50 public:
51   AnnotatedLine(const UnwrappedLine &Line)
52       : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
53         PPLevel(Line.PPLevel),
54         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
55         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
56         InPPDirective(Line.InPPDirective),
57         InPragmaDirective(Line.InPragmaDirective),
58         InMacroBody(Line.InMacroBody),
59         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
60         IsMultiVariableDeclStmt(false), Affected(false),
61         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
62         ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
63         FirstStartColumn(Line.FirstStartColumn) {
64     assert(!Line.Tokens.empty());
65 
66     // Calculate Next and Previous for all tokens. Note that we must overwrite
67     // Next and Previous for every token, as previous formatting runs might have
68     // left them in a different state.
69     First->Previous = nullptr;
70     FormatToken *Current = First;
71     addChildren(Line.Tokens.front(), Current);
72     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
73       if (Node.Tok->MacroParent)
74         ContainsMacroCall = true;
75       Current->Next = Node.Tok;
76       Node.Tok->Previous = Current;
77       Current = Current->Next;
78       addChildren(Node, Current);
79       // FIXME: if we add children, previous will point to the token before
80       // the children; changing this requires significant changes across
81       // clang-format.
82     }
83     Last = Current;
84     Last->Next = nullptr;
85   }
86 
87   void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
88     Current->Children.clear();
89     for (const auto &Child : Node.Children) {
90       Children.push_back(new AnnotatedLine(Child));
91       if (Children.back()->ContainsMacroCall)
92         ContainsMacroCall = true;
93       Current->Children.push_back(Children.back());
94     }
95   }
96 
97   size_t size() const {
98     size_t Size = 1;
99     for (const auto *Child : Children)
100       Size += Child->size();
101     return Size;
102   }
103 
104   ~AnnotatedLine() {
105     for (AnnotatedLine *Child : Children)
106       delete Child;
107     FormatToken *Current = First;
108     while (Current) {
109       Current->Children.clear();
110       Current->Role.reset();
111       Current = Current->Next;
112     }
113   }
114 
115   bool isComment() const {
116     return First && First->is(tok::comment) && !First->getNextNonComment();
117   }
118 
119   /// \c true if this line starts with the given tokens in order, ignoring
120   /// comments.
121   template <typename... Ts> bool startsWith(Ts... Tokens) const {
122     return First && First->startsSequence(Tokens...);
123   }
124 
125   /// \c true if this line ends with the given tokens in reversed order,
126   /// ignoring comments.
127   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
128   /// this line is like "... T3 T2 T1".
129   template <typename... Ts> bool endsWith(Ts... Tokens) const {
130     return Last && Last->endsSequence(Tokens...);
131   }
132 
133   /// \c true if this line looks like a function definition instead of a
134   /// function declaration. Asserts MightBeFunctionDecl.
135   bool mightBeFunctionDefinition() const {
136     assert(MightBeFunctionDecl);
137     // Try to determine if the end of a stream of tokens is either the
138     // Definition or the Declaration for a function. It does this by looking for
139     // the ';' in foo(); and using that it ends with a ; to know this is the
140     // Definition, however the line could end with
141     //    foo(); /* comment */
142     // or
143     //    foo(); // comment
144     // or
145     //    foo() // comment
146     // endsWith() ignores the comment.
147     return !endsWith(tok::semi);
148   }
149 
150   /// \c true if this line starts a namespace definition.
151   bool startsWithNamespace() const {
152     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
153            startsWith(tok::kw_inline, tok::kw_namespace) ||
154            startsWith(tok::kw_export, tok::kw_namespace);
155   }
156 
157   /// \c true if this line starts a C++ export block.
158   bool startsWithExportBlock() const {
159     return startsWith(tok::kw_export, tok::l_brace);
160   }
161 
162   FormatToken *getFirstNonComment() const {
163     assert(First);
164     return First->is(tok::comment) ? First->getNextNonComment() : First;
165   }
166 
167   FormatToken *getLastNonComment() const {
168     assert(Last);
169     return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
170   }
171 
172   FormatToken *First;
173   FormatToken *Last;
174 
175   SmallVector<AnnotatedLine *, 0> Children;
176 
177   LineType Type;
178   unsigned Level;
179   unsigned PPLevel;
180   size_t MatchingOpeningBlockLineIndex;
181   size_t MatchingClosingBlockLineIndex;
182   bool InPPDirective;
183   bool InPragmaDirective;
184   bool InMacroBody;
185   bool MustBeDeclaration;
186   bool MightBeFunctionDecl;
187   bool IsMultiVariableDeclStmt;
188 
189   /// \c True if this line contains a macro call for which an expansion exists.
190   bool ContainsMacroCall = false;
191 
192   /// \c True if calculateFormattingInformation() has been called on this line.
193   bool Computed = false;
194 
195   /// \c True if this line should be formatted, i.e. intersects directly or
196   /// indirectly with one of the input ranges.
197   bool Affected;
198 
199   /// \c True if the leading empty lines of this line intersect with one of the
200   /// input ranges.
201   bool LeadingEmptyLinesAffected;
202 
203   /// \c True if one of this line's children intersects with an input range.
204   bool ChildrenAffected;
205 
206   /// \c True if breaking after last attribute group in function return type.
207   bool ReturnTypeWrapped;
208 
209   /// \c True if this line should be indented by ContinuationIndent in addition
210   /// to the normal indention level.
211   bool IsContinuation;
212 
213   unsigned FirstStartColumn;
214 
215 private:
216   // Disallow copying.
217   AnnotatedLine(const AnnotatedLine &) = delete;
218   void operator=(const AnnotatedLine &) = delete;
219 };
220 
221 /// Determines extra information about the tokens comprising an
222 /// \c UnwrappedLine.
223 class TokenAnnotator {
224 public:
225   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
226       : Style(Style), IsCpp(Style.isCpp()),
227         LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
228     assert(IsCpp == LangOpts.CXXOperatorNames);
229   }
230 
231   /// Adapts the indent levels of comment lines to the indent of the
232   /// subsequent line.
233   // FIXME: Can/should this be done in the UnwrappedLineParser?
234   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
235 
236   void annotate(AnnotatedLine &Line);
237   void calculateFormattingInformation(AnnotatedLine &Line) const;
238 
239 private:
240   /// Calculate the penalty for splitting before \c Tok.
241   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
242                         bool InFunctionDecl) const;
243 
244   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
245 
246   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
247                             const FormatToken &Right) const;
248 
249   bool spaceRequiredBefore(const AnnotatedLine &Line,
250                            const FormatToken &Right) const;
251 
252   bool mustBreakBefore(const AnnotatedLine &Line,
253                        const FormatToken &Right) const;
254 
255   bool canBreakBefore(const AnnotatedLine &Line,
256                       const FormatToken &Right) const;
257 
258   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
259 
260   void printDebugInfo(const AnnotatedLine &Line) const;
261 
262   void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
263 
264   void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
265 
266   FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
267                                               FormatToken *CurrentToken,
268                                               unsigned Depth) const;
269   FormatStyle::PointerAlignmentStyle
270   getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
271 
272   FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
273       const FormatToken &PointerOrReference) const;
274 
275   const FormatStyle &Style;
276 
277   bool IsCpp;
278   LangOptions LangOpts;
279 
280   const AdditionalKeywords &Keywords;
281 
282   SmallVector<ScopeType> Scopes, MacroBodyScopes;
283 };
284 
285 } // end namespace format
286 } // end namespace clang
287 
288 #endif
289