xref: /llvm-project/clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp (revision 8b4a27f410ba42b2e19651ddafc60cd878d0963c)
1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "AST.h"
9 #include "ParsedAST.h"
10 #include "Protocol.h"
11 #include "Selection.h"
12 #include "SourceCode.h"
13 #include "refactor/Tweak.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Expr.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/AST/OperationKinds.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtCXX.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Tooling/Core/Replacement.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/raw_ostream.h"
30 
31 namespace clang {
32 namespace clangd {
33 namespace {
34 // information regarding the Expr that is being extracted
35 class ExtractionContext {
36 public:
37   ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM,
38                     const ASTContext &Ctx);
39   const clang::Expr *getExpr() const { return Expr; }
40   const SelectionTree::Node *getExprNode() const { return ExprNode; }
41   bool isExtractable() const { return Extractable; }
42   // The half-open range for the expression to be extracted.
43   SourceRange getExtractionChars() const;
44   // Generate Replacement for replacing selected expression with given VarName
45   tooling::Replacement replaceWithVar(SourceRange Chars,
46                                       llvm::StringRef VarName) const;
47   // Generate Replacement for declaring the selected Expr as a new variable
48   tooling::Replacement insertDeclaration(llvm::StringRef VarName,
49                                          SourceRange InitChars) const;
50 
51 private:
52   bool Extractable = false;
53   const clang::Expr *Expr;
54   QualType VarType;
55   const SelectionTree::Node *ExprNode;
56   // Stmt before which we will extract
57   const clang::Stmt *InsertionPoint = nullptr;
58   const SourceManager &SM;
59   const ASTContext &Ctx;
60   // Decls referenced in the Expr
61   std::vector<clang::Decl *> ReferencedDecls;
62   // returns true if the Expr doesn't reference any variable declared in scope
63   bool exprIsValidOutside(const clang::Stmt *Scope) const;
64   // computes the Stmt before which we will extract out Expr
65   const clang::Stmt *computeInsertionPoint() const;
66 };
67 
68 // Returns all the Decls referenced inside the given Expr
69 static std::vector<clang::Decl *>
70 computeReferencedDecls(const clang::Expr *Expr) {
71   // RAV subclass to find all DeclRefs in a given Stmt
72   class FindDeclRefsVisitor
73       : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> {
74   public:
75     std::vector<Decl *> ReferencedDecls;
76     bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT
77       ReferencedDecls.push_back(DeclRef->getDecl());
78       return true;
79     }
80   };
81   FindDeclRefsVisitor Visitor;
82   Visitor.TraverseStmt(const_cast<Stmt *>(cast<Stmt>(Expr)));
83   return Visitor.ReferencedDecls;
84 }
85 
86 static QualType computeVariableType(const Expr *Expr, const ASTContext &Ctx) {
87   if (Ctx.getLangOpts().CPlusPlus11)
88     return Ctx.getAutoDeductType();
89 
90   if (Expr->hasPlaceholderType(BuiltinType::PseudoObject)) {
91     if (const auto *PR = dyn_cast<ObjCPropertyRefExpr>(Expr)) {
92       if (PR->isMessagingSetter()) {
93         // Don't support extracting a compound reference like `self.prop += 1`
94         // since the meaning changes after extraction since we'll no longer call
95         // the setter. Non compound access like `self.prop = 1` is invalid since
96         // it returns nil (setter method must have a void return type).
97         return QualType();
98       } else if (PR->isMessagingGetter()) {
99         if (PR->isExplicitProperty())
100           return PR->getExplicitProperty()->getType();
101         else
102           return PR->getImplicitPropertyGetter()->getReturnType();
103       }
104     } else {
105       return QualType();
106     }
107   }
108   return Expr->getType();
109 }
110 
111 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node,
112                                      const SourceManager &SM,
113                                      const ASTContext &Ctx)
114     : ExprNode(Node), SM(SM), Ctx(Ctx) {
115   Expr = Node->ASTNode.get<clang::Expr>();
116   ReferencedDecls = computeReferencedDecls(Expr);
117   InsertionPoint = computeInsertionPoint();
118   if (InsertionPoint)
119     Extractable = true;
120   VarType = computeVariableType(Expr, Ctx);
121   if (VarType.isNull())
122     Extractable = false;
123   else
124     // Strip the outer nullability since it's not common for local variables.
125     AttributedType::stripOuterNullability(VarType);
126 }
127 
128 // checks whether extracting before InsertionPoint will take a
129 // variable reference out of scope
130 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const {
131   SourceLocation ScopeBegin = Scope->getBeginLoc();
132   SourceLocation ScopeEnd = Scope->getEndLoc();
133   for (const Decl *ReferencedDecl : ReferencedDecls) {
134     if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) &&
135         SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd))
136       return false;
137   }
138   return true;
139 }
140 
141 // Return the Stmt before which we need to insert the extraction.
142 // To find the Stmt, we go up the AST Tree and if the Parent of the current
143 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before
144 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a
145 // CompoundStmt
146 //
147 // FIXME: Extraction from label, switch and case statements
148 // FIXME: Doens't work for FoldExpr
149 // FIXME: Ensure extraction from loops doesn't change semantics.
150 const clang::Stmt *ExtractionContext::computeInsertionPoint() const {
151   // returns true if we can extract before InsertionPoint
152   auto CanExtractOutside =
153       [](const SelectionTree::Node *InsertionPoint) -> bool {
154     if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) {
155       // Allow all expressions except LambdaExpr since we don't want to extract
156       // from the captures/default arguments of a lambda
157       if (isa<clang::Expr>(Stmt))
158         return !isa<LambdaExpr>(Stmt);
159       // We don't yet allow extraction from switch/case stmt as we would need to
160       // jump over the switch stmt even if there is a CompoundStmt inside the
161       // switch. And there are other Stmts which we don't care about (e.g.
162       // continue and break) as there can never be anything to extract from
163       // them.
164       return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) ||
165              isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) ||
166              isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) ||
167              isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt);
168     }
169     if (InsertionPoint->ASTNode.get<VarDecl>())
170       return true;
171     return false;
172   };
173   for (const SelectionTree::Node *CurNode = getExprNode();
174        CurNode->Parent && CanExtractOutside(CurNode);
175        CurNode = CurNode->Parent) {
176     const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>();
177     // give up if extraction will take a variable out of scope
178     if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint))
179       break;
180     if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) {
181       if (isa<CompoundStmt>(CurParent)) {
182         // Ensure we don't write inside a macro.
183         if (CurParent->getBeginLoc().isMacroID())
184           continue;
185         return CurInsertionPoint;
186       }
187     }
188   }
189   return nullptr;
190 }
191 
192 // returns the replacement for substituting the extraction with VarName
193 tooling::Replacement
194 ExtractionContext::replaceWithVar(SourceRange Chars,
195                                   llvm::StringRef VarName) const {
196   unsigned ExtractionLength =
197       SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
198   return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
199 }
200 // returns the Replacement for declaring a new variable storing the extraction
201 tooling::Replacement
202 ExtractionContext::insertDeclaration(llvm::StringRef VarName,
203                                      SourceRange InitializerChars) const {
204   llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
205   const SourceLocation InsertionLoc =
206       toHalfOpenFileRange(SM, Ctx.getLangOpts(),
207                           InsertionPoint->getSourceRange())
208           ->getBegin();
209   std::string ExtractedVarDecl =
210       printType(VarType, ExprNode->getDeclContext(), VarName) + " = " +
211       ExtractionCode.str() + "; ";
212   return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
213 }
214 
215 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
216 //
217 // These are special, because the formal AST doesn't match what users expect:
218 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
219 // - but extracting `b + c` is reasonable, as + is (mathematically) associative.
220 //
221 // So we try to support these cases with some restrictions:
222 //  - the operator must be associative
223 //  - no mixing of operators is allowed
224 //  - we don't look inside macro expansions in the subexpressions
225 //  - we only adjust the extracted range, so references in the unselected parts
226 //    of the AST expression (e.g. `a`) are still considered referenced for
227 //    the purposes of calculating the insertion point.
228 //    FIXME: it would be nice to exclude these references, by micromanaging
229 //    the computeReferencedDecls() calls around the binary operator tree.
230 
231 // Information extracted about a binary operator encounted in a SelectionTree.
232 // It can represent either an overloaded or built-in operator.
233 struct ParsedBinaryOperator {
234   BinaryOperatorKind Kind;
235   SourceLocation ExprLoc;
236   llvm::SmallVector<const SelectionTree::Node *> SelectedOperands;
237 
238   // If N is a binary operator, populate this and return true.
239   bool parse(const SelectionTree::Node &N) {
240     SelectedOperands.clear();
241 
242     if (const BinaryOperator *Op =
243         llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
244       Kind = Op->getOpcode();
245       ExprLoc = Op->getExprLoc();
246       SelectedOperands = N.Children;
247       return true;
248     }
249     if (const CXXOperatorCallExpr *Op =
250             llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
251                 N.ASTNode.get<Expr>())) {
252       if (!Op->isInfixBinaryOp())
253         return false;
254 
255       Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
256       ExprLoc = Op->getExprLoc();
257       // Not all children are args, there's also the callee (operator).
258       for (const auto* Child : N.Children) {
259         const Expr *E = Child->ASTNode.get<Expr>();
260         assert(E && "callee and args should be Exprs!");
261         if (E == Op->getArg(0) || E == Op->getArg(1))
262           SelectedOperands.push_back(Child);
263       }
264       return true;
265     }
266     return false;
267   }
268 
269   bool associative() const {
270     // Must also be left-associative, or update getBinaryOperatorRange()!
271     switch (Kind) {
272     case BO_Add:
273     case BO_Mul:
274     case BO_And:
275     case BO_Or:
276     case BO_Xor:
277     case BO_LAnd:
278     case BO_LOr:
279       return true;
280     default:
281       return false;
282     }
283   }
284 
285   bool crossesMacroBoundary(const SourceManager &SM) {
286     FileID F = SM.getFileID(ExprLoc);
287     for (const SelectionTree::Node *Child : SelectedOperands)
288       if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F)
289         return true;
290     return false;
291   }
292 };
293 
294 // If have an associative operator at the top level, then we must find
295 // the start point (rightmost in LHS) and end point (leftmost in RHS).
296 // We can only descend into subtrees where the operator matches.
297 //
298 // e.g. for a + [[b + c]] + d
299 //        +
300 //       / \
301 //  N-> +   d
302 //     / \
303 //    +   c <- End
304 //   / \
305 //  a   b <- Start
306 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N,
307                                          const SourceManager &SM,
308                                          const LangOptions &LangOpts) {
309   // If N is not a suitable binary operator, bail out.
310   ParsedBinaryOperator Op;
311   if (!Op.parse(N.ignoreImplicit()) || !Op.associative() ||
312       Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2)
313     return SourceRange();
314   BinaryOperatorKind OuterOp = Op.Kind;
315 
316   // Because the tree we're interested in contains only one operator type, and
317   // all eligible operators are left-associative, the shape of the tree is
318   // very restricted: it's a linked list along the left edges.
319   // This simplifies our implementation.
320   const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS
321   const SelectionTree::Node *End = Op.SelectedOperands.back();    // RHS
322   // End is already correct: it can't be an OuterOp (as it's left-associative).
323   // Start needs to be pushed down int the subtree to the right spot.
324   while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp &&
325          !Op.crossesMacroBoundary(SM)) {
326     assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
327     if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected
328       Start = Op.SelectedOperands.back();
329       break;
330     }
331     // Op.LHS is (at least partially) selected, so descend into it.
332     Start = Op.SelectedOperands.front();
333   }
334 
335   return SourceRange(
336       toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange())
337           ->getBegin(),
338       toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange())
339           ->getEnd());
340 }
341 
342 SourceRange ExtractionContext::getExtractionChars() const {
343   // Special case: we're extracting an associative binary subexpression.
344   SourceRange BinaryOperatorRange =
345       getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts());
346   if (BinaryOperatorRange.isValid())
347     return BinaryOperatorRange;
348 
349   // Usual case: we're extracting the whole expression.
350   return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
351 }
352 
353 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef
354 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) {
355   const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit();
356   const SelectionTree::Node *MaybeCall = MaybeCallee.Parent;
357   if (!MaybeCall)
358     return nullptr;
359   const CallExpr *CE =
360       llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>());
361   if (!CE)
362     return nullptr;
363   if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>())
364     return nullptr;
365   return MaybeCall;
366 }
367 
368 // Returns true if Inner (which is a direct child of Outer) is appearing as
369 // a statement rather than an expression whose value can be used.
370 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) {
371   if (!Outer || !Inner)
372     return false;
373   // Exclude the most common places where an expr can appear but be unused.
374   if (llvm::isa<CompoundStmt>(Outer))
375     return true;
376   if (llvm::isa<SwitchCase>(Outer))
377     return true;
378   // Control flow statements use condition etc, but not the body.
379   if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer))
380     return Inner == WS->getBody();
381   if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer))
382     return Inner == DS->getBody();
383   if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer))
384     return Inner == FS->getBody();
385   if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer))
386     return Inner == FS->getBody();
387   if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer))
388     return Inner == IS->getThen() || Inner == IS->getElse();
389   // Assume all other cases may be actual expressions.
390   // This includes the important case of subexpressions (where Outer is Expr).
391   return false;
392 }
393 
394 // check if N can and should be extracted (e.g. is not void-typed).
395 bool eligibleForExtraction(const SelectionTree::Node *N) {
396   const Expr *E = N->ASTNode.get<Expr>();
397   if (!E)
398     return false;
399 
400   // Void expressions can't be assigned to variables.
401   const Type *ExprType = E->getType().getTypePtrOrNull();
402   if (!ExprType || ExprType->isVoidType())
403     return false;
404 
405   // A plain reference to a name (e.g. variable) isn't  worth extracting.
406   // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`?
407   if (llvm::isa<DeclRefExpr>(E))
408     return false;
409 
410   // Similarly disallow extraction for member exprs with an implicit `this`.
411   if (const auto *ME = dyn_cast<MemberExpr>(E))
412     if (const auto *TE = dyn_cast<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()))
413       if (TE->isImplicit())
414         return false;
415 
416   // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
417   // FIXME: we could still hoist the assignment, and leave the variable there?
418   ParsedBinaryOperator BinOp;
419   if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind))
420     return false;
421 
422   const SelectionTree::Node &OuterImplicit = N->outerImplicit();
423   const auto *Parent = OuterImplicit.Parent;
424   if (!Parent)
425     return false;
426   // We don't want to extract expressions used as statements, that would leave
427   // a `placeholder;` around that has no effect.
428   // Unfortunately because the AST doesn't have ExprStmt, we have to check in
429   // this roundabout way.
430   if (childExprIsStmt(Parent->ASTNode.get<Stmt>(),
431                       OuterImplicit.ASTNode.get<Expr>()))
432     return false;
433 
434   // Disable extraction of full RHS on assignment operations, e.g:
435   // auto x = [[RHS_EXPR]];
436   // This would just result in duplicating the code.
437   if (const auto *BO = Parent->ASTNode.get<BinaryOperator>()) {
438     if (BO->isAssignmentOp() &&
439         BO->getRHS() == OuterImplicit.ASTNode.get<Expr>())
440       return false;
441   }
442 
443   return true;
444 }
445 
446 // Find the Expr node that we're going to extract.
447 // We don't want to trigger for assignment expressions and variable/field
448 // DeclRefs. For function/member function, we want to extract the entire
449 // function call.
450 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) {
451   if (!N)
452     return nullptr;
453   const SelectionTree::Node *TargetNode = N;
454   const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>();
455   if (!SelectedExpr)
456     return nullptr;
457   // For function and member function DeclRefs, extract the whole call.
458   if (llvm::isa<DeclRefExpr>(SelectedExpr) ||
459       llvm::isa<MemberExpr>(SelectedExpr))
460     if (const SelectionTree::Node *Call = getCallExpr(N))
461       TargetNode = Call;
462   // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful.
463   if (const BinaryOperator *BinOpExpr =
464           dyn_cast_or_null<BinaryOperator>(SelectedExpr)) {
465     if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign)
466       return nullptr;
467   }
468   if (!TargetNode || !eligibleForExtraction(TargetNode))
469     return nullptr;
470   return TargetNode;
471 }
472 
473 /// Extracts an expression to the variable placeholder
474 /// Before:
475 /// int x = 5 + 4 * 3;
476 ///         ^^^^^
477 /// After:
478 /// auto placeholder = 5 + 4;
479 /// int x = placeholder * 3;
480 class ExtractVariable : public Tweak {
481 public:
482   const char *id() const final;
483   bool prepare(const Selection &Inputs) override;
484   Expected<Effect> apply(const Selection &Inputs) override;
485   std::string title() const override {
486     return "Extract subexpression to variable";
487   }
488   llvm::StringLiteral kind() const override {
489     return CodeAction::REFACTOR_KIND;
490   }
491 
492 private:
493   // the expression to extract
494   std::unique_ptr<ExtractionContext> Target;
495 };
496 REGISTER_TWEAK(ExtractVariable)
497 bool ExtractVariable::prepare(const Selection &Inputs) {
498   // we don't trigger on empty selections for now
499   if (Inputs.SelectionBegin == Inputs.SelectionEnd)
500     return false;
501   const ASTContext &Ctx = Inputs.AST->getASTContext();
502   const SourceManager &SM = Inputs.AST->getSourceManager();
503   if (const SelectionTree::Node *N =
504           computeExtractedExpr(Inputs.ASTSelection.commonAncestor()))
505     Target = std::make_unique<ExtractionContext>(N, SM, Ctx);
506   return Target && Target->isExtractable();
507 }
508 
509 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) {
510   tooling::Replacements Result;
511   // FIXME: get variable name from user or suggest based on type
512   std::string VarName = "placeholder";
513   SourceRange Range = Target->getExtractionChars();
514   // insert new variable declaration
515   if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
516     return std::move(Err);
517   // replace expression with variable name
518   if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
519     return std::move(Err);
520   return Effect::mainFileEdit(Inputs.AST->getSourceManager(),
521                               std::move(Result));
522 }
523 
524 } // namespace
525 } // namespace clangd
526 } // namespace clang
527