1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "AST.h" 9 #include "ParsedAST.h" 10 #include "Protocol.h" 11 #include "Selection.h" 12 #include "SourceCode.h" 13 #include "refactor/Tweak.h" 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Expr.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/AST/OperationKinds.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtCXX.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Tooling/Core/Replacement.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/Support/Casting.h" 28 #include "llvm/Support/Error.h" 29 #include "llvm/Support/raw_ostream.h" 30 31 namespace clang { 32 namespace clangd { 33 namespace { 34 // information regarding the Expr that is being extracted 35 class ExtractionContext { 36 public: 37 ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM, 38 const ASTContext &Ctx); 39 const clang::Expr *getExpr() const { return Expr; } 40 const SelectionTree::Node *getExprNode() const { return ExprNode; } 41 bool isExtractable() const { return Extractable; } 42 // The half-open range for the expression to be extracted. 43 SourceRange getExtractionChars() const; 44 // Generate Replacement for replacing selected expression with given VarName 45 tooling::Replacement replaceWithVar(SourceRange Chars, 46 llvm::StringRef VarName) const; 47 // Generate Replacement for declaring the selected Expr as a new variable 48 tooling::Replacement insertDeclaration(llvm::StringRef VarName, 49 SourceRange InitChars) const; 50 51 private: 52 bool Extractable = false; 53 const clang::Expr *Expr; 54 QualType VarType; 55 const SelectionTree::Node *ExprNode; 56 // Stmt before which we will extract 57 const clang::Stmt *InsertionPoint = nullptr; 58 const SourceManager &SM; 59 const ASTContext &Ctx; 60 // Decls referenced in the Expr 61 std::vector<clang::Decl *> ReferencedDecls; 62 // returns true if the Expr doesn't reference any variable declared in scope 63 bool exprIsValidOutside(const clang::Stmt *Scope) const; 64 // computes the Stmt before which we will extract out Expr 65 const clang::Stmt *computeInsertionPoint() const; 66 }; 67 68 // Returns all the Decls referenced inside the given Expr 69 static std::vector<clang::Decl *> 70 computeReferencedDecls(const clang::Expr *Expr) { 71 // RAV subclass to find all DeclRefs in a given Stmt 72 class FindDeclRefsVisitor 73 : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> { 74 public: 75 std::vector<Decl *> ReferencedDecls; 76 bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT 77 ReferencedDecls.push_back(DeclRef->getDecl()); 78 return true; 79 } 80 }; 81 FindDeclRefsVisitor Visitor; 82 Visitor.TraverseStmt(const_cast<Stmt *>(cast<Stmt>(Expr))); 83 return Visitor.ReferencedDecls; 84 } 85 86 static QualType computeVariableType(const Expr *Expr, const ASTContext &Ctx) { 87 if (Ctx.getLangOpts().CPlusPlus11) 88 return Ctx.getAutoDeductType(); 89 90 if (Expr->hasPlaceholderType(BuiltinType::PseudoObject)) { 91 if (const auto *PR = dyn_cast<ObjCPropertyRefExpr>(Expr)) { 92 if (PR->isMessagingSetter()) { 93 // Don't support extracting a compound reference like `self.prop += 1` 94 // since the meaning changes after extraction since we'll no longer call 95 // the setter. Non compound access like `self.prop = 1` is invalid since 96 // it returns nil (setter method must have a void return type). 97 return QualType(); 98 } else if (PR->isMessagingGetter()) { 99 if (PR->isExplicitProperty()) 100 return PR->getExplicitProperty()->getType(); 101 else 102 return PR->getImplicitPropertyGetter()->getReturnType(); 103 } 104 } else { 105 return QualType(); 106 } 107 } 108 return Expr->getType(); 109 } 110 111 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node, 112 const SourceManager &SM, 113 const ASTContext &Ctx) 114 : ExprNode(Node), SM(SM), Ctx(Ctx) { 115 Expr = Node->ASTNode.get<clang::Expr>(); 116 ReferencedDecls = computeReferencedDecls(Expr); 117 InsertionPoint = computeInsertionPoint(); 118 if (InsertionPoint) 119 Extractable = true; 120 VarType = computeVariableType(Expr, Ctx); 121 if (VarType.isNull()) 122 Extractable = false; 123 else 124 // Strip the outer nullability since it's not common for local variables. 125 AttributedType::stripOuterNullability(VarType); 126 } 127 128 // checks whether extracting before InsertionPoint will take a 129 // variable reference out of scope 130 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const { 131 SourceLocation ScopeBegin = Scope->getBeginLoc(); 132 SourceLocation ScopeEnd = Scope->getEndLoc(); 133 for (const Decl *ReferencedDecl : ReferencedDecls) { 134 if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) && 135 SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd)) 136 return false; 137 } 138 return true; 139 } 140 141 // Return the Stmt before which we need to insert the extraction. 142 // To find the Stmt, we go up the AST Tree and if the Parent of the current 143 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before 144 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a 145 // CompoundStmt 146 // 147 // FIXME: Extraction from label, switch and case statements 148 // FIXME: Doens't work for FoldExpr 149 // FIXME: Ensure extraction from loops doesn't change semantics. 150 const clang::Stmt *ExtractionContext::computeInsertionPoint() const { 151 // returns true if we can extract before InsertionPoint 152 auto CanExtractOutside = 153 [](const SelectionTree::Node *InsertionPoint) -> bool { 154 if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) { 155 // Allow all expressions except LambdaExpr since we don't want to extract 156 // from the captures/default arguments of a lambda 157 if (isa<clang::Expr>(Stmt)) 158 return !isa<LambdaExpr>(Stmt); 159 // We don't yet allow extraction from switch/case stmt as we would need to 160 // jump over the switch stmt even if there is a CompoundStmt inside the 161 // switch. And there are other Stmts which we don't care about (e.g. 162 // continue and break) as there can never be anything to extract from 163 // them. 164 return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) || 165 isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) || 166 isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) || 167 isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt); 168 } 169 if (InsertionPoint->ASTNode.get<VarDecl>()) 170 return true; 171 return false; 172 }; 173 for (const SelectionTree::Node *CurNode = getExprNode(); 174 CurNode->Parent && CanExtractOutside(CurNode); 175 CurNode = CurNode->Parent) { 176 const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>(); 177 // give up if extraction will take a variable out of scope 178 if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint)) 179 break; 180 if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) { 181 if (isa<CompoundStmt>(CurParent)) { 182 // Ensure we don't write inside a macro. 183 if (CurParent->getBeginLoc().isMacroID()) 184 continue; 185 return CurInsertionPoint; 186 } 187 } 188 } 189 return nullptr; 190 } 191 192 // returns the replacement for substituting the extraction with VarName 193 tooling::Replacement 194 ExtractionContext::replaceWithVar(SourceRange Chars, 195 llvm::StringRef VarName) const { 196 unsigned ExtractionLength = 197 SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin()); 198 return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName); 199 } 200 // returns the Replacement for declaring a new variable storing the extraction 201 tooling::Replacement 202 ExtractionContext::insertDeclaration(llvm::StringRef VarName, 203 SourceRange InitializerChars) const { 204 llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars); 205 const SourceLocation InsertionLoc = 206 toHalfOpenFileRange(SM, Ctx.getLangOpts(), 207 InsertionPoint->getSourceRange()) 208 ->getBegin(); 209 std::string ExtractedVarDecl = 210 printType(VarType, ExprNode->getDeclContext(), VarName) + " = " + 211 ExtractionCode.str() + "; "; 212 return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl); 213 } 214 215 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d. 216 // 217 // These are special, because the formal AST doesn't match what users expect: 218 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`. 219 // - but extracting `b + c` is reasonable, as + is (mathematically) associative. 220 // 221 // So we try to support these cases with some restrictions: 222 // - the operator must be associative 223 // - no mixing of operators is allowed 224 // - we don't look inside macro expansions in the subexpressions 225 // - we only adjust the extracted range, so references in the unselected parts 226 // of the AST expression (e.g. `a`) are still considered referenced for 227 // the purposes of calculating the insertion point. 228 // FIXME: it would be nice to exclude these references, by micromanaging 229 // the computeReferencedDecls() calls around the binary operator tree. 230 231 // Information extracted about a binary operator encounted in a SelectionTree. 232 // It can represent either an overloaded or built-in operator. 233 struct ParsedBinaryOperator { 234 BinaryOperatorKind Kind; 235 SourceLocation ExprLoc; 236 llvm::SmallVector<const SelectionTree::Node *> SelectedOperands; 237 238 // If N is a binary operator, populate this and return true. 239 bool parse(const SelectionTree::Node &N) { 240 SelectedOperands.clear(); 241 242 if (const BinaryOperator *Op = 243 llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) { 244 Kind = Op->getOpcode(); 245 ExprLoc = Op->getExprLoc(); 246 SelectedOperands = N.Children; 247 return true; 248 } 249 if (const CXXOperatorCallExpr *Op = 250 llvm::dyn_cast_or_null<CXXOperatorCallExpr>( 251 N.ASTNode.get<Expr>())) { 252 if (!Op->isInfixBinaryOp()) 253 return false; 254 255 Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator()); 256 ExprLoc = Op->getExprLoc(); 257 // Not all children are args, there's also the callee (operator). 258 for (const auto* Child : N.Children) { 259 const Expr *E = Child->ASTNode.get<Expr>(); 260 assert(E && "callee and args should be Exprs!"); 261 if (E == Op->getArg(0) || E == Op->getArg(1)) 262 SelectedOperands.push_back(Child); 263 } 264 return true; 265 } 266 return false; 267 } 268 269 bool associative() const { 270 // Must also be left-associative, or update getBinaryOperatorRange()! 271 switch (Kind) { 272 case BO_Add: 273 case BO_Mul: 274 case BO_And: 275 case BO_Or: 276 case BO_Xor: 277 case BO_LAnd: 278 case BO_LOr: 279 return true; 280 default: 281 return false; 282 } 283 } 284 285 bool crossesMacroBoundary(const SourceManager &SM) { 286 FileID F = SM.getFileID(ExprLoc); 287 for (const SelectionTree::Node *Child : SelectedOperands) 288 if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F) 289 return true; 290 return false; 291 } 292 }; 293 294 // If have an associative operator at the top level, then we must find 295 // the start point (rightmost in LHS) and end point (leftmost in RHS). 296 // We can only descend into subtrees where the operator matches. 297 // 298 // e.g. for a + [[b + c]] + d 299 // + 300 // / \ 301 // N-> + d 302 // / \ 303 // + c <- End 304 // / \ 305 // a b <- Start 306 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N, 307 const SourceManager &SM, 308 const LangOptions &LangOpts) { 309 // If N is not a suitable binary operator, bail out. 310 ParsedBinaryOperator Op; 311 if (!Op.parse(N.ignoreImplicit()) || !Op.associative() || 312 Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2) 313 return SourceRange(); 314 BinaryOperatorKind OuterOp = Op.Kind; 315 316 // Because the tree we're interested in contains only one operator type, and 317 // all eligible operators are left-associative, the shape of the tree is 318 // very restricted: it's a linked list along the left edges. 319 // This simplifies our implementation. 320 const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS 321 const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS 322 // End is already correct: it can't be an OuterOp (as it's left-associative). 323 // Start needs to be pushed down int the subtree to the right spot. 324 while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp && 325 !Op.crossesMacroBoundary(SM)) { 326 assert(!Op.SelectedOperands.empty() && "got only operator on one side!"); 327 if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected 328 Start = Op.SelectedOperands.back(); 329 break; 330 } 331 // Op.LHS is (at least partially) selected, so descend into it. 332 Start = Op.SelectedOperands.front(); 333 } 334 335 return SourceRange( 336 toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange()) 337 ->getBegin(), 338 toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange()) 339 ->getEnd()); 340 } 341 342 SourceRange ExtractionContext::getExtractionChars() const { 343 // Special case: we're extracting an associative binary subexpression. 344 SourceRange BinaryOperatorRange = 345 getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts()); 346 if (BinaryOperatorRange.isValid()) 347 return BinaryOperatorRange; 348 349 // Usual case: we're extracting the whole expression. 350 return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange()); 351 } 352 353 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef 354 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) { 355 const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit(); 356 const SelectionTree::Node *MaybeCall = MaybeCallee.Parent; 357 if (!MaybeCall) 358 return nullptr; 359 const CallExpr *CE = 360 llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>()); 361 if (!CE) 362 return nullptr; 363 if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>()) 364 return nullptr; 365 return MaybeCall; 366 } 367 368 // Returns true if Inner (which is a direct child of Outer) is appearing as 369 // a statement rather than an expression whose value can be used. 370 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) { 371 if (!Outer || !Inner) 372 return false; 373 // Exclude the most common places where an expr can appear but be unused. 374 if (llvm::isa<CompoundStmt>(Outer)) 375 return true; 376 if (llvm::isa<SwitchCase>(Outer)) 377 return true; 378 // Control flow statements use condition etc, but not the body. 379 if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer)) 380 return Inner == WS->getBody(); 381 if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer)) 382 return Inner == DS->getBody(); 383 if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer)) 384 return Inner == FS->getBody(); 385 if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer)) 386 return Inner == FS->getBody(); 387 if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer)) 388 return Inner == IS->getThen() || Inner == IS->getElse(); 389 // Assume all other cases may be actual expressions. 390 // This includes the important case of subexpressions (where Outer is Expr). 391 return false; 392 } 393 394 // check if N can and should be extracted (e.g. is not void-typed). 395 bool eligibleForExtraction(const SelectionTree::Node *N) { 396 const Expr *E = N->ASTNode.get<Expr>(); 397 if (!E) 398 return false; 399 400 // Void expressions can't be assigned to variables. 401 const Type *ExprType = E->getType().getTypePtrOrNull(); 402 if (!ExprType || ExprType->isVoidType()) 403 return false; 404 405 // A plain reference to a name (e.g. variable) isn't worth extracting. 406 // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`? 407 if (llvm::isa<DeclRefExpr>(E)) 408 return false; 409 410 // Similarly disallow extraction for member exprs with an implicit `this`. 411 if (const auto *ME = dyn_cast<MemberExpr>(E)) 412 if (const auto *TE = dyn_cast<CXXThisExpr>(ME->getBase()->IgnoreImpCasts())) 413 if (TE->isImplicit()) 414 return false; 415 416 // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful. 417 // FIXME: we could still hoist the assignment, and leave the variable there? 418 ParsedBinaryOperator BinOp; 419 if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind)) 420 return false; 421 422 const SelectionTree::Node &OuterImplicit = N->outerImplicit(); 423 const auto *Parent = OuterImplicit.Parent; 424 if (!Parent) 425 return false; 426 // We don't want to extract expressions used as statements, that would leave 427 // a `placeholder;` around that has no effect. 428 // Unfortunately because the AST doesn't have ExprStmt, we have to check in 429 // this roundabout way. 430 if (childExprIsStmt(Parent->ASTNode.get<Stmt>(), 431 OuterImplicit.ASTNode.get<Expr>())) 432 return false; 433 434 // Disable extraction of full RHS on assignment operations, e.g: 435 // auto x = [[RHS_EXPR]]; 436 // This would just result in duplicating the code. 437 if (const auto *BO = Parent->ASTNode.get<BinaryOperator>()) { 438 if (BO->isAssignmentOp() && 439 BO->getRHS() == OuterImplicit.ASTNode.get<Expr>()) 440 return false; 441 } 442 443 return true; 444 } 445 446 // Find the Expr node that we're going to extract. 447 // We don't want to trigger for assignment expressions and variable/field 448 // DeclRefs. For function/member function, we want to extract the entire 449 // function call. 450 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) { 451 if (!N) 452 return nullptr; 453 const SelectionTree::Node *TargetNode = N; 454 const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>(); 455 if (!SelectedExpr) 456 return nullptr; 457 // For function and member function DeclRefs, extract the whole call. 458 if (llvm::isa<DeclRefExpr>(SelectedExpr) || 459 llvm::isa<MemberExpr>(SelectedExpr)) 460 if (const SelectionTree::Node *Call = getCallExpr(N)) 461 TargetNode = Call; 462 // Extracting Exprs like a = 1 gives placeholder = a = 1 which isn't useful. 463 if (const BinaryOperator *BinOpExpr = 464 dyn_cast_or_null<BinaryOperator>(SelectedExpr)) { 465 if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign) 466 return nullptr; 467 } 468 if (!TargetNode || !eligibleForExtraction(TargetNode)) 469 return nullptr; 470 return TargetNode; 471 } 472 473 /// Extracts an expression to the variable placeholder 474 /// Before: 475 /// int x = 5 + 4 * 3; 476 /// ^^^^^ 477 /// After: 478 /// auto placeholder = 5 + 4; 479 /// int x = placeholder * 3; 480 class ExtractVariable : public Tweak { 481 public: 482 const char *id() const final; 483 bool prepare(const Selection &Inputs) override; 484 Expected<Effect> apply(const Selection &Inputs) override; 485 std::string title() const override { 486 return "Extract subexpression to variable"; 487 } 488 llvm::StringLiteral kind() const override { 489 return CodeAction::REFACTOR_KIND; 490 } 491 492 private: 493 // the expression to extract 494 std::unique_ptr<ExtractionContext> Target; 495 }; 496 REGISTER_TWEAK(ExtractVariable) 497 bool ExtractVariable::prepare(const Selection &Inputs) { 498 // we don't trigger on empty selections for now 499 if (Inputs.SelectionBegin == Inputs.SelectionEnd) 500 return false; 501 const ASTContext &Ctx = Inputs.AST->getASTContext(); 502 const SourceManager &SM = Inputs.AST->getSourceManager(); 503 if (const SelectionTree::Node *N = 504 computeExtractedExpr(Inputs.ASTSelection.commonAncestor())) 505 Target = std::make_unique<ExtractionContext>(N, SM, Ctx); 506 return Target && Target->isExtractable(); 507 } 508 509 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) { 510 tooling::Replacements Result; 511 // FIXME: get variable name from user or suggest based on type 512 std::string VarName = "placeholder"; 513 SourceRange Range = Target->getExtractionChars(); 514 // insert new variable declaration 515 if (auto Err = Result.add(Target->insertDeclaration(VarName, Range))) 516 return std::move(Err); 517 // replace expression with variable name 518 if (auto Err = Result.add(Target->replaceWithVar(Range, VarName))) 519 return std::move(Err); 520 return Effect::mainFileEdit(Inputs.AST->getSourceManager(), 521 std::move(Result)); 522 } 523 524 } // namespace 525 } // namespace clangd 526 } // namespace clang 527