1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "Logger.h" 9 #include "ParsedAST.h" 10 #include "Protocol.h" 11 #include "Selection.h" 12 #include "SourceCode.h" 13 #include "refactor/Tweak.h" 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Expr.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/AST/OperationKinds.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtCXX.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Tooling/Core/Replacement.h" 25 #include "llvm/ADT/None.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Error.h" 30 #include "llvm/Support/raw_ostream.h" 31 32 namespace clang { 33 namespace clangd { 34 namespace { 35 // information regarding the Expr that is being extracted 36 class ExtractionContext { 37 public: 38 ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM, 39 const ASTContext &Ctx); 40 const clang::Expr *getExpr() const { return Expr; } 41 const SelectionTree::Node *getExprNode() const { return ExprNode; } 42 bool isExtractable() const { return Extractable; } 43 // The half-open range for the expression to be extracted. 44 SourceRange getExtractionChars() const; 45 // Generate Replacement for replacing selected expression with given VarName 46 tooling::Replacement replaceWithVar(SourceRange Chars, 47 llvm::StringRef VarName) const; 48 // Generate Replacement for declaring the selected Expr as a new variable 49 tooling::Replacement insertDeclaration(llvm::StringRef VarName, 50 SourceRange InitChars) const; 51 52 private: 53 bool Extractable = false; 54 const clang::Expr *Expr; 55 const SelectionTree::Node *ExprNode; 56 // Stmt before which we will extract 57 const clang::Stmt *InsertionPoint = nullptr; 58 const SourceManager &SM; 59 const ASTContext &Ctx; 60 // Decls referenced in the Expr 61 std::vector<clang::Decl *> ReferencedDecls; 62 // returns true if the Expr doesn't reference any variable declared in scope 63 bool exprIsValidOutside(const clang::Stmt *Scope) const; 64 // computes the Stmt before which we will extract out Expr 65 const clang::Stmt *computeInsertionPoint() const; 66 }; 67 68 // Returns all the Decls referenced inside the given Expr 69 static std::vector<clang::Decl *> 70 computeReferencedDecls(const clang::Expr *Expr) { 71 // RAV subclass to find all DeclRefs in a given Stmt 72 class FindDeclRefsVisitor 73 : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> { 74 public: 75 std::vector<Decl *> ReferencedDecls; 76 bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT 77 ReferencedDecls.push_back(DeclRef->getDecl()); 78 return true; 79 } 80 }; 81 FindDeclRefsVisitor Visitor; 82 Visitor.TraverseStmt(const_cast<Stmt *>(dyn_cast<Stmt>(Expr))); 83 return Visitor.ReferencedDecls; 84 } 85 86 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node, 87 const SourceManager &SM, 88 const ASTContext &Ctx) 89 : ExprNode(Node), SM(SM), Ctx(Ctx) { 90 Expr = Node->ASTNode.get<clang::Expr>(); 91 ReferencedDecls = computeReferencedDecls(Expr); 92 InsertionPoint = computeInsertionPoint(); 93 if (InsertionPoint) 94 Extractable = true; 95 } 96 97 // checks whether extracting before InsertionPoint will take a 98 // variable reference out of scope 99 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const { 100 SourceLocation ScopeBegin = Scope->getBeginLoc(); 101 SourceLocation ScopeEnd = Scope->getEndLoc(); 102 for (const Decl *ReferencedDecl : ReferencedDecls) { 103 if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) && 104 SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd)) 105 return false; 106 } 107 return true; 108 } 109 110 // Return the Stmt before which we need to insert the extraction. 111 // To find the Stmt, we go up the AST Tree and if the Parent of the current 112 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before 113 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a 114 // CompoundStmt 115 // 116 // FIXME: Extraction from label, switch and case statements 117 // FIXME: Doens't work for FoldExpr 118 // FIXME: Ensure extraction from loops doesn't change semantics. 119 const clang::Stmt *ExtractionContext::computeInsertionPoint() const { 120 // returns true if we can extract before InsertionPoint 121 auto CanExtractOutside = 122 [](const SelectionTree::Node *InsertionPoint) -> bool { 123 if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) { 124 // Allow all expressions except LambdaExpr since we don't want to extract 125 // from the captures/default arguments of a lambda 126 if (isa<clang::Expr>(Stmt)) 127 return !isa<LambdaExpr>(Stmt); 128 // We don't yet allow extraction from switch/case stmt as we would need to 129 // jump over the switch stmt even if there is a CompoundStmt inside the 130 // switch. And there are other Stmts which we don't care about (e.g. 131 // continue and break) as there can never be anything to extract from 132 // them. 133 return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) || 134 isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) || 135 isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) || 136 isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt); 137 } 138 if (InsertionPoint->ASTNode.get<VarDecl>()) 139 return true; 140 return false; 141 }; 142 for (const SelectionTree::Node *CurNode = getExprNode(); 143 CurNode->Parent && CanExtractOutside(CurNode); 144 CurNode = CurNode->Parent) { 145 const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>(); 146 // give up if extraction will take a variable out of scope 147 if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint)) 148 break; 149 if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) { 150 if (isa<CompoundStmt>(CurParent)) { 151 // Ensure we don't write inside a macro. 152 if (CurParent->getBeginLoc().isMacroID()) 153 continue; 154 return CurInsertionPoint; 155 } 156 } 157 } 158 return nullptr; 159 } 160 161 // returns the replacement for substituting the extraction with VarName 162 tooling::Replacement 163 ExtractionContext::replaceWithVar(SourceRange Chars, 164 llvm::StringRef VarName) const { 165 unsigned ExtractionLength = 166 SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin()); 167 return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName); 168 } 169 // returns the Replacement for declaring a new variable storing the extraction 170 tooling::Replacement 171 ExtractionContext::insertDeclaration(llvm::StringRef VarName, 172 SourceRange InitializerChars) const { 173 llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars); 174 const SourceLocation InsertionLoc = 175 toHalfOpenFileRange(SM, Ctx.getLangOpts(), 176 InsertionPoint->getSourceRange()) 177 ->getBegin(); 178 // FIXME: Replace auto with explicit type and add &/&& as necessary 179 std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " + 180 ExtractionCode.str() + "; "; 181 return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl); 182 } 183 184 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d. 185 // 186 // These are special, because the formal AST doesn't match what users expect: 187 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`. 188 // - but extracting `b + c` is reasonable, as + is (mathematically) associative. 189 // 190 // So we try to support these cases with some restrictions: 191 // - the operator must be associative 192 // - no mixing of operators is allowed 193 // - we don't look inside macro expansions in the subexpressions 194 // - we only adjust the extracted range, so references in the unselected parts 195 // of the AST expression (e.g. `a`) are still considered referenced for 196 // the purposes of calculating the insertion point. 197 // FIXME: it would be nice to exclude these references, by micromanaging 198 // the computeReferencedDecls() calls around the binary operator tree. 199 200 // Information extracted about a binary operator encounted in a SelectionTree. 201 // It can represent either an overloaded or built-in operator. 202 struct ParsedBinaryOperator { 203 BinaryOperatorKind Kind; 204 SourceLocation ExprLoc; 205 llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands; 206 207 // If N is a binary operator, populate this and return true. 208 bool parse(const SelectionTree::Node &N) { 209 SelectedOperands.clear(); 210 211 if (const BinaryOperator *Op = 212 llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) { 213 Kind = Op->getOpcode(); 214 ExprLoc = Op->getExprLoc(); 215 SelectedOperands = N.Children; 216 return true; 217 } 218 if (const CXXOperatorCallExpr *Op = 219 llvm::dyn_cast_or_null<CXXOperatorCallExpr>( 220 N.ASTNode.get<Expr>())) { 221 if (!Op->isInfixBinaryOp()) 222 return false; 223 224 Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator()); 225 ExprLoc = Op->getExprLoc(); 226 // Not all children are args, there's also the callee (operator). 227 for (const auto* Child : N.Children) { 228 const Expr *E = Child->ASTNode.get<Expr>(); 229 assert(E && "callee and args should be Exprs!"); 230 if (E == Op->getArg(0) || E == Op->getArg(1)) 231 SelectedOperands.push_back(Child); 232 } 233 return true; 234 } 235 return false; 236 } 237 238 bool associative() const { 239 // Must also be left-associative, or update getBinaryOperatorRange()! 240 switch (Kind) { 241 case BO_Add: 242 case BO_Mul: 243 case BO_And: 244 case BO_Or: 245 case BO_Xor: 246 case BO_LAnd: 247 case BO_LOr: 248 return true; 249 default: 250 return false; 251 } 252 } 253 254 bool crossesMacroBoundary(const SourceManager &SM) { 255 FileID F = SM.getFileID(ExprLoc); 256 for (const SelectionTree::Node *Child : SelectedOperands) 257 if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F) 258 return true; 259 return false; 260 } 261 }; 262 263 // If have an associative operator at the top level, then we must find 264 // the start point (rightmost in LHS) and end point (leftmost in RHS). 265 // We can only descend into subtrees where the operator matches. 266 // 267 // e.g. for a + [[b + c]] + d 268 // + 269 // / \ 270 // N-> + d 271 // / \ 272 // + c <- End 273 // / \ 274 // a b <- Start 275 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N, 276 const SourceManager &SM, 277 const LangOptions &LangOpts) { 278 // If N is not a suitable binary operator, bail out. 279 ParsedBinaryOperator Op; 280 if (!Op.parse(N.ignoreImplicit()) || !Op.associative() || 281 Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2) 282 return SourceRange(); 283 BinaryOperatorKind OuterOp = Op.Kind; 284 285 // Because the tree we're interested in contains only one operator type, and 286 // all eligible operators are left-associative, the shape of the tree is 287 // very restricted: it's a linked list along the left edges. 288 // This simplifies our implementation. 289 const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS 290 const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS 291 // End is already correct: it can't be an OuterOp (as it's left-associative). 292 // Start needs to be pushed down int the subtree to the right spot. 293 while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp && 294 !Op.crossesMacroBoundary(SM)) { 295 assert(!Op.SelectedOperands.empty() && "got only operator on one side!"); 296 if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected 297 Start = Op.SelectedOperands.back(); 298 break; 299 } 300 // Op.LHS is (at least partially) selected, so descend into it. 301 Start = Op.SelectedOperands.front(); 302 } 303 304 return SourceRange( 305 toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange()) 306 ->getBegin(), 307 toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange()) 308 ->getEnd()); 309 } 310 311 SourceRange ExtractionContext::getExtractionChars() const { 312 // Special case: we're extracting an associative binary subexpression. 313 SourceRange BinaryOperatorRange = 314 getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts()); 315 if (BinaryOperatorRange.isValid()) 316 return BinaryOperatorRange; 317 318 // Usual case: we're extracting the whole expression. 319 return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange()); 320 } 321 322 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef 323 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) { 324 const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit(); 325 const SelectionTree::Node *MaybeCall = MaybeCallee.Parent; 326 if (!MaybeCall) 327 return nullptr; 328 const CallExpr *CE = 329 llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>()); 330 if (!CE) 331 return nullptr; 332 if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>()) 333 return nullptr; 334 return MaybeCall; 335 } 336 337 // Returns true if Inner (which is a direct child of Outer) is appearing as 338 // a statement rather than an expression whose value can be used. 339 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) { 340 if (!Outer || !Inner) 341 return false; 342 // Blacklist the most common places where an expr can appear but be unused. 343 if (llvm::isa<CompoundStmt>(Outer)) 344 return true; 345 if (llvm::isa<SwitchCase>(Outer)) 346 return true; 347 // Control flow statements use condition etc, but not the body. 348 if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer)) 349 return Inner == WS->getBody(); 350 if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer)) 351 return Inner == DS->getBody(); 352 if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer)) 353 return Inner == FS->getBody(); 354 if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer)) 355 return Inner == FS->getBody(); 356 if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer)) 357 return Inner == IS->getThen() || Inner == IS->getElse(); 358 // Assume all other cases may be actual expressions. 359 // This includes the important case of subexpressions (where Outer is Expr). 360 return false; 361 } 362 363 // check if N can and should be extracted (e.g. is not void-typed). 364 bool eligibleForExtraction(const SelectionTree::Node *N) { 365 const Expr *E = N->ASTNode.get<Expr>(); 366 if (!E) 367 return false; 368 369 // Void expressions can't be assigned to variables. 370 if (const Type *ExprType = E->getType().getTypePtrOrNull()) 371 if (ExprType->isVoidType()) 372 return false; 373 374 // A plain reference to a name (e.g. variable) isn't worth extracting. 375 // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`? 376 if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E)) 377 return false; 378 379 // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful. 380 // FIXME: we could still hoist the assignment, and leave the variable there? 381 ParsedBinaryOperator BinOp; 382 if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind)) 383 return false; 384 385 // We don't want to extract expressions used as statements, that would leave 386 // a `dummy;` around that has no effect. 387 // Unfortunately because the AST doesn't have ExprStmt, we have to check in 388 // this roundabout way. 389 const SelectionTree::Node &OuterImplicit = N->outerImplicit(); 390 if (!OuterImplicit.Parent || 391 childExprIsStmt(OuterImplicit.Parent->ASTNode.get<Stmt>(), 392 OuterImplicit.ASTNode.get<Expr>())) 393 return false; 394 395 // FIXME: ban extracting the RHS of an assignment: `a = [[foo()]]` 396 return true; 397 } 398 399 // Find the Expr node that we're going to extract. 400 // We don't want to trigger for assignment expressions and variable/field 401 // DeclRefs. For function/member function, we want to extract the entire 402 // function call. 403 const SelectionTree::Node *computeExtractedExpr(const SelectionTree::Node *N) { 404 if (!N) 405 return nullptr; 406 const SelectionTree::Node *TargetNode = N; 407 const clang::Expr *SelectedExpr = N->ASTNode.get<clang::Expr>(); 408 if (!SelectedExpr) 409 return nullptr; 410 // For function and member function DeclRefs, extract the whole call. 411 if (llvm::isa<DeclRefExpr>(SelectedExpr) || 412 llvm::isa<MemberExpr>(SelectedExpr)) 413 if (const SelectionTree::Node *Call = getCallExpr(N)) 414 TargetNode = Call; 415 // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful. 416 if (const BinaryOperator *BinOpExpr = 417 dyn_cast_or_null<BinaryOperator>(SelectedExpr)) { 418 if (BinOpExpr->getOpcode() == BinaryOperatorKind::BO_Assign) 419 return nullptr; 420 } 421 if (!TargetNode || !eligibleForExtraction(TargetNode)) 422 return nullptr; 423 return TargetNode; 424 } 425 426 /// Extracts an expression to the variable dummy 427 /// Before: 428 /// int x = 5 + 4 * 3; 429 /// ^^^^^ 430 /// After: 431 /// auto dummy = 5 + 4; 432 /// int x = dummy * 3; 433 class ExtractVariable : public Tweak { 434 public: 435 const char *id() const override final; 436 bool prepare(const Selection &Inputs) override; 437 Expected<Effect> apply(const Selection &Inputs) override; 438 std::string title() const override { 439 return "Extract subexpression to variable"; 440 } 441 Intent intent() const override { return Refactor; } 442 443 private: 444 // the expression to extract 445 std::unique_ptr<ExtractionContext> Target; 446 }; 447 REGISTER_TWEAK(ExtractVariable) 448 bool ExtractVariable::prepare(const Selection &Inputs) { 449 // we don't trigger on empty selections for now 450 if (Inputs.SelectionBegin == Inputs.SelectionEnd) 451 return false; 452 const ASTContext &Ctx = Inputs.AST.getASTContext(); 453 const SourceManager &SM = Inputs.AST.getSourceManager(); 454 if (const SelectionTree::Node *N = 455 computeExtractedExpr(Inputs.ASTSelection.commonAncestor())) 456 Target = std::make_unique<ExtractionContext>(N, SM, Ctx); 457 return Target && Target->isExtractable(); 458 } 459 460 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) { 461 tooling::Replacements Result; 462 // FIXME: get variable name from user or suggest based on type 463 std::string VarName = "dummy"; 464 SourceRange Range = Target->getExtractionChars(); 465 // insert new variable declaration 466 if (auto Err = Result.add(Target->insertDeclaration(VarName, Range))) 467 return std::move(Err); 468 // replace expression with variable name 469 if (auto Err = Result.add(Target->replaceWithVar(Range, VarName))) 470 return std::move(Err); 471 return Effect::mainFileEdit(Inputs.AST.getSourceManager(), std::move(Result)); 472 } 473 474 } // namespace 475 } // namespace clangd 476 } // namespace clang 477