1 //===--- ExtractVariable.cpp ------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "ClangdUnit.h" 9 #include "Logger.h" 10 #include "Protocol.h" 11 #include "Selection.h" 12 #include "SourceCode.h" 13 #include "refactor/Tweak.h" 14 #include "clang/AST/ASTContext.h" 15 #include "clang/AST/Expr.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/AST/OperationKinds.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Stmt.h" 20 #include "clang/AST/StmtCXX.h" 21 #include "clang/Basic/LangOptions.h" 22 #include "clang/Basic/SourceLocation.h" 23 #include "clang/Basic/SourceManager.h" 24 #include "clang/Tooling/Core/Replacement.h" 25 #include "llvm/ADT/None.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/StringRef.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Error.h" 30 #include "llvm/Support/raw_ostream.h" 31 32 namespace clang { 33 namespace clangd { 34 namespace { 35 // information regarding the Expr that is being extracted 36 class ExtractionContext { 37 public: 38 ExtractionContext(const SelectionTree::Node *Node, const SourceManager &SM, 39 const ASTContext &Ctx); 40 const clang::Expr *getExpr() const { return Expr; } 41 const SelectionTree::Node *getExprNode() const { return ExprNode; } 42 bool isExtractable() const { return Extractable; } 43 // The half-open range for the expression to be extracted. 44 SourceRange getExtractionChars() const; 45 // Generate Replacement for replacing selected expression with given VarName 46 tooling::Replacement replaceWithVar(SourceRange Chars, 47 llvm::StringRef VarName) const; 48 // Generate Replacement for declaring the selected Expr as a new variable 49 tooling::Replacement insertDeclaration(llvm::StringRef VarName, 50 SourceRange InitChars) const; 51 52 private: 53 bool Extractable = false; 54 const clang::Expr *Expr; 55 const SelectionTree::Node *ExprNode; 56 // Stmt before which we will extract 57 const clang::Stmt *InsertionPoint = nullptr; 58 const SourceManager &SM; 59 const ASTContext &Ctx; 60 // Decls referenced in the Expr 61 std::vector<clang::Decl *> ReferencedDecls; 62 // returns true if the Expr doesn't reference any variable declared in scope 63 bool exprIsValidOutside(const clang::Stmt *Scope) const; 64 // computes the Stmt before which we will extract out Expr 65 const clang::Stmt *computeInsertionPoint() const; 66 }; 67 68 // Returns all the Decls referenced inside the given Expr 69 static std::vector<clang::Decl *> 70 computeReferencedDecls(const clang::Expr *Expr) { 71 // RAV subclass to find all DeclRefs in a given Stmt 72 class FindDeclRefsVisitor 73 : public clang::RecursiveASTVisitor<FindDeclRefsVisitor> { 74 public: 75 std::vector<Decl *> ReferencedDecls; 76 bool VisitDeclRefExpr(DeclRefExpr *DeclRef) { // NOLINT 77 ReferencedDecls.push_back(DeclRef->getDecl()); 78 return true; 79 } 80 }; 81 FindDeclRefsVisitor Visitor; 82 Visitor.TraverseStmt(const_cast<Stmt *>(dyn_cast<Stmt>(Expr))); 83 return Visitor.ReferencedDecls; 84 } 85 86 ExtractionContext::ExtractionContext(const SelectionTree::Node *Node, 87 const SourceManager &SM, 88 const ASTContext &Ctx) 89 : ExprNode(Node), SM(SM), Ctx(Ctx) { 90 Expr = Node->ASTNode.get<clang::Expr>(); 91 ReferencedDecls = computeReferencedDecls(Expr); 92 InsertionPoint = computeInsertionPoint(); 93 if (InsertionPoint) 94 Extractable = true; 95 } 96 97 // checks whether extracting before InsertionPoint will take a 98 // variable reference out of scope 99 bool ExtractionContext::exprIsValidOutside(const clang::Stmt *Scope) const { 100 SourceLocation ScopeBegin = Scope->getBeginLoc(); 101 SourceLocation ScopeEnd = Scope->getEndLoc(); 102 for (const Decl *ReferencedDecl : ReferencedDecls) { 103 if (SM.isPointWithin(ReferencedDecl->getBeginLoc(), ScopeBegin, ScopeEnd) && 104 SM.isPointWithin(ReferencedDecl->getEndLoc(), ScopeBegin, ScopeEnd)) 105 return false; 106 } 107 return true; 108 } 109 110 // Return the Stmt before which we need to insert the extraction. 111 // To find the Stmt, we go up the AST Tree and if the Parent of the current 112 // Stmt is a CompoundStmt, we can extract inside this CompoundStmt just before 113 // the current Stmt. We ALWAYS insert before a Stmt whose parent is a 114 // CompoundStmt 115 // 116 // FIXME: Extraction from label, switch and case statements 117 // FIXME: Doens't work for FoldExpr 118 // FIXME: Ensure extraction from loops doesn't change semantics. 119 const clang::Stmt *ExtractionContext::computeInsertionPoint() const { 120 // returns true if we can extract before InsertionPoint 121 auto CanExtractOutside = 122 [](const SelectionTree::Node *InsertionPoint) -> bool { 123 if (const clang::Stmt *Stmt = InsertionPoint->ASTNode.get<clang::Stmt>()) { 124 // Allow all expressions except LambdaExpr since we don't want to extract 125 // from the captures/default arguments of a lambda 126 if (isa<clang::Expr>(Stmt)) 127 return !isa<LambdaExpr>(Stmt); 128 // We don't yet allow extraction from switch/case stmt as we would need to 129 // jump over the switch stmt even if there is a CompoundStmt inside the 130 // switch. And there are other Stmts which we don't care about (e.g. 131 // continue and break) as there can never be anything to extract from 132 // them. 133 return isa<AttributedStmt>(Stmt) || isa<CompoundStmt>(Stmt) || 134 isa<CXXForRangeStmt>(Stmt) || isa<DeclStmt>(Stmt) || 135 isa<DoStmt>(Stmt) || isa<ForStmt>(Stmt) || isa<IfStmt>(Stmt) || 136 isa<ReturnStmt>(Stmt) || isa<WhileStmt>(Stmt); 137 } 138 if (InsertionPoint->ASTNode.get<VarDecl>()) 139 return true; 140 return false; 141 }; 142 for (const SelectionTree::Node *CurNode = getExprNode(); 143 CurNode->Parent && CanExtractOutside(CurNode); 144 CurNode = CurNode->Parent) { 145 const clang::Stmt *CurInsertionPoint = CurNode->ASTNode.get<Stmt>(); 146 // give up if extraction will take a variable out of scope 147 if (CurInsertionPoint && !exprIsValidOutside(CurInsertionPoint)) 148 break; 149 if (const clang::Stmt *CurParent = CurNode->Parent->ASTNode.get<Stmt>()) { 150 if (isa<CompoundStmt>(CurParent)) { 151 // Ensure we don't write inside a macro. 152 if (CurParent->getBeginLoc().isMacroID()) 153 continue; 154 return CurInsertionPoint; 155 } 156 } 157 } 158 return nullptr; 159 } 160 161 // returns the replacement for substituting the extraction with VarName 162 tooling::Replacement 163 ExtractionContext::replaceWithVar(SourceRange Chars, 164 llvm::StringRef VarName) const { 165 unsigned ExtractionLength = 166 SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin()); 167 return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName); 168 } 169 // returns the Replacement for declaring a new variable storing the extraction 170 tooling::Replacement 171 ExtractionContext::insertDeclaration(llvm::StringRef VarName, 172 SourceRange InitializerChars) const { 173 llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars); 174 const SourceLocation InsertionLoc = 175 toHalfOpenFileRange(SM, Ctx.getLangOpts(), 176 InsertionPoint->getSourceRange()) 177 ->getBegin(); 178 // FIXME: Replace auto with explicit type and add &/&& as necessary 179 std::string ExtractedVarDecl = std::string("auto ") + VarName.str() + " = " + 180 ExtractionCode.str() + "; "; 181 return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl); 182 } 183 184 // Helpers for handling "binary subexpressions" like a + [[b + c]] + d. 185 // 186 // These are special, because the formal AST doesn't match what users expect: 187 // - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`. 188 // - but extracting `b + c` is reasonable, as + is (mathematically) associative. 189 // 190 // So we try to support these cases with some restrictions: 191 // - the operator must be associative 192 // - no mixing of operators is allowed 193 // - we don't look inside macro expansions in the subexpressions 194 // - we only adjust the extracted range, so references in the unselected parts 195 // of the AST expression (e.g. `a`) are still considered referenced for 196 // the purposes of calculating the insertion point. 197 // FIXME: it would be nice to exclude these references, by micromanaging 198 // the computeReferencedDecls() calls around the binary operator tree. 199 200 // Information extracted about a binary operator encounted in a SelectionTree. 201 // It can represent either an overloaded or built-in operator. 202 struct ParsedBinaryOperator { 203 BinaryOperatorKind Kind; 204 SourceLocation ExprLoc; 205 llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands; 206 207 // If N is a binary operator, populate this and return true. 208 bool parse(const SelectionTree::Node &N) { 209 SelectedOperands.clear(); 210 211 if (const BinaryOperator *Op = 212 llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) { 213 Kind = Op->getOpcode(); 214 ExprLoc = Op->getExprLoc(); 215 SelectedOperands = N.Children; 216 return true; 217 } 218 if (const CXXOperatorCallExpr *Op = 219 llvm::dyn_cast_or_null<CXXOperatorCallExpr>( 220 N.ASTNode.get<Expr>())) { 221 if (!Op->isInfixBinaryOp()) 222 return false; 223 224 Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator()); 225 ExprLoc = Op->getExprLoc(); 226 // Not all children are args, there's also the callee (operator). 227 for (const auto* Child : N.Children) { 228 const Expr *E = Child->ASTNode.get<Expr>(); 229 assert(E && "callee and args should be Exprs!"); 230 if (E == Op->getArg(0) || E == Op->getArg(1)) 231 SelectedOperands.push_back(Child); 232 } 233 return true; 234 } 235 return false; 236 } 237 238 bool associative() const { 239 // Must also be left-associative, or update getBinaryOperatorRange()! 240 switch (Kind) { 241 case BO_Add: 242 case BO_Mul: 243 case BO_And: 244 case BO_Or: 245 case BO_Xor: 246 case BO_LAnd: 247 case BO_LOr: 248 return true; 249 default: 250 return false; 251 } 252 } 253 254 bool crossesMacroBoundary(const SourceManager &SM) { 255 FileID F = SM.getFileID(ExprLoc); 256 for (const SelectionTree::Node *Child : SelectedOperands) 257 if (SM.getFileID(Child->ASTNode.get<Expr>()->getExprLoc()) != F) 258 return true; 259 return false; 260 } 261 }; 262 263 // If have an associative operator at the top level, then we must find 264 // the start point (rightmost in LHS) and end point (leftmost in RHS). 265 // We can only descend into subtrees where the operator matches. 266 // 267 // e.g. for a + [[b + c]] + d 268 // + 269 // / \ 270 // N-> + d 271 // / \ 272 // + c <- End 273 // / \ 274 // a b <- Start 275 const SourceRange getBinaryOperatorRange(const SelectionTree::Node &N, 276 const SourceManager &SM, 277 const LangOptions &LangOpts) { 278 // If N is not a suitable binary operator, bail out. 279 ParsedBinaryOperator Op; 280 if (!Op.parse(N.ignoreImplicit()) || !Op.associative() || 281 Op.crossesMacroBoundary(SM) || Op.SelectedOperands.size() != 2) 282 return SourceRange(); 283 BinaryOperatorKind OuterOp = Op.Kind; 284 285 // Because the tree we're interested in contains only one operator type, and 286 // all eligible operators are left-associative, the shape of the tree is 287 // very restricted: it's a linked list along the left edges. 288 // This simplifies our implementation. 289 const SelectionTree::Node *Start = Op.SelectedOperands.front(); // LHS 290 const SelectionTree::Node *End = Op.SelectedOperands.back(); // RHS 291 // End is already correct: it can't be an OuterOp (as it's left-associative). 292 // Start needs to be pushed down int the subtree to the right spot. 293 while (Op.parse(Start->ignoreImplicit()) && Op.Kind == OuterOp && 294 !Op.crossesMacroBoundary(SM)) { 295 assert(!Op.SelectedOperands.empty() && "got only operator on one side!"); 296 if (Op.SelectedOperands.size() == 1) { // Only Op.RHS selected 297 Start = Op.SelectedOperands.back(); 298 break; 299 } 300 // Op.LHS is (at least partially) selected, so descend into it. 301 Start = Op.SelectedOperands.front(); 302 } 303 304 return SourceRange( 305 toHalfOpenFileRange(SM, LangOpts, Start->ASTNode.getSourceRange()) 306 ->getBegin(), 307 toHalfOpenFileRange(SM, LangOpts, End->ASTNode.getSourceRange()) 308 ->getEnd()); 309 } 310 311 SourceRange ExtractionContext::getExtractionChars() const { 312 // Special case: we're extracting an associative binary subexpression. 313 SourceRange BinaryOperatorRange = 314 getBinaryOperatorRange(*ExprNode, SM, Ctx.getLangOpts()); 315 if (BinaryOperatorRange.isValid()) 316 return BinaryOperatorRange; 317 318 // Usual case: we're extracting the whole expression. 319 return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange()); 320 } 321 322 /// Extracts an expression to the variable dummy 323 /// Before: 324 /// int x = 5 + 4 * 3; 325 /// ^^^^^ 326 /// After: 327 /// auto dummy = 5 + 4; 328 /// int x = dummy * 3; 329 class ExtractVariable : public Tweak { 330 public: 331 const char *id() const override final; 332 bool prepare(const Selection &Inputs) override; 333 Expected<Effect> apply(const Selection &Inputs) override; 334 std::string title() const override { 335 return "Extract subexpression to variable"; 336 } 337 Intent intent() const override { return Refactor; } 338 // Compute the extraction context for the Selection 339 bool computeExtractionContext(const SelectionTree::Node *N, 340 const SourceManager &SM, const ASTContext &Ctx); 341 342 private: 343 // the expression to extract 344 std::unique_ptr<ExtractionContext> Target; 345 }; 346 REGISTER_TWEAK(ExtractVariable) 347 bool ExtractVariable::prepare(const Selection &Inputs) { 348 // we don't trigger on empty selections for now 349 if (Inputs.SelectionBegin == Inputs.SelectionEnd) 350 return false; 351 const ASTContext &Ctx = Inputs.AST.getASTContext(); 352 const SourceManager &SM = Inputs.AST.getSourceManager(); 353 const SelectionTree::Node *N = Inputs.ASTSelection.commonAncestor(); 354 return computeExtractionContext(N, SM, Ctx); 355 } 356 357 Expected<Tweak::Effect> ExtractVariable::apply(const Selection &Inputs) { 358 tooling::Replacements Result; 359 // FIXME: get variable name from user or suggest based on type 360 std::string VarName = "dummy"; 361 SourceRange Range = Target->getExtractionChars(); 362 // insert new variable declaration 363 if (auto Err = Result.add(Target->insertDeclaration(VarName, Range))) 364 return std::move(Err); 365 // replace expression with variable name 366 if (auto Err = Result.add(Target->replaceWithVar(Range, VarName))) 367 return std::move(Err); 368 return Effect::applyEdit(Result); 369 } 370 371 // Find the CallExpr whose callee is the (possibly wrapped) DeclRef 372 const SelectionTree::Node *getCallExpr(const SelectionTree::Node *DeclRef) { 373 const SelectionTree::Node &MaybeCallee = DeclRef->outerImplicit(); 374 const SelectionTree::Node *MaybeCall = MaybeCallee.Parent; 375 if (!MaybeCall) 376 return nullptr; 377 const CallExpr *CE = 378 llvm::dyn_cast_or_null<CallExpr>(MaybeCall->ASTNode.get<Expr>()); 379 if (!CE) 380 return nullptr; 381 if (CE->getCallee() != MaybeCallee.ASTNode.get<Expr>()) 382 return nullptr; 383 return MaybeCall; 384 } 385 386 // Returns true if Inner (which is a direct child of Outer) is appearing as 387 // a statement rather than an expression whose value can be used. 388 bool childExprIsStmt(const Stmt *Outer, const Expr *Inner) { 389 if (!Outer || !Inner) 390 return false; 391 // Blacklist the most common places where an expr can appear but be unused. 392 if (llvm::isa<CompoundStmt>(Outer)) 393 return true; 394 if (llvm::isa<SwitchCase>(Outer)) 395 return true; 396 // Control flow statements use condition etc, but not the body. 397 if (const auto* WS = llvm::dyn_cast<WhileStmt>(Outer)) 398 return Inner == WS->getBody(); 399 if (const auto* DS = llvm::dyn_cast<DoStmt>(Outer)) 400 return Inner == DS->getBody(); 401 if (const auto* FS = llvm::dyn_cast<ForStmt>(Outer)) 402 return Inner == FS->getBody(); 403 if (const auto* FS = llvm::dyn_cast<CXXForRangeStmt>(Outer)) 404 return Inner == FS->getBody(); 405 if (const auto* IS = llvm::dyn_cast<IfStmt>(Outer)) 406 return Inner == IS->getThen() || Inner == IS->getElse(); 407 // Assume all other cases may be actual expressions. 408 // This includes the important case of subexpressions (where Outer is Expr). 409 return false; 410 } 411 412 // check if N can and should be extracted (e.g. is not void-typed). 413 bool eligibleForExtraction(const SelectionTree::Node *N) { 414 const Expr *E = N->ASTNode.get<Expr>(); 415 if (!E) 416 return false; 417 418 // Void expressions can't be assigned to variables. 419 if (const Type *ExprType = E->getType().getTypePtrOrNull()) 420 if (ExprType->isVoidType()) 421 return false; 422 423 // A plain reference to a name (e.g. variable) isn't worth extracting. 424 // FIXME: really? What if it's e.g. `std::is_same<void, void>::value`? 425 if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E)) 426 return false; 427 428 // Extracting Exprs like a = 1 gives dummy = a = 1 which isn't useful. 429 // FIXME: we could still hoist the assignment, and leave the variable there? 430 ParsedBinaryOperator BinOp; 431 if (BinOp.parse(*N) && BinaryOperator::isAssignmentOp(BinOp.Kind)) 432 return false; 433 434 // We don't want to extract expressions used as statements, that would leave 435 // a `dummy;` around that has no effect. 436 // Unfortunately because the AST doesn't have ExprStmt, we have to check in 437 // this roundabout way. 438 const SelectionTree::Node &OuterImplicit = N->outerImplicit(); 439 if (!OuterImplicit.Parent || 440 childExprIsStmt(OuterImplicit.Parent->ASTNode.get<Stmt>(), 441 OuterImplicit.ASTNode.get<Expr>())) 442 return false; 443 444 // FIXME: ban extracting the RHS of an assignment: `a = [[foo()]]` 445 return true; 446 } 447 448 // Find the node that will form our ExtractionContext. 449 // We don't want to trigger for assignment expressions and variable/field 450 // DeclRefs. For function/member function, we want to extract the entire 451 // function call. 452 bool ExtractVariable::computeExtractionContext(const SelectionTree::Node *N, 453 const SourceManager &SM, 454 const ASTContext &Ctx) { 455 if (!N) 456 return false; 457 const SelectionTree::Node *TargetNode = N; 458 // For function and member function DeclRefs, extract the whole call. 459 if (const Expr *E = N->ASTNode.get<clang::Expr>()) 460 if (llvm::isa<DeclRefExpr>(E) || llvm::isa<MemberExpr>(E)) 461 if (const SelectionTree::Node *Call = getCallExpr(N)) 462 TargetNode = Call; 463 if (!eligibleForExtraction(TargetNode)) 464 return false; 465 Target = llvm::make_unique<ExtractionContext>(TargetNode, SM, Ctx); 466 return Target->isExtractable(); 467 } 468 469 } // namespace 470 } // namespace clangd 471 } // namespace clang 472