10b57cec5SDimitry Andric //===--- SourceExtraction.cpp - Clang refactoring library -----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
9a7dea167SDimitry Andric #include "clang/Tooling/Refactoring/Extract/SourceExtraction.h"
100b57cec5SDimitry Andric #include "clang/AST/Stmt.h"
110b57cec5SDimitry Andric #include "clang/AST/StmtCXX.h"
120b57cec5SDimitry Andric #include "clang/AST/StmtObjC.h"
130b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
140b57cec5SDimitry Andric #include "clang/Lex/Lexer.h"
15*bdd1243dSDimitry Andric #include <optional>
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric using namespace clang;
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric namespace {
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric /// Returns true if the token at the given location is a semicolon.
isSemicolonAtLocation(SourceLocation TokenLoc,const SourceManager & SM,const LangOptions & LangOpts)220b57cec5SDimitry Andric bool isSemicolonAtLocation(SourceLocation TokenLoc, const SourceManager &SM,
230b57cec5SDimitry Andric const LangOptions &LangOpts) {
240b57cec5SDimitry Andric return Lexer::getSourceText(
250b57cec5SDimitry Andric CharSourceRange::getTokenRange(TokenLoc, TokenLoc), SM,
260b57cec5SDimitry Andric LangOpts) == ";";
270b57cec5SDimitry Andric }
280b57cec5SDimitry Andric
290b57cec5SDimitry Andric /// Returns true if there should be a semicolon after the given statement.
isSemicolonRequiredAfter(const Stmt * S)300b57cec5SDimitry Andric bool isSemicolonRequiredAfter(const Stmt *S) {
310b57cec5SDimitry Andric if (isa<CompoundStmt>(S))
320b57cec5SDimitry Andric return false;
330b57cec5SDimitry Andric if (const auto *If = dyn_cast<IfStmt>(S))
340b57cec5SDimitry Andric return isSemicolonRequiredAfter(If->getElse() ? If->getElse()
350b57cec5SDimitry Andric : If->getThen());
360b57cec5SDimitry Andric if (const auto *While = dyn_cast<WhileStmt>(S))
370b57cec5SDimitry Andric return isSemicolonRequiredAfter(While->getBody());
380b57cec5SDimitry Andric if (const auto *For = dyn_cast<ForStmt>(S))
390b57cec5SDimitry Andric return isSemicolonRequiredAfter(For->getBody());
400b57cec5SDimitry Andric if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(S))
410b57cec5SDimitry Andric return isSemicolonRequiredAfter(CXXFor->getBody());
420b57cec5SDimitry Andric if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(S))
430b57cec5SDimitry Andric return isSemicolonRequiredAfter(ObjCFor->getBody());
44a7dea167SDimitry Andric if(const auto *Switch = dyn_cast<SwitchStmt>(S))
45a7dea167SDimitry Andric return isSemicolonRequiredAfter(Switch->getBody());
46a7dea167SDimitry Andric if(const auto *Case = dyn_cast<SwitchCase>(S))
47a7dea167SDimitry Andric return isSemicolonRequiredAfter(Case->getSubStmt());
480b57cec5SDimitry Andric switch (S->getStmtClass()) {
49a7dea167SDimitry Andric case Stmt::DeclStmtClass:
500b57cec5SDimitry Andric case Stmt::CXXTryStmtClass:
510b57cec5SDimitry Andric case Stmt::ObjCAtSynchronizedStmtClass:
520b57cec5SDimitry Andric case Stmt::ObjCAutoreleasePoolStmtClass:
530b57cec5SDimitry Andric case Stmt::ObjCAtTryStmtClass:
540b57cec5SDimitry Andric return false;
550b57cec5SDimitry Andric default:
560b57cec5SDimitry Andric return true;
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric /// Returns true if the two source locations are on the same line.
areOnSameLine(SourceLocation Loc1,SourceLocation Loc2,const SourceManager & SM)610b57cec5SDimitry Andric bool areOnSameLine(SourceLocation Loc1, SourceLocation Loc2,
620b57cec5SDimitry Andric const SourceManager &SM) {
630b57cec5SDimitry Andric return !Loc1.isMacroID() && !Loc2.isMacroID() &&
640b57cec5SDimitry Andric SM.getSpellingLineNumber(Loc1) == SM.getSpellingLineNumber(Loc2);
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric
670b57cec5SDimitry Andric } // end anonymous namespace
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric namespace clang {
700b57cec5SDimitry Andric namespace tooling {
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric ExtractionSemicolonPolicy
compute(const Stmt * S,SourceRange & ExtractedRange,const SourceManager & SM,const LangOptions & LangOpts)730b57cec5SDimitry Andric ExtractionSemicolonPolicy::compute(const Stmt *S, SourceRange &ExtractedRange,
740b57cec5SDimitry Andric const SourceManager &SM,
750b57cec5SDimitry Andric const LangOptions &LangOpts) {
760b57cec5SDimitry Andric auto neededInExtractedFunction = []() {
770b57cec5SDimitry Andric return ExtractionSemicolonPolicy(true, false);
780b57cec5SDimitry Andric };
790b57cec5SDimitry Andric auto neededInOriginalFunction = []() {
800b57cec5SDimitry Andric return ExtractionSemicolonPolicy(false, true);
810b57cec5SDimitry Andric };
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric /// The extracted expression should be terminated with a ';'. The call to
840b57cec5SDimitry Andric /// the extracted function will replace this expression, so it won't need
850b57cec5SDimitry Andric /// a terminating ';'.
860b57cec5SDimitry Andric if (isa<Expr>(S))
870b57cec5SDimitry Andric return neededInExtractedFunction();
880b57cec5SDimitry Andric
890b57cec5SDimitry Andric /// Some statements don't need to be terminated with ';'. The call to the
900b57cec5SDimitry Andric /// extracted function will be a standalone statement, so it should be
910b57cec5SDimitry Andric /// terminated with a ';'.
920b57cec5SDimitry Andric bool NeedsSemi = isSemicolonRequiredAfter(S);
930b57cec5SDimitry Andric if (!NeedsSemi)
940b57cec5SDimitry Andric return neededInOriginalFunction();
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric /// Some statements might end at ';'. The extraction will move that ';', so
970b57cec5SDimitry Andric /// the call to the extracted function should be terminated with a ';'.
980b57cec5SDimitry Andric SourceLocation End = ExtractedRange.getEnd();
990b57cec5SDimitry Andric if (isSemicolonAtLocation(End, SM, LangOpts))
1000b57cec5SDimitry Andric return neededInOriginalFunction();
1010b57cec5SDimitry Andric
1020b57cec5SDimitry Andric /// Other statements should generally have a trailing ';'. We can try to find
1030b57cec5SDimitry Andric /// it and move it together it with the extracted code.
104*bdd1243dSDimitry Andric std::optional<Token> NextToken = Lexer::findNextToken(End, SM, LangOpts);
1050b57cec5SDimitry Andric if (NextToken && NextToken->is(tok::semi) &&
1060b57cec5SDimitry Andric areOnSameLine(NextToken->getLocation(), End, SM)) {
1070b57cec5SDimitry Andric ExtractedRange.setEnd(NextToken->getLocation());
1080b57cec5SDimitry Andric return neededInOriginalFunction();
1090b57cec5SDimitry Andric }
1100b57cec5SDimitry Andric
1110b57cec5SDimitry Andric /// Otherwise insert semicolons in both places.
1120b57cec5SDimitry Andric return ExtractionSemicolonPolicy(true, true);
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric
1150b57cec5SDimitry Andric } // end namespace tooling
1160b57cec5SDimitry Andric } // end namespace clang
117