//== CStringSyntaxChecker.cpp - C string API syntax checks ------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// An AST checker that looks for common pitfalls when using C string APIs.
//  - Identifies erroneous patterns in the last argument to strncat - the number
//    of bytes to copy.
//  - Identifies erroneous patterns in the last argument to strlcpy and
//    strlcat - a size larger than the destination buffer can hold.
//
//===----------------------------------------------------------------------===//
147330f729Sjoerg #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
157330f729Sjoerg #include "clang/AST/Expr.h"
167330f729Sjoerg #include "clang/AST/OperationKinds.h"
177330f729Sjoerg #include "clang/AST/StmtVisitor.h"
187330f729Sjoerg #include "clang/Analysis/AnalysisDeclContext.h"
197330f729Sjoerg #include "clang/Basic/TargetInfo.h"
207330f729Sjoerg #include "clang/Basic/TypeTraits.h"
217330f729Sjoerg #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
227330f729Sjoerg #include "clang/StaticAnalyzer/Core/Checker.h"
237330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
247330f729Sjoerg #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
257330f729Sjoerg #include "llvm/ADT/SmallString.h"
267330f729Sjoerg #include "llvm/Support/raw_ostream.h"
277330f729Sjoerg
287330f729Sjoerg using namespace clang;
297330f729Sjoerg using namespace ento;
307330f729Sjoerg
317330f729Sjoerg namespace {
327330f729Sjoerg class WalkAST: public StmtVisitor<WalkAST> {
337330f729Sjoerg const CheckerBase *Checker;
347330f729Sjoerg BugReporter &BR;
357330f729Sjoerg AnalysisDeclContext* AC;
367330f729Sjoerg
377330f729Sjoerg /// Check if two expressions refer to the same declaration.
sameDecl(const Expr * A1,const Expr * A2)387330f729Sjoerg bool sameDecl(const Expr *A1, const Expr *A2) {
397330f729Sjoerg if (const auto *D1 = dyn_cast<DeclRefExpr>(A1->IgnoreParenCasts()))
407330f729Sjoerg if (const auto *D2 = dyn_cast<DeclRefExpr>(A2->IgnoreParenCasts()))
417330f729Sjoerg return D1->getDecl() == D2->getDecl();
427330f729Sjoerg return false;
437330f729Sjoerg }
447330f729Sjoerg
457330f729Sjoerg /// Check if the expression E is a sizeof(WithArg).
isSizeof(const Expr * E,const Expr * WithArg)467330f729Sjoerg bool isSizeof(const Expr *E, const Expr *WithArg) {
477330f729Sjoerg if (const auto *UE = dyn_cast<UnaryExprOrTypeTraitExpr>(E))
487330f729Sjoerg if (UE->getKind() == UETT_SizeOf && !UE->isArgumentType())
497330f729Sjoerg return sameDecl(UE->getArgumentExpr(), WithArg);
507330f729Sjoerg return false;
517330f729Sjoerg }
527330f729Sjoerg
537330f729Sjoerg /// Check if the expression E is a strlen(WithArg).
isStrlen(const Expr * E,const Expr * WithArg)547330f729Sjoerg bool isStrlen(const Expr *E, const Expr *WithArg) {
557330f729Sjoerg if (const auto *CE = dyn_cast<CallExpr>(E)) {
567330f729Sjoerg const FunctionDecl *FD = CE->getDirectCallee();
577330f729Sjoerg if (!FD)
587330f729Sjoerg return false;
597330f729Sjoerg return (CheckerContext::isCLibraryFunction(FD, "strlen") &&
607330f729Sjoerg sameDecl(CE->getArg(0), WithArg));
617330f729Sjoerg }
627330f729Sjoerg return false;
637330f729Sjoerg }
647330f729Sjoerg
657330f729Sjoerg /// Check if the expression is an integer literal with value 1.
isOne(const Expr * E)667330f729Sjoerg bool isOne(const Expr *E) {
677330f729Sjoerg if (const auto *IL = dyn_cast<IntegerLiteral>(E))
687330f729Sjoerg return (IL->getValue().isIntN(1));
697330f729Sjoerg return false;
707330f729Sjoerg }
717330f729Sjoerg
getPrintableName(const Expr * E)727330f729Sjoerg StringRef getPrintableName(const Expr *E) {
737330f729Sjoerg if (const auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts()))
747330f729Sjoerg return D->getDecl()->getName();
757330f729Sjoerg return StringRef();
767330f729Sjoerg }
777330f729Sjoerg
787330f729Sjoerg /// Identify erroneous patterns in the last argument to strncat - the number
797330f729Sjoerg /// of bytes to copy.
807330f729Sjoerg bool containsBadStrncatPattern(const CallExpr *CE);
817330f729Sjoerg
827330f729Sjoerg /// Identify erroneous patterns in the last argument to strlcpy - the number
837330f729Sjoerg /// of bytes to copy.
847330f729Sjoerg /// The bad pattern checked is when the size is known
857330f729Sjoerg /// to be larger than the destination can handle.
867330f729Sjoerg /// char dst[2];
877330f729Sjoerg /// size_t cpy = 4;
887330f729Sjoerg /// strlcpy(dst, "abcd", sizeof("abcd") - 1);
897330f729Sjoerg /// strlcpy(dst, "abcd", 4);
907330f729Sjoerg /// strlcpy(dst + 3, "abcd", 2);
917330f729Sjoerg /// strlcpy(dst, "abcd", cpy);
927330f729Sjoerg /// Identify erroneous patterns in the last argument to strlcat - the number
937330f729Sjoerg /// of bytes to copy.
947330f729Sjoerg /// The bad pattern checked is when the last argument is basically
957330f729Sjoerg /// pointing to the destination buffer size or argument larger or
967330f729Sjoerg /// equal to.
977330f729Sjoerg /// char dst[2];
987330f729Sjoerg /// strlcat(dst, src2, sizeof(dst));
997330f729Sjoerg /// strlcat(dst, src2, 2);
1007330f729Sjoerg /// strlcat(dst, src2, 10);
1017330f729Sjoerg bool containsBadStrlcpyStrlcatPattern(const CallExpr *CE);
1027330f729Sjoerg
1037330f729Sjoerg public:
WalkAST(const CheckerBase * Checker,BugReporter & BR,AnalysisDeclContext * AC)1047330f729Sjoerg WalkAST(const CheckerBase *Checker, BugReporter &BR, AnalysisDeclContext *AC)
1057330f729Sjoerg : Checker(Checker), BR(BR), AC(AC) {}
1067330f729Sjoerg
1077330f729Sjoerg // Statement visitor methods.
1087330f729Sjoerg void VisitChildren(Stmt *S);
VisitStmt(Stmt * S)1097330f729Sjoerg void VisitStmt(Stmt *S) {
1107330f729Sjoerg VisitChildren(S);
1117330f729Sjoerg }
1127330f729Sjoerg void VisitCallExpr(CallExpr *CE);
1137330f729Sjoerg };
1147330f729Sjoerg } // end anonymous namespace
1157330f729Sjoerg
1167330f729Sjoerg // The correct size argument should look like following:
1177330f729Sjoerg // strncat(dst, src, sizeof(dst) - strlen(dest) - 1);
1187330f729Sjoerg // We look for the following anti-patterns:
1197330f729Sjoerg // - strncat(dst, src, sizeof(dst) - strlen(dst));
1207330f729Sjoerg // - strncat(dst, src, sizeof(dst) - 1);
1217330f729Sjoerg // - strncat(dst, src, sizeof(dst));
containsBadStrncatPattern(const CallExpr * CE)1227330f729Sjoerg bool WalkAST::containsBadStrncatPattern(const CallExpr *CE) {
1237330f729Sjoerg if (CE->getNumArgs() != 3)
1247330f729Sjoerg return false;
1257330f729Sjoerg const Expr *DstArg = CE->getArg(0);
1267330f729Sjoerg const Expr *SrcArg = CE->getArg(1);
1277330f729Sjoerg const Expr *LenArg = CE->getArg(2);
1287330f729Sjoerg
1297330f729Sjoerg // Identify wrong size expressions, which are commonly used instead.
1307330f729Sjoerg if (const auto *BE = dyn_cast<BinaryOperator>(LenArg->IgnoreParenCasts())) {
1317330f729Sjoerg // - sizeof(dst) - strlen(dst)
1327330f729Sjoerg if (BE->getOpcode() == BO_Sub) {
1337330f729Sjoerg const Expr *L = BE->getLHS();
1347330f729Sjoerg const Expr *R = BE->getRHS();
1357330f729Sjoerg if (isSizeof(L, DstArg) && isStrlen(R, DstArg))
1367330f729Sjoerg return true;
1377330f729Sjoerg
1387330f729Sjoerg // - sizeof(dst) - 1
1397330f729Sjoerg if (isSizeof(L, DstArg) && isOne(R->IgnoreParenCasts()))
1407330f729Sjoerg return true;
1417330f729Sjoerg }
1427330f729Sjoerg }
1437330f729Sjoerg // - sizeof(dst)
1447330f729Sjoerg if (isSizeof(LenArg, DstArg))
1457330f729Sjoerg return true;
1467330f729Sjoerg
1477330f729Sjoerg // - sizeof(src)
1487330f729Sjoerg if (isSizeof(LenArg, SrcArg))
1497330f729Sjoerg return true;
1507330f729Sjoerg return false;
1517330f729Sjoerg }
1527330f729Sjoerg
containsBadStrlcpyStrlcatPattern(const CallExpr * CE)1537330f729Sjoerg bool WalkAST::containsBadStrlcpyStrlcatPattern(const CallExpr *CE) {
1547330f729Sjoerg if (CE->getNumArgs() != 3)
1557330f729Sjoerg return false;
1567330f729Sjoerg const Expr *DstArg = CE->getArg(0);
1577330f729Sjoerg const Expr *LenArg = CE->getArg(2);
1587330f729Sjoerg
1597330f729Sjoerg const auto *DstArgDRE = dyn_cast<DeclRefExpr>(DstArg->IgnoreParenImpCasts());
1607330f729Sjoerg const auto *LenArgDRE =
1617330f729Sjoerg dyn_cast<DeclRefExpr>(LenArg->IgnoreParenLValueCasts());
1627330f729Sjoerg uint64_t DstOff = 0;
1637330f729Sjoerg if (isSizeof(LenArg, DstArg))
1647330f729Sjoerg return false;
1657330f729Sjoerg
1667330f729Sjoerg // - size_t dstlen = sizeof(dst)
1677330f729Sjoerg if (LenArgDRE) {
1687330f729Sjoerg const auto *LenArgVal = dyn_cast<VarDecl>(LenArgDRE->getDecl());
1697330f729Sjoerg // If it's an EnumConstantDecl instead, then we're missing out on something.
1707330f729Sjoerg if (!LenArgVal) {
1717330f729Sjoerg assert(isa<EnumConstantDecl>(LenArgDRE->getDecl()));
1727330f729Sjoerg return false;
1737330f729Sjoerg }
1747330f729Sjoerg if (LenArgVal->getInit())
1757330f729Sjoerg LenArg = LenArgVal->getInit();
1767330f729Sjoerg }
1777330f729Sjoerg
1787330f729Sjoerg // - integral value
1797330f729Sjoerg // We try to figure out if the last argument is possibly longer
1807330f729Sjoerg // than the destination can possibly handle if its size can be defined.
1817330f729Sjoerg if (const auto *IL = dyn_cast<IntegerLiteral>(LenArg->IgnoreParenImpCasts())) {
1827330f729Sjoerg uint64_t ILRawVal = IL->getValue().getZExtValue();
1837330f729Sjoerg
1847330f729Sjoerg // Case when there is pointer arithmetic on the destination buffer
1857330f729Sjoerg // especially when we offset from the base decreasing the
1867330f729Sjoerg // buffer length accordingly.
1877330f729Sjoerg if (!DstArgDRE) {
1887330f729Sjoerg if (const auto *BE =
1897330f729Sjoerg dyn_cast<BinaryOperator>(DstArg->IgnoreParenImpCasts())) {
1907330f729Sjoerg DstArgDRE = dyn_cast<DeclRefExpr>(BE->getLHS()->IgnoreParenImpCasts());
1917330f729Sjoerg if (BE->getOpcode() == BO_Add) {
1927330f729Sjoerg if ((IL = dyn_cast<IntegerLiteral>(BE->getRHS()->IgnoreParenImpCasts()))) {
1937330f729Sjoerg DstOff = IL->getValue().getZExtValue();
1947330f729Sjoerg }
1957330f729Sjoerg }
1967330f729Sjoerg }
1977330f729Sjoerg }
1987330f729Sjoerg if (DstArgDRE) {
1997330f729Sjoerg if (const auto *Buffer =
2007330f729Sjoerg dyn_cast<ConstantArrayType>(DstArgDRE->getType())) {
2017330f729Sjoerg ASTContext &C = BR.getContext();
2027330f729Sjoerg uint64_t BufferLen = C.getTypeSize(Buffer) / 8;
2037330f729Sjoerg auto RemainingBufferLen = BufferLen - DstOff;
2047330f729Sjoerg if (RemainingBufferLen < ILRawVal)
2057330f729Sjoerg return true;
2067330f729Sjoerg }
2077330f729Sjoerg }
2087330f729Sjoerg }
2097330f729Sjoerg
2107330f729Sjoerg return false;
2117330f729Sjoerg }
2127330f729Sjoerg
VisitCallExpr(CallExpr * CE)2137330f729Sjoerg void WalkAST::VisitCallExpr(CallExpr *CE) {
2147330f729Sjoerg const FunctionDecl *FD = CE->getDirectCallee();
2157330f729Sjoerg if (!FD)
2167330f729Sjoerg return;
2177330f729Sjoerg
2187330f729Sjoerg if (CheckerContext::isCLibraryFunction(FD, "strncat")) {
2197330f729Sjoerg if (containsBadStrncatPattern(CE)) {
2207330f729Sjoerg const Expr *DstArg = CE->getArg(0);
2217330f729Sjoerg const Expr *LenArg = CE->getArg(2);
2227330f729Sjoerg PathDiagnosticLocation Loc =
2237330f729Sjoerg PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);
2247330f729Sjoerg
2257330f729Sjoerg StringRef DstName = getPrintableName(DstArg);
2267330f729Sjoerg
2277330f729Sjoerg SmallString<256> S;
2287330f729Sjoerg llvm::raw_svector_ostream os(S);
2297330f729Sjoerg os << "Potential buffer overflow. ";
2307330f729Sjoerg if (!DstName.empty()) {
2317330f729Sjoerg os << "Replace with 'sizeof(" << DstName << ") "
2327330f729Sjoerg "- strlen(" << DstName <<") - 1'";
2337330f729Sjoerg os << " or u";
2347330f729Sjoerg } else
2357330f729Sjoerg os << "U";
2367330f729Sjoerg os << "se a safer 'strlcat' API";
2377330f729Sjoerg
2387330f729Sjoerg BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",
2397330f729Sjoerg "C String API", os.str(), Loc,
2407330f729Sjoerg LenArg->getSourceRange());
2417330f729Sjoerg }
2427330f729Sjoerg } else if (CheckerContext::isCLibraryFunction(FD, "strlcpy") ||
2437330f729Sjoerg CheckerContext::isCLibraryFunction(FD, "strlcat")) {
2447330f729Sjoerg if (containsBadStrlcpyStrlcatPattern(CE)) {
2457330f729Sjoerg const Expr *DstArg = CE->getArg(0);
2467330f729Sjoerg const Expr *LenArg = CE->getArg(2);
2477330f729Sjoerg PathDiagnosticLocation Loc =
2487330f729Sjoerg PathDiagnosticLocation::createBegin(LenArg, BR.getSourceManager(), AC);
2497330f729Sjoerg
2507330f729Sjoerg StringRef DstName = getPrintableName(DstArg);
2517330f729Sjoerg
2527330f729Sjoerg SmallString<256> S;
2537330f729Sjoerg llvm::raw_svector_ostream os(S);
2547330f729Sjoerg os << "The third argument allows to potentially copy more bytes than it should. ";
2557330f729Sjoerg os << "Replace with the value ";
2567330f729Sjoerg if (!DstName.empty())
2577330f729Sjoerg os << "sizeof(" << DstName << ")";
2587330f729Sjoerg else
2597330f729Sjoerg os << "sizeof(<destination buffer>)";
2607330f729Sjoerg os << " or lower";
2617330f729Sjoerg
2627330f729Sjoerg BR.EmitBasicReport(FD, Checker, "Anti-pattern in the argument",
2637330f729Sjoerg "C String API", os.str(), Loc,
2647330f729Sjoerg LenArg->getSourceRange());
2657330f729Sjoerg }
2667330f729Sjoerg }
2677330f729Sjoerg
2687330f729Sjoerg // Recurse and check children.
2697330f729Sjoerg VisitChildren(CE);
2707330f729Sjoerg }
2717330f729Sjoerg
VisitChildren(Stmt * S)2727330f729Sjoerg void WalkAST::VisitChildren(Stmt *S) {
2737330f729Sjoerg for (Stmt *Child : S->children())
2747330f729Sjoerg if (Child)
2757330f729Sjoerg Visit(Child);
2767330f729Sjoerg }
2777330f729Sjoerg
2787330f729Sjoerg namespace {
2797330f729Sjoerg class CStringSyntaxChecker: public Checker<check::ASTCodeBody> {
2807330f729Sjoerg public:
2817330f729Sjoerg
checkASTCodeBody(const Decl * D,AnalysisManager & Mgr,BugReporter & BR) const2827330f729Sjoerg void checkASTCodeBody(const Decl *D, AnalysisManager& Mgr,
2837330f729Sjoerg BugReporter &BR) const {
2847330f729Sjoerg WalkAST walker(this, BR, Mgr.getAnalysisDeclContext(D));
2857330f729Sjoerg walker.Visit(D->getBody());
2867330f729Sjoerg }
2877330f729Sjoerg };
2887330f729Sjoerg }
2897330f729Sjoerg
registerCStringSyntaxChecker(CheckerManager & mgr)2907330f729Sjoerg void ento::registerCStringSyntaxChecker(CheckerManager &mgr) {
2917330f729Sjoerg mgr.registerChecker<CStringSyntaxChecker>();
2927330f729Sjoerg }
2937330f729Sjoerg
shouldRegisterCStringSyntaxChecker(const CheckerManager & mgr)294*e038c9c4Sjoerg bool ento::shouldRegisterCStringSyntaxChecker(const CheckerManager &mgr) {
2957330f729Sjoerg return true;
2967330f729Sjoerg }
297