1b3b0d09cSBalazs Benics //===- unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp ----------------===// 2b3b0d09cSBalazs Benics // 3b3b0d09cSBalazs Benics // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4b3b0d09cSBalazs Benics // See https://llvm.org/LICENSE.txt for license information. 5b3b0d09cSBalazs Benics // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6b3b0d09cSBalazs Benics // 7b3b0d09cSBalazs Benics //===----------------------------------------------------------------------===// 8b3b0d09cSBalazs Benics 9ae570d82SBalazs Benics #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" 10b3b0d09cSBalazs Benics #include "clang/StaticAnalyzer/Core/BugReporter/Z3CrosscheckVisitor.h" 11b3b0d09cSBalazs Benics #include "gtest/gtest.h" 12b3b0d09cSBalazs Benics 13b3b0d09cSBalazs Benics using namespace clang; 14b3b0d09cSBalazs Benics using namespace ento; 15b3b0d09cSBalazs Benics 16b3b0d09cSBalazs Benics using Z3Result = Z3CrosscheckVisitor::Z3Result; 17b3b0d09cSBalazs Benics using Z3Decision = Z3CrosscheckOracle::Z3Decision; 18b3b0d09cSBalazs Benics 19b3b0d09cSBalazs Benics static constexpr Z3Decision AcceptReport = Z3Decision::AcceptReport; 20b3b0d09cSBalazs Benics static constexpr Z3Decision RejectReport = Z3Decision::RejectReport; 21ae570d82SBalazs Benics static constexpr Z3Decision RejectEQClass = Z3Decision::RejectEQClass; 22b3b0d09cSBalazs Benics 23b3b0d09cSBalazs Benics static constexpr std::optional<bool> SAT = true; 24b3b0d09cSBalazs Benics static constexpr std::optional<bool> UNSAT = false; 25b3b0d09cSBalazs Benics static constexpr std::optional<bool> UNDEF = std::nullopt; 26b3b0d09cSBalazs Benics 27ae570d82SBalazs Benics static unsigned operator""_ms(unsigned long long ms) { return ms; } 28ae570d82SBalazs Benics static unsigned operator""_step(unsigned long long rlimit) { return rlimit; } 29ae570d82SBalazs Benics 30ae570d82SBalazs Benics static const AnalyzerOptions DefaultOpts = [] { 31ae570d82SBalazs Benics AnalyzerOptions Config; 32ae570d82SBalazs Benics #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ 33ae570d82SBalazs Benics SHALLOW_VAL, DEEP_VAL) \ 34ae570d82SBalazs Benics ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEEP_VAL) 35ae570d82SBalazs Benics #define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ 36*5f6b7145SBalazs Benics Config.NAME = DEFAULT_VAL; 37ae570d82SBalazs Benics #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" 38ae570d82SBalazs Benics 39ae570d82SBalazs Benics // Remember to update the tests in this file when these values change. 40ae570d82SBalazs Benics // Also update the doc comment of `interpretQueryResult`. 41ea8e328aSKristóf Umann assert(Config.Z3CrosscheckRLimitThreshold == 0); 42ea8e328aSKristóf Umann assert(Config.Z3CrosscheckTimeoutThreshold == 15'000_ms); 43ae570d82SBalazs Benics // Usually, when the timeout/rlimit threshold is reached, Z3 only slightly 44ae570d82SBalazs Benics // overshoots until it realizes that it overshoot and needs to back off. 45ae570d82SBalazs Benics // Consequently, the measured timeout should be fairly close to the threshold. 46ae570d82SBalazs Benics // Same reasoning applies to the rlimit too. 47ae570d82SBalazs Benics return Config; 48ae570d82SBalazs Benics }(); 49ae570d82SBalazs Benics 50ea8e328aSKristóf Umann static const AnalyzerOptions LimitedOpts = [] { 51ea8e328aSKristóf Umann AnalyzerOptions Config = DefaultOpts; 52ea8e328aSKristóf Umann Config.Z3CrosscheckEQClassTimeoutThreshold = 700_ms; 53ea8e328aSKristóf Umann Config.Z3CrosscheckTimeoutThreshold = 300_step; 54ea8e328aSKristóf Umann Config.Z3CrosscheckRLimitThreshold = 400'000_step; 55ea8e328aSKristóf Umann return Config; 56ea8e328aSKristóf Umann }(); 57ea8e328aSKristóf Umann 58b3b0d09cSBalazs Benics namespace { 59b3b0d09cSBalazs Benics 60ea8e328aSKristóf Umann template <const AnalyzerOptions &Opts> 61ae570d82SBalazs Benics class Z3CrosscheckOracleTest : public testing::Test { 62ae570d82SBalazs Benics public: 63ae570d82SBalazs Benics Z3Decision interpretQueryResult(const Z3Result &Result) { 64ae570d82SBalazs Benics return Oracle.interpretQueryResult(Result); 65b3b0d09cSBalazs Benics } 66ae570d82SBalazs Benics 67ae570d82SBalazs Benics private: 68ea8e328aSKristóf Umann Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(Opts); 69b3b0d09cSBalazs Benics }; 70b3b0d09cSBalazs Benics 71ea8e328aSKristóf Umann using DefaultZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<DefaultOpts>; 72ea8e328aSKristóf Umann using LimitedZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<LimitedOpts>; 73ea8e328aSKristóf Umann 74ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsFirstSAT) { 75ea8e328aSKristóf Umann ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step})); 76ea8e328aSKristóf Umann } 77ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsFirstSAT) { 78ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step})); 79b3b0d09cSBalazs Benics } 80b3b0d09cSBalazs Benics 81ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsSAT) { 82ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 83ea8e328aSKristóf Umann ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step})); 84ea8e328aSKristóf Umann } 85ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsSAT) { 86ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 87ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step})); 88b3b0d09cSBalazs Benics } 89b3b0d09cSBalazs Benics 90ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItGoesOverTime) { 91ea8e328aSKristóf Umann // Even if it times out, if it is SAT, we should accept it. 92ea8e328aSKristóf Umann ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 15'010_ms, 1000_step})); 93ea8e328aSKristóf Umann } 94ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) { 95ae570d82SBalazs Benics // Even if it times out, if it is SAT, we should accept it. 96ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step})); 97b3b0d09cSBalazs Benics } 98b3b0d09cSBalazs Benics 99ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) { 100ea8e328aSKristóf Umann ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 15'010_ms, 1000_step})); 101ea8e328aSKristóf Umann } 102ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) { 103ae570d82SBalazs Benics ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step})); 104ae570d82SBalazs Benics } 105ae570d82SBalazs Benics 106ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectsTimeout) { 107ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 108ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 109ea8e328aSKristóf Umann ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 15'010_ms, 1000_step})); 110ea8e328aSKristóf Umann } 111ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectsTimeout) { 112ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 113ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 114ae570d82SBalazs Benics ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 310_ms, 1000_step})); 115b3b0d09cSBalazs Benics } 116b3b0d09cSBalazs Benics 117ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectsUNSATs) { 118ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 119ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 120ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 121ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 122ea8e328aSKristóf Umann } 123ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectsUNSATs) { 124ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 125ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 126ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 127ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 128ae570d82SBalazs Benics } 129ae570d82SBalazs Benics 130ea8e328aSKristóf Umann // Testing cut heuristics of the two configurations: 131ea8e328aSKristóf Umann // ================================================= 132ae570d82SBalazs Benics 133ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) { 134ea8e328aSKristóf Umann // Simulate long queries, that barely doesn't trigger the timeout. 135ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step})); 136ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step})); 137ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step})); 138ea8e328aSKristóf Umann } 139ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) { 140ae570d82SBalazs Benics // Simulate long queries, that barely doesn't trigger the timeout. 141ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step})); 142ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step})); 143ae570d82SBalazs Benics ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 290_ms, 1000_step})); 144ae570d82SBalazs Benics } 145ae570d82SBalazs Benics 146ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) { 147ea8e328aSKristóf Umann // Simulate long queries, that barely doesn't trigger the timeout. 148ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step})); 149ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step})); 150ea8e328aSKristóf Umann ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 14'990_ms, 1000_step})); 151ea8e328aSKristóf Umann } 152ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) { 153ae570d82SBalazs Benics // Simulate long queries, that barely doesn't trigger the timeout. 154ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step})); 155ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step})); 156ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step})); 157ae570d82SBalazs Benics } 158ae570d82SBalazs Benics 159ea8e328aSKristóf Umann // Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so 160ea8e328aSKristóf Umann // it doesn't make sense to test that. 161ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) { 162ae570d82SBalazs Benics // Simulate quick, but many queries: 35 quick UNSAT queries. 163ae570d82SBalazs Benics // 35*20ms = 700ms, which is equal to the 700ms threshold. 164ae570d82SBalazs Benics for (int i = 0; i < 35; ++i) { 165ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step})); 166ae570d82SBalazs Benics } 167ae570d82SBalazs Benics // Do one more to trigger the heuristic. 168ae570d82SBalazs Benics ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step})); 169ae570d82SBalazs Benics } 170ae570d82SBalazs Benics 171ea8e328aSKristóf Umann // Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so 172ea8e328aSKristóf Umann // it doesn't make sense to test that. 173ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItAttemptsManySmallQueries) { 174ae570d82SBalazs Benics // Simulate quick, but many queries: 35 quick UNSAT queries. 175ae570d82SBalazs Benics // 35*20ms = 700ms, which is equal to the 700ms threshold. 176ae570d82SBalazs Benics for (int i = 0; i < 35; ++i) { 177ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step})); 178ae570d82SBalazs Benics } 179ae570d82SBalazs Benics // Do one more to trigger the heuristic, but given this was SAT, we still 180ae570d82SBalazs Benics // accept the query. 181ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step})); 182ae570d82SBalazs Benics } 183ae570d82SBalazs Benics 184ea8e328aSKristóf Umann // Z3CrosscheckRLimitThreshold is disabled in default configuration, so it 185ea8e328aSKristóf Umann // doesn't make sense to test that. 186ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) { 187ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 188ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 189ae570d82SBalazs Benics ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step})); 190ae570d82SBalazs Benics } 191ae570d82SBalazs Benics 192ea8e328aSKristóf Umann // Z3CrosscheckRLimitThreshold is disabled in default configuration, so it 193ea8e328aSKristóf Umann // doesn't make sense to test that. 194ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) { 195ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 196ae570d82SBalazs Benics ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step})); 197ae570d82SBalazs Benics ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step})); 198b3b0d09cSBalazs Benics } 199b3b0d09cSBalazs Benics 200ea8e328aSKristóf Umann // Demonstrate the weaknesses of the default configuration: 201ea8e328aSKristóf Umann // ======================================================== 202ea8e328aSKristóf Umann 203ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, ManySlowQueriesHangTheAnalyzer) { 204ea8e328aSKristóf Umann // Simulate many slow queries: 250 slow UNSAT queries. 205ea8e328aSKristóf Umann // 250*14000ms = 3500s, ~1 hour. Since we disabled the total time limitation, 206ea8e328aSKristóf Umann // this eqclass would take roughly 1 hour to process. 207ea8e328aSKristóf Umann // It doesn't matter what rlimit the queries consume. 208ea8e328aSKristóf Umann for (int i = 0; i < 250; ++i) { 209ea8e328aSKristóf Umann ASSERT_EQ(RejectReport, 210ea8e328aSKristóf Umann interpretQueryResult({UNSAT, 14'000_ms, 1'000'000_step})); 211ea8e328aSKristóf Umann } 212ea8e328aSKristóf Umann } 213ea8e328aSKristóf Umann 214b3b0d09cSBalazs Benics } // namespace 215