xref: /llvm-project/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp (revision 5f6b7145077386afac806eec1bb8e866c6166034)
1b3b0d09cSBalazs Benics //===- unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp ----------------===//
2b3b0d09cSBalazs Benics //
3b3b0d09cSBalazs Benics // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b3b0d09cSBalazs Benics // See https://llvm.org/LICENSE.txt for license information.
5b3b0d09cSBalazs Benics // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b3b0d09cSBalazs Benics //
7b3b0d09cSBalazs Benics //===----------------------------------------------------------------------===//
8b3b0d09cSBalazs Benics 
9ae570d82SBalazs Benics #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
10b3b0d09cSBalazs Benics #include "clang/StaticAnalyzer/Core/BugReporter/Z3CrosscheckVisitor.h"
11b3b0d09cSBalazs Benics #include "gtest/gtest.h"
12b3b0d09cSBalazs Benics 
13b3b0d09cSBalazs Benics using namespace clang;
14b3b0d09cSBalazs Benics using namespace ento;
15b3b0d09cSBalazs Benics 
16b3b0d09cSBalazs Benics using Z3Result = Z3CrosscheckVisitor::Z3Result;
17b3b0d09cSBalazs Benics using Z3Decision = Z3CrosscheckOracle::Z3Decision;
18b3b0d09cSBalazs Benics 
19b3b0d09cSBalazs Benics static constexpr Z3Decision AcceptReport = Z3Decision::AcceptReport;
20b3b0d09cSBalazs Benics static constexpr Z3Decision RejectReport = Z3Decision::RejectReport;
21ae570d82SBalazs Benics static constexpr Z3Decision RejectEQClass = Z3Decision::RejectEQClass;
22b3b0d09cSBalazs Benics 
23b3b0d09cSBalazs Benics static constexpr std::optional<bool> SAT = true;
24b3b0d09cSBalazs Benics static constexpr std::optional<bool> UNSAT = false;
25b3b0d09cSBalazs Benics static constexpr std::optional<bool> UNDEF = std::nullopt;
26b3b0d09cSBalazs Benics 
27ae570d82SBalazs Benics static unsigned operator""_ms(unsigned long long ms) { return ms; }
28ae570d82SBalazs Benics static unsigned operator""_step(unsigned long long rlimit) { return rlimit; }
29ae570d82SBalazs Benics 
30ae570d82SBalazs Benics static const AnalyzerOptions DefaultOpts = [] {
31ae570d82SBalazs Benics   AnalyzerOptions Config;
32ae570d82SBalazs Benics #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC,        \
33ae570d82SBalazs Benics                                              SHALLOW_VAL, DEEP_VAL)            \
34ae570d82SBalazs Benics   ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEEP_VAL)
35ae570d82SBalazs Benics #define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL)                \
36*5f6b7145SBalazs Benics   Config.NAME = DEFAULT_VAL;
37ae570d82SBalazs Benics #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def"
38ae570d82SBalazs Benics 
39ae570d82SBalazs Benics   // Remember to update the tests in this file when these values change.
40ae570d82SBalazs Benics   // Also update the doc comment of `interpretQueryResult`.
41ea8e328aSKristóf Umann   assert(Config.Z3CrosscheckRLimitThreshold == 0);
42ea8e328aSKristóf Umann   assert(Config.Z3CrosscheckTimeoutThreshold == 15'000_ms);
43ae570d82SBalazs Benics   // Usually, when the timeout/rlimit threshold is reached, Z3 only slightly
44ae570d82SBalazs Benics   // overshoots until it realizes that it overshoot and needs to back off.
45ae570d82SBalazs Benics   // Consequently, the measured timeout should be fairly close to the threshold.
46ae570d82SBalazs Benics   // Same reasoning applies to the rlimit too.
47ae570d82SBalazs Benics   return Config;
48ae570d82SBalazs Benics }();
49ae570d82SBalazs Benics 
50ea8e328aSKristóf Umann static const AnalyzerOptions LimitedOpts = [] {
51ea8e328aSKristóf Umann   AnalyzerOptions Config = DefaultOpts;
52ea8e328aSKristóf Umann   Config.Z3CrosscheckEQClassTimeoutThreshold = 700_ms;
53ea8e328aSKristóf Umann   Config.Z3CrosscheckTimeoutThreshold = 300_step;
54ea8e328aSKristóf Umann   Config.Z3CrosscheckRLimitThreshold = 400'000_step;
55ea8e328aSKristóf Umann   return Config;
56ea8e328aSKristóf Umann }();
57ea8e328aSKristóf Umann 
58b3b0d09cSBalazs Benics namespace {
59b3b0d09cSBalazs Benics 
60ea8e328aSKristóf Umann template <const AnalyzerOptions &Opts>
61ae570d82SBalazs Benics class Z3CrosscheckOracleTest : public testing::Test {
62ae570d82SBalazs Benics public:
63ae570d82SBalazs Benics   Z3Decision interpretQueryResult(const Z3Result &Result) {
64ae570d82SBalazs Benics     return Oracle.interpretQueryResult(Result);
65b3b0d09cSBalazs Benics   }
66ae570d82SBalazs Benics 
67ae570d82SBalazs Benics private:
68ea8e328aSKristóf Umann   Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(Opts);
69b3b0d09cSBalazs Benics };
70b3b0d09cSBalazs Benics 
71ea8e328aSKristóf Umann using DefaultZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<DefaultOpts>;
72ea8e328aSKristóf Umann using LimitedZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<LimitedOpts>;
73ea8e328aSKristóf Umann 
74ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsFirstSAT) {
75ea8e328aSKristóf Umann   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
76ea8e328aSKristóf Umann }
77ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsFirstSAT) {
78ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
79b3b0d09cSBalazs Benics }
80b3b0d09cSBalazs Benics 
81ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsSAT) {
82ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
83ea8e328aSKristóf Umann   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
84ea8e328aSKristóf Umann }
85ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsSAT) {
86ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
87ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
88b3b0d09cSBalazs Benics }
89b3b0d09cSBalazs Benics 
90ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
91ea8e328aSKristóf Umann   // Even if it times out, if it is SAT, we should accept it.
92ea8e328aSKristóf Umann   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 15'010_ms, 1000_step}));
93ea8e328aSKristóf Umann }
94ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
95ae570d82SBalazs Benics   // Even if it times out, if it is SAT, we should accept it.
96ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step}));
97b3b0d09cSBalazs Benics }
98b3b0d09cSBalazs Benics 
99ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
100ea8e328aSKristóf Umann   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 15'010_ms, 1000_step}));
101ea8e328aSKristóf Umann }
102ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
103ae570d82SBalazs Benics   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
104ae570d82SBalazs Benics }
105ae570d82SBalazs Benics 
106ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectsTimeout) {
107ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
108ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
109ea8e328aSKristóf Umann   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 15'010_ms, 1000_step}));
110ea8e328aSKristóf Umann }
111ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectsTimeout) {
112ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
113ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
114ae570d82SBalazs Benics   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
115b3b0d09cSBalazs Benics }
116b3b0d09cSBalazs Benics 
117ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectsUNSATs) {
118ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
119ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
120ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
121ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
122ea8e328aSKristóf Umann }
123ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectsUNSATs) {
124ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
125ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
126ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
127ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
128ae570d82SBalazs Benics }
129ae570d82SBalazs Benics 
130ea8e328aSKristóf Umann // Testing cut heuristics of the two configurations:
131ea8e328aSKristóf Umann // =================================================
132ae570d82SBalazs Benics 
133ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
134ea8e328aSKristóf Umann   // Simulate long queries, that barely doesn't trigger the timeout.
135ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
136ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
137ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
138ea8e328aSKristóf Umann }
139ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
140ae570d82SBalazs Benics   // Simulate long queries, that barely doesn't trigger the timeout.
141ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
142ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
143ae570d82SBalazs Benics   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
144ae570d82SBalazs Benics }
145ae570d82SBalazs Benics 
146ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
147ea8e328aSKristóf Umann   // Simulate long queries, that barely doesn't trigger the timeout.
148ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
149ea8e328aSKristóf Umann   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
150ea8e328aSKristóf Umann   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 14'990_ms, 1000_step}));
151ea8e328aSKristóf Umann }
152ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
153ae570d82SBalazs Benics   // Simulate long queries, that barely doesn't trigger the timeout.
154ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
155ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
156ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
157ae570d82SBalazs Benics }
158ae570d82SBalazs Benics 
159ea8e328aSKristóf Umann // Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
160ea8e328aSKristóf Umann // it doesn't make sense to test that.
161ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
162ae570d82SBalazs Benics   // Simulate quick, but many queries: 35 quick UNSAT queries.
163ae570d82SBalazs Benics   // 35*20ms = 700ms, which is equal to the 700ms threshold.
164ae570d82SBalazs Benics   for (int i = 0; i < 35; ++i) {
165ae570d82SBalazs Benics     ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
166ae570d82SBalazs Benics   }
167ae570d82SBalazs Benics   // Do one more to trigger the heuristic.
168ae570d82SBalazs Benics   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
169ae570d82SBalazs Benics }
170ae570d82SBalazs Benics 
171ea8e328aSKristóf Umann // Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
172ea8e328aSKristóf Umann // it doesn't make sense to test that.
173ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItAttemptsManySmallQueries) {
174ae570d82SBalazs Benics   // Simulate quick, but many queries: 35 quick UNSAT queries.
175ae570d82SBalazs Benics   // 35*20ms = 700ms, which is equal to the 700ms threshold.
176ae570d82SBalazs Benics   for (int i = 0; i < 35; ++i) {
177ae570d82SBalazs Benics     ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
178ae570d82SBalazs Benics   }
179ae570d82SBalazs Benics   // Do one more to trigger the heuristic, but given this was SAT, we still
180ae570d82SBalazs Benics   // accept the query.
181ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
182ae570d82SBalazs Benics }
183ae570d82SBalazs Benics 
184ea8e328aSKristóf Umann // Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
185ea8e328aSKristóf Umann // doesn't make sense to test that.
186ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
187ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
188ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
189ae570d82SBalazs Benics   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
190ae570d82SBalazs Benics }
191ae570d82SBalazs Benics 
192ea8e328aSKristóf Umann // Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
193ea8e328aSKristóf Umann // doesn't make sense to test that.
194ea8e328aSKristóf Umann TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
195ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
196ae570d82SBalazs Benics   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
197ae570d82SBalazs Benics   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));
198b3b0d09cSBalazs Benics }
199b3b0d09cSBalazs Benics 
200ea8e328aSKristóf Umann // Demonstrate the weaknesses of the default configuration:
201ea8e328aSKristóf Umann // ========================================================
202ea8e328aSKristóf Umann 
203ea8e328aSKristóf Umann TEST_F(DefaultZ3CrosscheckOracleTest, ManySlowQueriesHangTheAnalyzer) {
204ea8e328aSKristóf Umann   // Simulate many slow queries: 250 slow UNSAT queries.
205ea8e328aSKristóf Umann   // 250*14000ms = 3500s, ~1 hour. Since we disabled the total time limitation,
206ea8e328aSKristóf Umann   // this eqclass would take roughly 1 hour to process.
207ea8e328aSKristóf Umann   // It doesn't matter what rlimit the queries consume.
208ea8e328aSKristóf Umann   for (int i = 0; i < 250; ++i) {
209ea8e328aSKristóf Umann     ASSERT_EQ(RejectReport,
210ea8e328aSKristóf Umann               interpretQueryResult({UNSAT, 14'000_ms, 1'000'000_step}));
211ea8e328aSKristóf Umann   }
212ea8e328aSKristóf Umann }
213ea8e328aSKristóf Umann 
214b3b0d09cSBalazs Benics } // namespace
215