xref: /netbsd-src/external/apache2/llvm/dist/llvm/utils/benchmark/include/benchmark/benchmark.h (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21   for (auto _ : state)
22     std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30   std::string x = "hello";
31   for (auto _ : state)
32     std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag.  E.g.,
38 //       my_unittest --benchmark_filter=all
39 //       my_unittest --benchmark_filter=BM_StringCreation
40 //       my_unittest --benchmark_filter=String
41 //       my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43   benchmark::Initialize(&argc, argv);
44   benchmark::RunSpecifiedBenchmarks();
45   return 0;
46 }
47 
48 // Sometimes a family of microbenchmarks can be implemented with
49 // just one routine that takes an extra argument to specify which
50 // one of the family of benchmarks to run.  For example, the following
51 // code defines a family of microbenchmarks for measuring the speed
52 // of memcpy() calls of different lengths:
53 
54 static void BM_memcpy(benchmark::State& state) {
55   char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
56   memset(src, 'x', state.range(0));
57   for (auto _ : state)
58     memcpy(dst, src, state.range(0));
59   state.SetBytesProcessed(int64_t(state.iterations()) *
60                           int64_t(state.range(0)));
61   delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64 
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand.  The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70 
71 // You might have a microbenchmark that depends on two inputs.  For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75   set<int> data;
76   for (auto _ : state) {
77     state.PauseTiming();
78     data = ConstructRandomSet(state.range(0));
79     state.ResumeTiming();
80     for (int j = 0; j < state.range(1); ++j)
81       data.insert(RandomNumber());
82   }
83 }
84 BENCHMARK(BM_SetInsert)
85    ->Args({1<<10, 128})
86    ->Args({2<<10, 128})
87    ->Args({4<<10, 128})
88    ->Args({8<<10, 128})
89    ->Args({1<<10, 512})
90    ->Args({2<<10, 512})
91    ->Args({4<<10, 512})
92    ->Args({8<<10, 512});
93 
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand.  The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99 
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106   for (int i = 0; i <= 10; ++i)
107     for (int j = 32; j <= 1024*1024; j *= 8)
108       b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111 
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'size' messages 'iters' times
114 // Measures throughput in the absence of multiprogramming.
115 template <class Q> void BM_Sequential(benchmark::State& state) {
116   Q q;
117   typename Q::value_type v;
118   for (auto _ : state) {
119     for (int i = state.range(0); i--; )
120       q.push(v);
121     for (int e = state.range(0); e--; )
122       q.Wait(&v);
123   }
124   // actually messages, not bytes:
125   state.SetBytesProcessed(
126       static_cast<int64_t>(state.iterations())*state.range(0));
127 }
128 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129 
130 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131 benchmark. This option overrides the `benchmark_min_time` flag.
132 
133 void BM_test(benchmark::State& state) {
134  ... body ...
135 }
136 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
137 
138 In a multithreaded test, it is guaranteed that none of the threads will start
139 until all have reached the loop start, and all will have finished before any
140 thread exits the loop body. As such, any global setup or teardown you want to
141 do can be wrapped in a check against the thread index:
142 
143 static void BM_MultiThreaded(benchmark::State& state) {
144   if (state.thread_index == 0) {
145     // Setup code here.
146   }
147   for (auto _ : state) {
148     // Run the test as normal.
149   }
150   if (state.thread_index == 0) {
151     // Teardown code here.
152   }
153 }
154 BENCHMARK(BM_MultiThreaded)->Threads(4);
155 
156 
157 If a benchmark runs a few milliseconds it may be hard to visually compare the
158 measured times, since the output data is given in nanoseconds per default. In
159 order to manually set the time unit, you can specify it manually:
160 
161 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
162 */
163 
164 #ifndef BENCHMARK_BENCHMARK_H_
165 #define BENCHMARK_BENCHMARK_H_
166 
167 
168 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
169 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
170 #define BENCHMARK_HAS_CXX11
171 #endif
172 
173 #include <stdint.h>
174 
175 #include <algorithm>
176 #include <cassert>
177 #include <cstddef>
178 #include <iosfwd>
179 #include <string>
180 #include <vector>
181 #include <map>
182 #include <set>
183 
184 #if defined(BENCHMARK_HAS_CXX11)
185 #include <type_traits>
186 #include <initializer_list>
187 #include <utility>
188 #endif
189 
190 #if defined(_MSC_VER)
191 #include <intrin.h> // for _ReadWriteBarrier
192 #endif
193 
194 #ifndef BENCHMARK_HAS_CXX11
195 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
196   TypeName(const TypeName&);                         \
197   TypeName& operator=(const TypeName&)
198 #else
199 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
200   TypeName(const TypeName&) = delete;                \
201   TypeName& operator=(const TypeName&) = delete
202 #endif
203 
204 #if defined(__GNUC__)
205 #define BENCHMARK_UNUSED __attribute__((unused))
206 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
207 #define BENCHMARK_NOEXCEPT noexcept
208 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
209 #elif defined(_MSC_VER) && !defined(__clang__)
210 #define BENCHMARK_UNUSED
211 #define BENCHMARK_ALWAYS_INLINE __forceinline
212 #if _MSC_VER >= 1900
213 #define BENCHMARK_NOEXCEPT noexcept
214 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
215 #else
216 #define BENCHMARK_NOEXCEPT
217 #define BENCHMARK_NOEXCEPT_OP(x)
218 #endif
219 #define __func__ __FUNCTION__
220 #else
221 #define BENCHMARK_UNUSED
222 #define BENCHMARK_ALWAYS_INLINE
223 #define BENCHMARK_NOEXCEPT
224 #define BENCHMARK_NOEXCEPT_OP(x)
225 #endif
226 
227 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
228 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
229 
230 #if defined(__GNUC__)
231 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
232 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
233 #else
234 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
235 #define BENCHMARK_DEPRECATED_MSG(msg)
236 #define BENCHMARK_WARNING_MSG(msg) __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING(__LINE__) ") : warning note: " msg))
237 #endif
238 
239 #if defined(__GNUC__) && !defined(__clang__)
240 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
241 #endif
242 
243 #ifndef __has_builtin
244 #define __has_builtin(x) 0
245 #endif
246 
247 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
248   #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
249 #elif defined(_MSC_VER)
250   #define BENCHMARK_UNREACHABLE() __assume(false)
251 #else
252   #define BENCHMARK_UNREACHABLE() ((void)0)
253 #endif
254 
255 namespace benchmark {
256 class BenchmarkReporter;
257 
258 void Initialize(int* argc, char** argv);
259 
260 // Report to stdout all arguments in 'argv' as unrecognized except the first.
261 // Returns true there is at least on unrecognized argument (i.e. 'argc' > 1).
262 bool ReportUnrecognizedArguments(int argc, char** argv);
263 
264 // Generate a list of benchmarks matching the specified --benchmark_filter flag
265 // and if --benchmark_list_tests is specified return after printing the name
266 // of each matching benchmark. Otherwise run each matching benchmark and
267 // report the results.
268 //
269 // The second and third overload use the specified 'console_reporter' and
270 //  'file_reporter' respectively. 'file_reporter' will write to the file
271 //  specified
272 //   by '--benchmark_output'. If '--benchmark_output' is not given the
273 //  'file_reporter' is ignored.
274 //
275 // RETURNS: The number of matching benchmarks.
276 size_t RunSpecifiedBenchmarks();
277 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
278 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
279                               BenchmarkReporter* file_reporter);
280 
281 // If this routine is called, peak memory allocation past this point in the
282 // benchmark is reported at the end of the benchmark report line. (It is
283 // computed by running the benchmark once with a single iteration and a memory
284 // tracer.)
285 // TODO(dominic)
286 // void MemoryUsage();
287 
288 namespace internal {
289 class Benchmark;
290 class BenchmarkImp;
291 class BenchmarkFamilies;
292 
293 void UseCharPointer(char const volatile*);
294 
295 // Take ownership of the pointer and register the benchmark. Return the
296 // registered benchmark.
297 Benchmark* RegisterBenchmarkInternal(Benchmark*);
298 
299 // Ensure that the standard streams are properly initialized in every TU.
300 int InitializeStreams();
301 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
302 
303 }  // namespace internal
304 
305 
306 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
307     defined(__EMSCRIPTEN__)
308 # define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
309 #endif
310 
311 
312 // The DoNotOptimize(...) function can be used to prevent a value or
313 // expression from being optimized away by the compiler. This function is
314 // intended to add little to no overhead.
315 // See: https://youtu.be/nXaxk27zwlk?t=2441
316 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
317 template <class Tp>
318 inline BENCHMARK_ALWAYS_INLINE
DoNotOptimize(Tp const & value)319 void DoNotOptimize(Tp const& value) {
320     asm volatile("" : : "r,m"(value) : "memory");
321 }
322 
323 template <class Tp>
DoNotOptimize(Tp & value)324 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
325 #if defined(__clang__)
326   asm volatile("" : "+r,m"(value) : : "memory");
327 #else
328   asm volatile("" : "+m,r"(value) : : "memory");
329 #endif
330 }
331 
332 // Force the compiler to flush pending writes to global memory. Acts as an
333 // effective read/write barrier
ClobberMemory()334 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
335   asm volatile("" : : : "memory");
336 }
337 #elif defined(_MSC_VER)
338 template <class Tp>
DoNotOptimize(Tp const & value)339 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
340   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
341   _ReadWriteBarrier();
342 }
343 
ClobberMemory()344 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
345   _ReadWriteBarrier();
346 }
347 #else
348 template <class Tp>
DoNotOptimize(Tp const & value)349 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
350   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
351 }
352 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
353 #endif
354 
355 
356 
357 // This class is used for user-defined counters.
358 class Counter {
359 public:
360 
361   enum Flags {
362     kDefaults   = 0,
363     // Mark the counter as a rate. It will be presented divided
364     // by the duration of the benchmark.
365     kIsRate     = 1,
366     // Mark the counter as a thread-average quantity. It will be
367     // presented divided by the number of threads.
368     kAvgThreads = 2,
369     // Mark the counter as a thread-average rate. See above.
370     kAvgThreadsRate = kIsRate|kAvgThreads
371   };
372 
373   double value;
374   Flags  flags;
375 
376   BENCHMARK_ALWAYS_INLINE
value(v)377   Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}
378 
379   BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
380   BENCHMARK_ALWAYS_INLINE operator double      & ()       { return value; }
381 
382 };
383 
384 // This is the container for the user-defined counters.
385 typedef std::map<std::string, Counter> UserCounters;
386 
387 
388 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
389 // for the measured time.
390 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
391 
392 // BigO is passed to a benchmark in order to specify the asymptotic
393 // computational
394 // complexity for the benchmark. In case oAuto is selected, complexity will be
395 // calculated automatically to the best fit.
396 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
397 
398 // BigOFunc is passed to a benchmark in order to specify the asymptotic
399 // computational complexity for the benchmark.
400 typedef double(BigOFunc)(int64_t);
401 
402 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
403 // statistics over all the measurements of some type
404 typedef double(StatisticsFunc)(const std::vector<double>&);
405 
406 struct Statistics {
407   std::string name_;
408   StatisticsFunc* compute_;
409 
StatisticsStatistics410   Statistics(std::string name, StatisticsFunc* compute)
411     : name_(name), compute_(compute) {}
412 };
413 
414 namespace internal {
415 class ThreadTimer;
416 class ThreadManager;
417 
418 enum ReportMode
419 #if defined(BENCHMARK_HAS_CXX11)
420   : unsigned
421 #else
422 #endif
423   {
424   RM_Unspecified,  // The mode has not been manually specified
425   RM_Default,      // The mode is user-specified as default.
426   RM_ReportAggregatesOnly
427 };
428 }  // namespace internal
429 
430 // State is passed to a running Benchmark and contains state for the
431 // benchmark to use.
432 class State {
433  public:
434   struct StateIterator;
435   friend struct StateIterator;
436 
437   // Returns iterators used to run each iteration of a benchmark using a
438   // C++11 ranged-based for loop. These functions should not be called directly.
439   //
440   // REQUIRES: The benchmark has not started running yet. Neither begin nor end
441   // have been called previously.
442   //
443   // NOTE: KeepRunning may not be used after calling either of these functions.
444   BENCHMARK_ALWAYS_INLINE StateIterator begin();
445   BENCHMARK_ALWAYS_INLINE StateIterator end();
446 
447   // Returns true if the benchmark should continue through another iteration.
448   // NOTE: A benchmark may not return from the test until KeepRunning() has
449   // returned false.
450   bool KeepRunning();
451 
452   // Returns true iff the benchmark should run n more iterations.
453   // REQUIRES: 'n' > 0.
454   // NOTE: A benchmark must not return from the test until KeepRunningBatch()
455   // has returned false.
456   // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
457   //
458   // Intended usage:
459   //   while (state.KeepRunningBatch(1000)) {
460   //     // process 1000 elements
461   //   }
462   bool KeepRunningBatch(size_t n);
463 
464   // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
465   //           by the current thread.
466   // Stop the benchmark timer.  If not called, the timer will be
467   // automatically stopped after the last iteration of the benchmark loop.
468   //
469   // For threaded benchmarks the PauseTiming() function only pauses the timing
470   // for the current thread.
471   //
472   // NOTE: The "real time" measurement is per-thread. If different threads
473   // report different measurements the largest one is reported.
474   //
475   // NOTE: PauseTiming()/ResumeTiming() are relatively
476   // heavyweight, and so their use should generally be avoided
477   // within each benchmark iteration, if possible.
478   void PauseTiming();
479 
480   // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
481   //           by the current thread.
482   // Start the benchmark timer.  The timer is NOT running on entrance to the
483   // benchmark function. It begins running after control flow enters the
484   // benchmark loop.
485   //
486   // NOTE: PauseTiming()/ResumeTiming() are relatively
487   // heavyweight, and so their use should generally be avoided
488   // within each benchmark iteration, if possible.
489   void ResumeTiming();
490 
491   // REQUIRES: 'SkipWithError(...)' has not been called previously by the
492   //            current thread.
493   // Report the benchmark as resulting in an error with the specified 'msg'.
494   // After this call the user may explicitly 'return' from the benchmark.
495   //
496   // If the ranged-for style of benchmark loop is used, the user must explicitly
497   // break from the loop, otherwise all future iterations will be run.
498   // If the 'KeepRunning()' loop is used the current thread will automatically
499   // exit the loop at the end of the current iteration.
500   //
501   // For threaded benchmarks only the current thread stops executing and future
502   // calls to `KeepRunning()` will block until all threads have completed
503   // the `KeepRunning()` loop. If multiple threads report an error only the
504   // first error message is used.
505   //
506   // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
507   // the current scope immediately. If the function is called from within
508   // the 'KeepRunning()' loop the current iteration will finish. It is the users
509   // responsibility to exit the scope as needed.
510   void SkipWithError(const char* msg);
511 
512   // REQUIRES: called exactly once per iteration of the benchmarking loop.
513   // Set the manually measured time for this benchmark iteration, which
514   // is used instead of automatically measured time if UseManualTime() was
515   // specified.
516   //
517   // For threaded benchmarks the final value will be set to the largest
518   // reported values.
519   void SetIterationTime(double seconds);
520 
521   // Set the number of bytes processed by the current benchmark
522   // execution.  This routine is typically called once at the end of a
523   // throughput oriented benchmark.  If this routine is called with a
524   // value > 0, the report is printed in MB/sec instead of nanoseconds
525   // per iteration.
526   //
527   // REQUIRES: a benchmark has exited its benchmarking loop.
528   BENCHMARK_ALWAYS_INLINE
SetBytesProcessed(int64_t bytes)529   void SetBytesProcessed(int64_t bytes) { bytes_processed_ = bytes; }
530 
531   BENCHMARK_ALWAYS_INLINE
bytes_processed()532   int64_t bytes_processed() const { return bytes_processed_; }
533 
534   // If this routine is called with complexity_n > 0 and complexity report is
535   // requested for the
536   // family benchmark, then current benchmark will be part of the computation
537   // and complexity_n will
538   // represent the length of N.
539   BENCHMARK_ALWAYS_INLINE
SetComplexityN(int64_t complexity_n)540   void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
541 
542   BENCHMARK_ALWAYS_INLINE
complexity_length_n()543   int64_t complexity_length_n() { return complexity_n_; }
544 
545   // If this routine is called with items > 0, then an items/s
546   // label is printed on the benchmark report line for the currently
547   // executing benchmark. It is typically called at the end of a processing
548   // benchmark where a processing items/second output is desired.
549   //
550   // REQUIRES: a benchmark has exited its benchmarking loop.
551   BENCHMARK_ALWAYS_INLINE
SetItemsProcessed(int64_t items)552   void SetItemsProcessed(int64_t items) { items_processed_ = items; }
553 
554   BENCHMARK_ALWAYS_INLINE
items_processed()555   int64_t items_processed() const { return items_processed_; }
556 
557   // If this routine is called, the specified label is printed at the
558   // end of the benchmark report line for the currently executing
559   // benchmark.  Example:
560   //  static void BM_Compress(benchmark::State& state) {
561   //    ...
562   //    double compress = input_size / output_size;
563   //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
564   //  }
565   // Produces output that looks like:
566   //  BM_Compress   50         50   14115038  compress:27.3%
567   //
568   // REQUIRES: a benchmark has exited its benchmarking loop.
569   void SetLabel(const char* label);
570 
SetLabel(const std::string & str)571   void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
572     this->SetLabel(str.c_str());
573   }
574 
575   // Range arguments for this run. CHECKs if the argument has been set.
576   BENCHMARK_ALWAYS_INLINE
577   int64_t range(std::size_t pos = 0) const {
578     assert(range_.size() > pos);
579     return range_[pos];
580   }
581 
582   BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
range_x()583   int64_t range_x() const { return range(0); }
584 
585   BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
range_y()586   int64_t range_y() const { return range(1); }
587 
588   BENCHMARK_ALWAYS_INLINE
iterations()589   size_t iterations() const {
590     if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
591       return 0;
592     }
593     return max_iterations - total_iterations_ + batch_leftover_;
594   }
595 
596 private: // items we expect on the first cache line (ie 64 bytes of the struct)
597 
598   // When total_iterations_ is 0, KeepRunning() and friends will return false.
599   // May be larger than max_iterations.
600   size_t total_iterations_;
601 
602   // When using KeepRunningBatch(), batch_leftover_ holds the number of
603   // iterations beyond max_iters that were run. Used to track
604   // completed_iterations_ accurately.
605   size_t batch_leftover_;
606 
607 public:
608   const size_t max_iterations;
609 
610 private:
611   bool started_;
612   bool finished_;
613   bool error_occurred_;
614 
615 private: // items we don't need on the first cache line
616   std::vector<int64_t> range_;
617 
618   int64_t bytes_processed_;
619   int64_t items_processed_;
620 
621   int64_t complexity_n_;
622 
623  public:
624   // Container for user-defined counters.
625   UserCounters counters;
626   // Index of the executing thread. Values from [0, threads).
627   const int thread_index;
628   // Number of threads concurrently executing the benchmark.
629   const int threads;
630 
631 
632   // TODO(EricWF) make me private
633   State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
634         int n_threads, internal::ThreadTimer* timer,
635         internal::ThreadManager* manager);
636 
637  private:
638   void StartKeepRunning();
639   // Implementation of KeepRunning() and KeepRunningBatch().
640   // is_batch must be true unless n is 1.
641   bool KeepRunningInternal(size_t n, bool is_batch);
642   void FinishKeepRunning();
643   internal::ThreadTimer* timer_;
644   internal::ThreadManager* manager_;
645   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
646 };
647 
648 inline BENCHMARK_ALWAYS_INLINE
KeepRunning()649 bool State::KeepRunning() {
650   return KeepRunningInternal(1, /*is_batch=*/ false);
651 }
652 
653 inline BENCHMARK_ALWAYS_INLINE
KeepRunningBatch(size_t n)654 bool State::KeepRunningBatch(size_t n) {
655   return KeepRunningInternal(n, /*is_batch=*/ true);
656 }
657 
658 inline BENCHMARK_ALWAYS_INLINE
KeepRunningInternal(size_t n,bool is_batch)659 bool State::KeepRunningInternal(size_t n, bool is_batch) {
660   // total_iterations_ is set to 0 by the constructor, and always set to a
661   // nonzero value by StartKepRunning().
662   assert(n > 0);
663   // n must be 1 unless is_batch is true.
664   assert(is_batch || n == 1);
665   if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
666     total_iterations_ -= n;
667     return true;
668   }
669   if (!started_) {
670     StartKeepRunning();
671     if (!error_occurred_ && total_iterations_ >= n) {
672       total_iterations_-= n;
673       return true;
674     }
675   }
676   // For non-batch runs, total_iterations_ must be 0 by now.
677   if (is_batch && total_iterations_ != 0) {
678     batch_leftover_  = n - total_iterations_;
679     total_iterations_ = 0;
680     return true;
681   }
682   FinishKeepRunning();
683   return false;
684 }
685 
686 struct State::StateIterator {
687   struct BENCHMARK_UNUSED Value {};
688   typedef std::forward_iterator_tag iterator_category;
689   typedef Value value_type;
690   typedef Value reference;
691   typedef Value pointer;
692   typedef std::ptrdiff_t difference_type;
693 
694  private:
695   friend class State;
696   BENCHMARK_ALWAYS_INLINE
StateIteratorStateIterator697   StateIterator() : cached_(0), parent_() {}
698 
699   BENCHMARK_ALWAYS_INLINE
StateIteratorStateIterator700   explicit StateIterator(State* st)
701       : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
702 
703  public:
704   BENCHMARK_ALWAYS_INLINE
705   Value operator*() const { return Value(); }
706 
707   BENCHMARK_ALWAYS_INLINE
708   StateIterator& operator++() {
709     assert(cached_ > 0);
710     --cached_;
711     return *this;
712   }
713 
714   BENCHMARK_ALWAYS_INLINE
715   bool operator!=(StateIterator const&) const {
716     if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
717     parent_->FinishKeepRunning();
718     return false;
719   }
720 
721  private:
722   size_t cached_;
723   State* const parent_;
724 };
725 
begin()726 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
727   return StateIterator(this);
728 }
end()729 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
730   StartKeepRunning();
731   return StateIterator();
732 }
733 
734 namespace internal {
735 
736 typedef void(Function)(State&);
737 
738 // ------------------------------------------------------
739 // Benchmark registration object.  The BENCHMARK() macro expands
740 // into an internal::Benchmark* object.  Various methods can
741 // be called on this object to change the properties of the benchmark.
742 // Each method returns "this" so that multiple method calls can
743 // chained into one expression.
744 class Benchmark {
745  public:
746   virtual ~Benchmark();
747 
748   // Note: the following methods all return "this" so that multiple
749   // method calls can be chained together in one expression.
750 
751   // Run this benchmark once with "x" as the extra argument passed
752   // to the function.
753   // REQUIRES: The function passed to the constructor must accept an arg1.
754   Benchmark* Arg(int64_t x);
755 
756   // Run this benchmark with the given time unit for the generated output report
757   Benchmark* Unit(TimeUnit unit);
758 
759   // Run this benchmark once for a number of values picked from the
760   // range [start..limit].  (start and limit are always picked.)
761   // REQUIRES: The function passed to the constructor must accept an arg1.
762   Benchmark* Range(int64_t start, int64_t limit);
763 
764   // Run this benchmark once for all values in the range [start..limit] with
765   // specific step
766   // REQUIRES: The function passed to the constructor must accept an arg1.
767   Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
768 
769   // Run this benchmark once with "args" as the extra arguments passed
770   // to the function.
771   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
772   Benchmark* Args(const std::vector<int64_t>& args);
773 
774   // Equivalent to Args({x, y})
775   // NOTE: This is a legacy C++03 interface provided for compatibility only.
776   //   New code should use 'Args'.
ArgPair(int64_t x,int64_t y)777   Benchmark* ArgPair(int64_t x, int64_t y) {
778     std::vector<int64_t> args;
779     args.push_back(x);
780     args.push_back(y);
781     return Args(args);
782   }
783 
784   // Run this benchmark once for a number of values picked from the
785   // ranges [start..limit].  (starts and limits are always picked.)
786   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
787   Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
788 
789   // Equivalent to ArgNames({name})
790   Benchmark* ArgName(const std::string& name);
791 
792   // Set the argument names to display in the benchmark name. If not called,
793   // only argument values will be shown.
794   Benchmark* ArgNames(const std::vector<std::string>& names);
795 
796   // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
797   // NOTE: This is a legacy C++03 interface provided for compatibility only.
798   //   New code should use 'Ranges'.
RangePair(int64_t lo1,int64_t hi1,int64_t lo2,int64_t hi2)799   Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
800     std::vector<std::pair<int64_t, int64_t> > ranges;
801     ranges.push_back(std::make_pair(lo1, hi1));
802     ranges.push_back(std::make_pair(lo2, hi2));
803     return Ranges(ranges);
804   }
  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, Args,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Set the range multiplier for non-dense ranges. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);

  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
  Benchmark* MinTime(double t);

  // Specify the number of iterations that should be run by this benchmark.
  // REQUIRES: `n > 0` and `MinTime` has not been called on this benchmark.
  //
  // NOTE: This function should only be used when *exact* iteration control is
  //   needed and never to control or limit how long a benchmark runs, where
  //   `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
  Benchmark* Iterations(size_t n);

  // Specify the number of times to repeat this benchmark. This option
  // overrides the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);

  // Specify if each repetition of the benchmark should be reported separately
  // or if only the final statistics should be reported. If the benchmark
  // is not repeated then the single result is always reported.
  Benchmark* ReportAggregatesOnly(bool value = true);

  // If a particular benchmark is I/O bound, runs multiple threads internally or
  // if for some reason CPU timings are not representative, call this method. If
  // called, the elapsed (wall-clock) time will be used to control how many
  // iterations are run, and in the printing of items/second or MB/second
  // values.  If not called, the cpu time used by the benchmark will be used.
  Benchmark* UseRealTime();

  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
  Benchmark* UseManualTime();

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);

  // Set the asymptotic computational complexity for the benchmark via a
  // user-supplied fitting function. If called the asymptotic computational
  // complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);

  // Add this statistic to be computed over all the values of the benchmark run.
  Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);

  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads.  This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads, max_threads].
  // min_threads and max_threads are always included in T.  Run this
  // benchmark once for each value in T.  The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently.  For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // For each value n in the range, run this benchmark once using n threads.
  // min_threads and max_threads are always included in the range.
  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
  // a benchmark with 1, 4, 7 and 8 threads.
  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  // Execute the benchmark body; implemented by each concrete benchmark type
  // (function benchmarks, lambda benchmarks, fixtures).
  virtual void Run(State& state) = 0;

  // Used inside the benchmark implementation
  struct Instance;
899 
 protected:
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);

  // Number of arguments each registered run of this benchmark takes.
  int ArgsCnt() const;

 private:
  friend class BenchmarkFamilies;

  std::string name_;
  ReportMode report_mode_;
  std::vector<std::string> arg_names_;       // Argument names for all runs
  std::vector<std::vector<int64_t> > args_;  // Argument values for all runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
  size_t iterations_;
  int repetitions_;
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<Statistics> statistics_;
  std::vector<int> thread_counts_;

  // Assignment is intentionally declared but not defined (non-assignable).
  Benchmark& operator=(Benchmark const&);
};
928 
929 }  // namespace internal
930 
// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name,
                                       internal::Function* fn);

#if defined(BENCHMARK_HAS_CXX11)
// Overload accepting any callable (e.g. a lambda); C++11 and later only.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif

// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
void ClearRegisteredBenchmarks();
946 
947 namespace internal {
// The class used to hold all benchmarks created from a static function
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  virtual void Run(State& st);

 private:
  Function* func_;  // The function executed when the benchmark runs.
};
960 
#ifdef BENCHMARK_HAS_CXX11
// Benchmark implementation that stores and invokes an arbitrary callable
// (e.g. a lambda) captured at registration time.
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) { lambda_(st); }

 private:
  // Construction is private: instances are created only by the
  // RegisterBenchmark friend function declared below.
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;  // The callable executed by Run().
};
#endif
981 
982 }  // namespace internal
983 
RegisterBenchmark(const char * name,internal::Function * fn)984 inline internal::Benchmark* RegisterBenchmark(const char* name,
985                                               internal::Function* fn) {
986   return internal::RegisterBenchmarkInternal(
987       ::new internal::FunctionBenchmark(name, fn));
988 }
989 
#ifdef BENCHMARK_HAS_CXX11
// Create and register a benchmark with the specified 'name' that invokes
// the given callable 'fn'.  The callable is decayed and moved/copied into
// a LambdaBenchmark instance that owns it.
//
// RETURNS: A pointer to the registered benchmark.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  typedef internal::LambdaBenchmark<typename std::decay<Lambda>::type>
      BenchType;
  BenchType* bench = ::new BenchType(name, std::forward<Lambda>(fn));
  return internal::RegisterBenchmarkInternal(bench);
}
#endif
998 
#if defined(BENCHMARK_HAS_CXX11) && \
    (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Register a benchmark that invokes 'fn' with the State plus the extra
// 'args'.  NOTE: 'fn' and 'args' are captured by value ([=]), so they must
// be copyable; the copies live for the lifetime of the registered benchmark.
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
#else
// Older GCCs miscompile the variadic form; advertise its absence.
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif
1010 
// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
 public:
  Fixture() : internal::Benchmark("") {}

  // Runs SetUp, then the user-supplied BenchmarkCase, then TearDown.
  virtual void Run(State& st) {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these.
  // The mutable overloads forward to the const ones by default, so fixtures
  // overriding either variant continue to work.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  // The benchmark body, defined via BENCHMARK_F / BENCHMARK_DEFINE_F.
  virtual void BenchmarkCase(State&) = 0;
};
1032 
1033 }  // namespace benchmark
1034 
// ------------------------------------------------------
// Macro to register benchmarks

// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
// Fall back to __LINE__, which is unique enough as long as no two
// registration macros expand on the same source line.
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif
1046 
// Helpers for generating unique variable names
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
// Two-level concat so that macro arguments (e.g. __COUNTER__) are expanded
// before token pasting.
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c

// Declares a file-local Benchmark* with a unique name; its static
// initializer performs the registration.
#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

// Registers function 'n' as a benchmark named #n.
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))
1061 
// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
// NOTE: must expand to Ranges(), not RangePair(): RangePair() takes four
// scalar int64_t arguments, so passing it a braced initializer list as a
// single argument does not compile.  Ranges() accepts the list form.
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->Ranges({{(l1), (h1)}, {(l2), (h2)}})
1069 
#ifdef BENCHMARK_HAS_CXX11

// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))

#endif  // BENCHMARK_HAS_CXX11
1091 
// This will register a benchmark for a templatized function.  For example:
//
// template<int arg>
// void BM_Foo(int iters);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

// In C++11 mode any number of template arguments is accepted; otherwise
// only the single-argument form is available.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
1120 
// Declares the fixture subclass BaseClass_Method_Benchmark whose
// BenchmarkCase body is supplied later by BENCHMARK_F / BENCHMARK_DEFINE_F.
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)        \
  class BaseClass##_##Method##_Benchmark : public BaseClass { \
   public:                                                    \
    BaseClass##_##Method##_Benchmark() : BaseClass() {        \
      this->SetName(#BaseClass "/" #Method);                  \
    }                                                         \
                                                              \
   protected:                                                 \
    virtual void BenchmarkCase(::benchmark::State&);          \
  };
1131 
// Single-template-argument variant: derives from BaseClass<a>.
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {    \
   public:                                                          \
    BaseClass##_##Method##_Benchmark() : BaseClass<a>() {           \
      this->SetName(#BaseClass"<" #a ">/" #Method);                 \
    }                                                               \
                                                                    \
   protected:                                                       \
    virtual void BenchmarkCase(::benchmark::State&);                \
  };
1142 
// Two-template-argument variant: derives from BaseClass<a, b>.
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {    \
   public:                                                             \
    BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {           \
      this->SetName(#BaseClass"<" #a "," #b ">/" #Method);             \
    }                                                                  \
                                                                       \
   protected:                                                          \
    virtual void BenchmarkCase(::benchmark::State&);                   \
  };
1153 
#ifdef BENCHMARK_HAS_CXX11
// Variadic variant: derives from BaseClass<__VA_ARGS__>.
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
      this->SetName(#BaseClass"<" #__VA_ARGS__ ">/" #Method);              \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&);                       \
  };
#else
// Fixed: the fallback must forward all three parameters;
// BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F takes (BaseClass, Method, a), so
// passing only two arguments failed to expand.
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)
#endif
1168 
// Declares a fixture benchmark and opens the definition of its body; the
// caller supplies the function body immediately after the macro:
//   BENCHMARK_DEFINE_F(MyFixture, MyTest)(benchmark::State& st) { ... }
// Registration is done separately via BENCHMARK_REGISTER_F.
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif
1188 
// Registers a fixture benchmark previously declared with
// BENCHMARK_DEFINE_F (or one of its template variants).
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))

// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)           \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                     \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
1220 
// Helper macro to create a main routine in a test that runs the benchmarks.
// The trailing 'int main(int, char**)' (without a semicolon) is a plain
// redeclaration of main that absorbs the semicolon the user writes after
// 'BENCHMARK_MAIN();'.
#define BENCHMARK_MAIN()                   \
  int main(int argc, char** argv) {        \
    ::benchmark::Initialize(&argc, argv);  \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks(); \
  }                                        \
  int main(int, char**)
1229 
1230 
1231 // ------------------------------------------------------
1232 // Benchmark Reporters
1233 
1234 namespace benchmark {
1235 
// Information about the host CPU, included in benchmark report contexts.
struct CPUInfo {
  // Describes one cache reported by the host (see 'caches' below).
  struct CacheInfo {
    std::string type;
    int level;
    int size;
    int num_sharing;
  };

  int num_cpus;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  bool scaling_enabled;  // NOTE(review): presumably CPU frequency scaling — confirm in impl.

  // Returns the process-wide CPUInfo instance; the private constructor and
  // deleted copy operations mean this is the only way to obtain one.
  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1255 
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  // Information about the environment the benchmark suite runs in.
  struct Context {
    CPUInfo const& cpu_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char *executable_name;
    Context();
  };

  // The results of a single benchmark run.
  struct Run {
    Run()
        : error_occurred(false),
          iterations(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          bytes_per_second(0),
          items_per_second(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          counters() {}

    std::string benchmark_name;
    std::string report_label;   // Empty if not set by benchmark.
    bool error_occurred;        // True if the run ended in error.
    std::string error_message;  // Only meaningful when error_occurred is set.

    int64_t iterations;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // Zero if not set by benchmark.
    double bytes_per_second;
    double items_per_second;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // What statistics to compute from the measurements
    const std::vector<Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;  // Never null; see SetOutputStream().
  std::ostream* error_stream_;   // Never null; see SetErrorStream().
};
1386 
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
public:
  // Output flags; combinable as a bitmask.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color|OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_), name_field_width_(0),
        prev_counters_(), printed_header_(false) {}

  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;     // Width of the benchmark-name column.
  UserCounters prev_counters_;  // NOTE(review): presumably counters from the
                                // previous run, used to detect when the header
                                // must be reprinted — confirm in impl.
  bool printed_header_;
};
1414 
// Reporter that emits benchmark results as a JSON document.
class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);
  virtual void Finalize();

 private:
  void PrintRunData(const Run& report);

  // True until the first run has been reported; cleared thereafter.
  bool first_report_;
};
1427 
// Reporter that emits benchmark results in CSV format.
class CSVReporter : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;  // True once the CSV header row has been emitted.
  // Names of user counters seen so far (CSV columns must stay stable).
  std::set< std::string > user_counter_names_;
};
1440 
GetTimeUnitString(TimeUnit unit)1441 inline const char* GetTimeUnitString(TimeUnit unit) {
1442   switch (unit) {
1443     case kMillisecond:
1444       return "ms";
1445     case kMicrosecond:
1446       return "us";
1447     case kNanosecond:
1448       return "ns";
1449   }
1450   BENCHMARK_UNREACHABLE();
1451 }
1452 
GetTimeUnitMultiplier(TimeUnit unit)1453 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1454   switch (unit) {
1455     case kMillisecond:
1456       return 1e3;
1457     case kMicrosecond:
1458       return 1e6;
1459     case kNanosecond:
1460       return 1e9;
1461   }
1462   BENCHMARK_UNREACHABLE();
1463 }
1464 
1465 } // namespace benchmark
1466 
1467 #endif  // BENCHMARK_BENCHMARK_H_
1468