xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_stats.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric #ifndef KMP_STATS_H
20b57cec5SDimitry Andric #define KMP_STATS_H
30b57cec5SDimitry Andric 
40b57cec5SDimitry Andric /** @file kmp_stats.h
50b57cec5SDimitry Andric  * Functions for collecting statistics.
60b57cec5SDimitry Andric  */
70b57cec5SDimitry Andric 
80b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
90b57cec5SDimitry Andric //
100b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
110b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
120b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "kmp_config.h"
170b57cec5SDimitry Andric #include "kmp_debug.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #if KMP_STATS_ENABLED
200b57cec5SDimitry Andric /* Statistics accumulator.
210b57cec5SDimitry Andric    Accumulates number of samples and computes min, max, mean, standard deviation
220b57cec5SDimitry Andric    on the fly.
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric    Online variance calculation algorithm from
250b57cec5SDimitry Andric    http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
260b57cec5SDimitry Andric  */
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric #include "kmp_stats_timing.h"
290b57cec5SDimitry Andric #include <limits>
300b57cec5SDimitry Andric #include <math.h>
310b57cec5SDimitry Andric #include <new> // placement new
320b57cec5SDimitry Andric #include <stdint.h>
330b57cec5SDimitry Andric #include <string>
340b57cec5SDimitry Andric #include <vector>
350b57cec5SDimitry Andric 
/* Enable developer statistics here if you want them. They are more detailed
   than is useful for application characterisation and are intended for the
   runtime library developer. This value is tested by the #if that selects
   between the populated and the empty definition of
   KMP_FOREACH_DEVELOPER_TIMER(). */
#define KMP_DEVELOPER_STATS 0

/* Enable/Disable histogram output. This value is the default for the doHist
   argument of the statistic constructor. */
#define KMP_STATS_HIST 0
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric /*!
450b57cec5SDimitry Andric  * @ingroup STATS_GATHERING
460b57cec5SDimitry Andric  * \brief flags to describe the statistic (timer or counter)
470b57cec5SDimitry Andric  *
480b57cec5SDimitry Andric  */
enum stats_flags_e {
  // Every flag owns a distinct bit, so several flags can be OR-ed together
  // in the flags column of a counter/timer list entry.

  //! do not show a TOTAL_aggregation for this statistic
  noTotal = 0x01,
  //! statistic is valid only for primary thread
  onlyInMaster = 0x02,
  //! statistic doesn't need units printed next to it
  noUnits = 0x04,
  //! statistic is valid only for non-primary threads
  notInMaster = 0x08,
  //! statistic can be logged on the event timeline when KMP_STATS_EVENTS is
  //! on (valid only for timers)
  logEvent = 0x10
};
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric /*!
590b57cec5SDimitry Andric  * @ingroup STATS_GATHERING
600b57cec5SDimitry Andric  * \brief the states which a thread can be in
610b57cec5SDimitry Andric  *
620b57cec5SDimitry Andric  */
enum stats_state_e {
  // Values are consecutive, starting at zero, in the order listed.
  IDLE = 0,
  SERIAL_REGION = 1,
  FORK_JOIN_BARRIER = 2,
  PLAIN_BARRIER = 3,
  TASKWAIT = 4,
  TASKYIELD = 5,
  TASKGROUP = 6,
  IMPLICIT_TASK = 7,
  EXPLICIT_TASK = 8,
  TEAMS_REGION = 9
};
750b57cec5SDimitry Andric 
/*!
 * \brief The list of all counters. Add new counters under the
 * KMP_FOREACH_COUNTER() macro in kmp_stats.h
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(COUNTER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \details A counter records how many times some event occurred. Every thread
 * keeps its own count; at the end of execution the per-thread counts are
 * aggregated, with each thread treated as one measurement (unless
 * onlyInMaster is set, in which case there is only a single measurement).
 * The reported min, mean and max are therefore values across the threads.
 * To add a counter, add an entry here and put a KMP_BLOCK_COUNTER(name) at
 * the point you want to count; all of the tables and printing are generated
 * from this macro. Each entry has the form "macro(name, flags, arg)", where
 * flags is either 0 or an OR of stats_flags_e values.
 *
 * @ingroup STATS_GATHERING
 */
// clang-format off
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL,                                                          \
        stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg)             \
  macro(OMP_NESTED_PARALLEL, 0, arg)                                           \
  macro(OMP_LOOP_STATIC, 0, arg)                                               \
  macro(OMP_LOOP_STATIC_STEAL, 0, arg)                                         \
  macro(OMP_LOOP_DYNAMIC, 0, arg)                                              \
  macro(OMP_DISTRIBUTE, 0, arg)                                                \
  macro(OMP_BARRIER, 0, arg)                                                   \
  macro(OMP_CRITICAL, 0, arg)                                                  \
  macro(OMP_SINGLE, 0, arg)                                                    \
  macro(OMP_SECTIONS, 0, arg)                                                  \
  macro(OMP_MASTER, 0, arg)                                                    \
  macro(OMP_MASKED, 0, arg)                                                    \
  macro(OMP_TEAMS, 0, arg)                                                     \
  macro(OMP_set_lock, 0, arg)                                                  \
  macro(OMP_test_lock, 0, arg)                                                 \
  macro(REDUCE_wait, 0, arg)                                                   \
  macro(REDUCE_nowait, 0, arg)                                                 \
  macro(OMP_TASKYIELD, 0, arg)                                                 \
  macro(OMP_TASKLOOP, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)                                                 \
  macro(TASK_cancelled, 0, arg)                                                \
  macro(TASK_stolen, 0, arg)
// clang-format on
1190b57cec5SDimitry Andric 
/*!
 * \brief The list of all timers. Add new timers under the KMP_FOREACH_TIMER()
 * macro in kmp_stats.h
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(TIMER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \details A timer gathers multiple samples of some value in each thread, and
 * the samples from all of the threads are aggregated at the end. For most
 * timers the printing code also provides an aggregation over the thread
 * totals; these are printed as TOTAL_foo. The sampled value is normally a time
 * (in ticks), hence the name "timer", but it can be any value, so the same
 * mechanism is used for "number of arguments passed to fork" as well. For
 * timers the threads are not significant; it is the individual observations
 * that count, so the statistics are at that level. Each entry has the form
 * "macro(name, flags, arg)". The developer timers from
 * KMP_FOREACH_DEVELOPER_TIMER() are appended after the entries listed here.
 *
 * @ingroup STATS_GATHERING2
 */
// clang-format off
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro(OMP_worker_thread_life, stats_flags_e::logEvent, arg)                  \
  macro(OMP_parallel, stats_flags_e::logEvent, arg)                            \
  macro(OMP_parallel_overhead, stats_flags_e::logEvent, arg)                   \
  macro(OMP_teams, stats_flags_e::logEvent, arg)                               \
  macro(OMP_teams_overhead, stats_flags_e::logEvent, arg)                      \
  macro(OMP_loop_static, 0, arg)                                               \
  macro(OMP_loop_static_scheduling, 0, arg)                                    \
  macro(OMP_loop_dynamic, 0, arg)                                              \
  macro(OMP_loop_dynamic_scheduling, 0, arg)                                   \
  macro(OMP_distribute, 0, arg)                                                \
  macro(OMP_distribute_scheduling, 0, arg)                                     \
  macro(OMP_critical, 0, arg)                                                  \
  macro(OMP_critical_wait, 0, arg)                                             \
  macro(OMP_single, 0, arg)                                                    \
  macro(OMP_sections, 0, arg)                                                  \
  macro(OMP_sections_overhead, 0, arg)                                         \
  macro(OMP_master, 0, arg)                                                    \
  macro(OMP_masked, 0, arg)                                                    \
  macro(OMP_task_immediate, 0, arg)                                            \
  macro(OMP_task_taskwait, 0, arg)                                             \
  macro(OMP_task_taskyield, 0, arg)                                            \
  macro(OMP_task_taskgroup, 0, arg)                                            \
  macro(OMP_task_join_bar, 0, arg)                                             \
  macro(OMP_task_plain_bar, 0, arg)                                            \
  macro(OMP_taskloop_scheduling, 0, arg)                                       \
  macro(OMP_plain_barrier, stats_flags_e::logEvent, arg)                       \
  macro(OMP_idle, stats_flags_e::logEvent, arg)                                \
  macro(OMP_fork_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_join_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_serial, stats_flags_e::logEvent, arg)                              \
  macro(OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,   \
        arg)                                                                   \
  macro(OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal,    \
        arg)                                                                   \
  macro(OMP_loop_static_iterations,                                            \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_static_total_iterations,                                      \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_dynamic_iterations,                                           \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_dynamic_total_iterations,                                     \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_distribute_iterations,                                             \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// clang-format on
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric // OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either
188fe6060f1SDimitry Andric //                           initializing OpenMP or being created by a primary
189fe6060f1SDimitry Andric //                           thread) until the thread is destroyed
1900b57cec5SDimitry Andric // OMP_parallel           -- Time thread spends executing work directly
1910b57cec5SDimitry Andric //                           within a #pragma omp parallel
1920b57cec5SDimitry Andric // OMP_parallel_overhead  -- Time thread spends setting up a parallel region
1930b57cec5SDimitry Andric // OMP_loop_static        -- Time thread spends executing loop iterations from
1940b57cec5SDimitry Andric //                           a statically scheduled loop
1950b57cec5SDimitry Andric // OMP_loop_static_scheduling -- Time thread spends scheduling loop iterations
1960b57cec5SDimitry Andric //                               from a statically scheduled loop
1970b57cec5SDimitry Andric // OMP_loop_dynamic       -- Time thread spends executing loop iterations from
1980b57cec5SDimitry Andric //                           a dynamically scheduled loop
1990b57cec5SDimitry Andric // OMP_loop_dynamic_scheduling -- Time thread spends scheduling loop iterations
2000b57cec5SDimitry Andric //                                from a dynamically scheduled loop
2010b57cec5SDimitry Andric // OMP_critical           -- Time thread spends executing critical section
2020b57cec5SDimitry Andric // OMP_critical_wait      -- Time thread spends waiting to enter
2035ffd83dbSDimitry Andric //                           a critical section
2040b57cec5SDimitry Andric // OMP_single             -- Time spent executing a "single" region
2050b57cec5SDimitry Andric // OMP_master             -- Time spent executing a "master" region
206fe6060f1SDimitry Andric // OMP_masked             -- Time spent executing a "masked" region
2070b57cec5SDimitry Andric // OMP_task_immediate     -- Time spent executing non-deferred tasks
2080b57cec5SDimitry Andric // OMP_task_taskwait      -- Time spent executing tasks inside a taskwait
2090b57cec5SDimitry Andric //                           construct
2100b57cec5SDimitry Andric // OMP_task_taskyield     -- Time spent executing tasks inside a taskyield
2110b57cec5SDimitry Andric //                           construct
// OMP_task_taskgroup     -- Time spent executing tasks inside a taskgroup
2130b57cec5SDimitry Andric //                           construct
2140b57cec5SDimitry Andric // OMP_task_join_bar      -- Time spent executing tasks inside a join barrier
2150b57cec5SDimitry Andric // OMP_task_plain_bar     -- Time spent executing tasks inside a barrier
2160b57cec5SDimitry Andric //                           construct
2170b57cec5SDimitry Andric // OMP_taskloop_scheduling -- Time spent scheduling tasks inside a taskloop
2180b57cec5SDimitry Andric //                            construct
2190b57cec5SDimitry Andric // OMP_plain_barrier      -- Time spent in a #pragma omp barrier construct or
2200b57cec5SDimitry Andric //                           inside implicit barrier at end of worksharing
2210b57cec5SDimitry Andric //                           construct
2220b57cec5SDimitry Andric // OMP_idle               -- Time worker threads spend waiting for next
2230b57cec5SDimitry Andric //                           parallel region
// OMP_fork_barrier       -- Time spent in the fork barrier surrounding a
2250b57cec5SDimitry Andric //                           parallel region
// OMP_join_barrier       -- Time spent in the join barrier surrounding a
2270b57cec5SDimitry Andric //                           parallel region
2280b57cec5SDimitry Andric // OMP_serial             -- Time thread zero spends executing serial code
2290b57cec5SDimitry Andric // OMP_set_numthreads     -- Values passed to omp_set_num_threads
2300b57cec5SDimitry Andric // OMP_PARALLEL_args      -- Number of arguments passed to a parallel region
2310b57cec5SDimitry Andric // OMP_loop_static_iterations -- Number of iterations thread is assigned for
2320b57cec5SDimitry Andric //                               statically scheduled loops
2330b57cec5SDimitry Andric // OMP_loop_dynamic_iterations -- Number of iterations thread is assigned for
2340b57cec5SDimitry Andric //                                dynamically scheduled loops
2350b57cec5SDimitry Andric 
#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
// These have to be explicitly enabled in addition to the other stats.
//
// NOTE(review): the description list below is not in sync with the macro that
// follows. It describes KMP_fork_barrier, KMP_join_barrier, KMP_barrier and
// KMP_icv_copy, which are not entries of the macro, and it has no description
// for several entries that are (for example KMP_fork_call, KMP_join_call,
// KMP_hier_gather/KMP_hier_release and the USER_* and FOR_static_steal_*
// timers). Confirm against the code that uses these timers before relying on
// it.

// KMP_fork_barrier       -- time in __kmp_fork_barrier
// KMP_join_barrier       -- time in __kmp_join_barrier
// KMP_barrier            -- time in __kmp_barrier
// KMP_end_split_barrier  -- time in __kmp_end_split_barrier
// KMP_setup_icv_copy     -- time in __kmp_setup_icv_copy
// KMP_icv_copy           -- start/stop timer for any ICV copying
// KMP_linear_gather      -- time in __kmp_linear_barrier_gather
// KMP_linear_release     -- time in __kmp_linear_barrier_release
// KMP_tree_gather        -- time in __kmp_tree_barrier_gather
// KMP_tree_release       -- time in __kmp_tree_barrier_release
// KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release      -- time in __kmp_hyper_barrier_release
// KMP_dist_gather        -- time in __kmp_dist_barrier_gather
// KMP_dist_release       -- time in __kmp_dist_barrier_release
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call, 0, arg)                                                 \
  macro(KMP_join_call, 0, arg)                                                 \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather, 0, arg)                                               \
  macro(KMP_hier_release, 0, arg)                                              \
  macro(KMP_hyper_gather, 0, arg)                                              \
  macro(KMP_hyper_release, 0, arg)                                             \
  macro(KMP_dist_gather, 0, arg)                                               \
  macro(KMP_dist_release, 0, arg)                                              \
  macro(KMP_linear_gather, 0, arg)                                             \
  macro(KMP_linear_release, 0, arg)                                            \
  macro(KMP_tree_gather, 0, arg)                                               \
  macro(KMP_tree_release, 0, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)                                                  \
  macro(USER_mwait, 0, arg)                                                    \
  macro(KMP_allocate_team, 0, arg)                                             \
  macro(KMP_setup_icv_copy, 0, arg)                                            \
  macro(USER_icv_copy, 0, arg)                                                 \
  macro(FOR_static_steal_stolen,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_chunks,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else
// Developer statistics are disabled: the list contributes no entries.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
// clang-format on
2830b57cec5SDimitry Andric 
/*!
 * \brief The list of explicit timers. Add new explicit timers under the
 * KMP_FOREACH_EXPLICIT_TIMER() macro.
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(TIMER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \warning EVERY EXPLICIT TIMER MUST HAVE THE SAME NAMED TIMER UNDER
 * KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! As defined here the
 * macro simply forwards to KMP_FOREACH_TIMER(), so the two lists are
 * identical.
 *
 * \details Explicit timers are the ones for which a timer object itself has
 * to be allocated (in addition to the accumulated timing statistics). They
 * are allocated on a per-thread basis and are started and stopped explicitly.
 * Block timers, by contrast, just allocate the timer on the stack and rely on
 * the destructor to notice block exit, so they do not need to be defined
 * here. The name used here should be the same as that of a timer above.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)                                 \
  KMP_FOREACH_TIMER(macro, arg)
3040b57cec5SDimitry Andric 
3050b57cec5SDimitry Andric #define ENUMERATE(name, ignore, prefix) prefix##name,
3060b57cec5SDimitry Andric enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric enum explicit_timer_e {
3090b57cec5SDimitry Andric   KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
3100b57cec5SDimitry Andric };
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
3130b57cec5SDimitry Andric #undef ENUMERATE
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric /*
3160b57cec5SDimitry Andric  * A logarithmic histogram. It accumulates the number of values in each power of
3170b57cec5SDimitry Andric  * ten bin.  So 1<=x<10, 10<=x<100, ...
3180b57cec5SDimitry Andric  * Mostly useful where we have some big outliers and want to see information
3190b57cec5SDimitry Andric  * about them.
3200b57cec5SDimitry Andric  */
3210b57cec5SDimitry Andric class logHistogram {
3220b57cec5SDimitry Andric   enum {
3230b57cec5SDimitry Andric     numBins = 31, /* Number of powers of 10. If this changes you need to change
3240b57cec5SDimitry Andric                    * the initializer for binMax */
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric     /*
3270b57cec5SDimitry Andric      * If you want to use this to analyse values that may be less than 1, (for
3280b57cec5SDimitry Andric      * instance times in s), then the logOffset gives you negative powers.
3290b57cec5SDimitry Andric      * In our case here, we're just looking at times in ticks, or counts, so we
3300b57cec5SDimitry Andric      * can never see values with magnitude < 1 (other than zero), so we can set
3310b57cec5SDimitry Andric      * it to 0.  As above change the initializer if you change this.
3320b57cec5SDimitry Andric      */
3330b57cec5SDimitry Andric     logOffset = 0
3340b57cec5SDimitry Andric   };
3350b57cec5SDimitry Andric   uint32_t KMP_ALIGN_CACHE zeroCount;
3360b57cec5SDimitry Andric   struct {
3370b57cec5SDimitry Andric     uint32_t count;
3380b57cec5SDimitry Andric     double total;
3390b57cec5SDimitry Andric   } bins[numBins];
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric   static double binMax[numBins];
3420b57cec5SDimitry Andric 
3430b57cec5SDimitry Andric #ifdef KMP_DEBUG
3440b57cec5SDimitry Andric   uint64_t _total;
3450b57cec5SDimitry Andric 
check()3460b57cec5SDimitry Andric   void check() const {
3470b57cec5SDimitry Andric     uint64_t t = zeroCount;
3480b57cec5SDimitry Andric     for (int i = 0; i < numBins; i++)
3490b57cec5SDimitry Andric       t += bins[i].count;
3500b57cec5SDimitry Andric     KMP_DEBUG_ASSERT(t == _total);
3510b57cec5SDimitry Andric   }
3520b57cec5SDimitry Andric #else
check()3530b57cec5SDimitry Andric   void check() const {}
3540b57cec5SDimitry Andric #endif
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric public:
logHistogram()3570b57cec5SDimitry Andric   logHistogram() { reset(); }
3580b57cec5SDimitry Andric 
logHistogram(logHistogram const & o)3590b57cec5SDimitry Andric   logHistogram(logHistogram const &o) {
3600b57cec5SDimitry Andric     for (int i = 0; i < numBins; i++)
3610b57cec5SDimitry Andric       bins[i] = o.bins[i];
3620b57cec5SDimitry Andric #ifdef KMP_DEBUG
3630b57cec5SDimitry Andric     _total = o._total;
3640b57cec5SDimitry Andric #endif
3650b57cec5SDimitry Andric   }
3660b57cec5SDimitry Andric 
reset()3670b57cec5SDimitry Andric   void reset() {
3680b57cec5SDimitry Andric     zeroCount = 0;
3690b57cec5SDimitry Andric     for (int i = 0; i < numBins; i++) {
3700b57cec5SDimitry Andric       bins[i].count = 0;
3710b57cec5SDimitry Andric       bins[i].total = 0;
3720b57cec5SDimitry Andric     }
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric #ifdef KMP_DEBUG
3750b57cec5SDimitry Andric     _total = 0;
3760b57cec5SDimitry Andric #endif
3770b57cec5SDimitry Andric   }
count(int b)3780b57cec5SDimitry Andric   uint32_t count(int b) const { return bins[b + logOffset].count; }
total(int b)3790b57cec5SDimitry Andric   double total(int b) const { return bins[b + logOffset].total; }
3800b57cec5SDimitry Andric   static uint32_t findBin(double sample);
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   logHistogram &operator+=(logHistogram const &o) {
3830b57cec5SDimitry Andric     zeroCount += o.zeroCount;
3840b57cec5SDimitry Andric     for (int i = 0; i < numBins; i++) {
3850b57cec5SDimitry Andric       bins[i].count += o.bins[i].count;
3860b57cec5SDimitry Andric       bins[i].total += o.bins[i].total;
3870b57cec5SDimitry Andric     }
3880b57cec5SDimitry Andric #ifdef KMP_DEBUG
3890b57cec5SDimitry Andric     _total += o._total;
3900b57cec5SDimitry Andric     check();
3910b57cec5SDimitry Andric #endif
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric     return *this;
3940b57cec5SDimitry Andric   }
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   void addSample(double sample);
3970b57cec5SDimitry Andric   int minBin() const;
3980b57cec5SDimitry Andric   int maxBin() const;
3990b57cec5SDimitry Andric 
4000b57cec5SDimitry Andric   std::string format(char) const;
4010b57cec5SDimitry Andric };
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric class statistic {
4040b57cec5SDimitry Andric   double KMP_ALIGN_CACHE minVal;
4050b57cec5SDimitry Andric   double maxVal;
4060b57cec5SDimitry Andric   double meanVal;
4070b57cec5SDimitry Andric   double m2;
4080b57cec5SDimitry Andric   uint64_t sampleCount;
4090b57cec5SDimitry Andric   double offset;
4100b57cec5SDimitry Andric   bool collectingHist;
4110b57cec5SDimitry Andric   logHistogram hist;
4120b57cec5SDimitry Andric 
4130b57cec5SDimitry Andric public:
4140b57cec5SDimitry Andric   statistic(bool doHist = bool(KMP_STATS_HIST)) {
4150b57cec5SDimitry Andric     reset();
4160b57cec5SDimitry Andric     collectingHist = doHist;
4170b57cec5SDimitry Andric   }
statistic(statistic const & o)4180b57cec5SDimitry Andric   statistic(statistic const &o)
4190b57cec5SDimitry Andric       : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
4200b57cec5SDimitry Andric         sampleCount(o.sampleCount), offset(o.offset),
4210b57cec5SDimitry Andric         collectingHist(o.collectingHist), hist(o.hist) {}
statistic(double minv,double maxv,double meanv,uint64_t sc,double sd)4220b57cec5SDimitry Andric   statistic(double minv, double maxv, double meanv, uint64_t sc, double sd)
4230b57cec5SDimitry Andric       : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
4240b57cec5SDimitry Andric         sampleCount(sc), offset(0.0), collectingHist(false) {}
haveHist()4250b57cec5SDimitry Andric   bool haveHist() const { return collectingHist; }
getMin()4260b57cec5SDimitry Andric   double getMin() const { return minVal; }
getMean()4270b57cec5SDimitry Andric   double getMean() const { return meanVal; }
getMax()4280b57cec5SDimitry Andric   double getMax() const { return maxVal; }
getCount()4290b57cec5SDimitry Andric   uint64_t getCount() const { return sampleCount; }
getSD()4300b57cec5SDimitry Andric   double getSD() const { return sqrt(m2 / sampleCount); }
getTotal()4310b57cec5SDimitry Andric   double getTotal() const { return sampleCount * meanVal; }
getHist()4320b57cec5SDimitry Andric   logHistogram const *getHist() const { return &hist; }
  // Stores d in the offset field; reset() sets it back to 0.0.
  void setOffset(double d) { offset = d; }
4340b57cec5SDimitry Andric 
reset()4350b57cec5SDimitry Andric   void reset() {
436e8d8bef9SDimitry Andric     minVal = (std::numeric_limits<double>::max)();
4370b57cec5SDimitry Andric     maxVal = -minVal;
4380b57cec5SDimitry Andric     meanVal = 0.0;
4390b57cec5SDimitry Andric     m2 = 0.0;
4400b57cec5SDimitry Andric     sampleCount = 0;
4410b57cec5SDimitry Andric     offset = 0.0;
4420b57cec5SDimitry Andric     hist.reset();
4430b57cec5SDimitry Andric   }
4440b57cec5SDimitry Andric   void addSample(double sample);
4450b57cec5SDimitry Andric   void scale(double factor);
scaleDown(double f)4460b57cec5SDimitry Andric   void scaleDown(double f) { scale(1. / f); }
  // Overwrites the sample count without touching any other field.
  void forceCount(uint64_t count) { sampleCount = count; }
4480b57cec5SDimitry Andric   statistic &operator+=(statistic const &other);
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric   std::string format(char unit, bool total = false) const;
  // Formats the embedded histogram; simply forwards unit to hist.format().
  std::string formatHist(char unit) const { return hist.format(unit); }
4520b57cec5SDimitry Andric };
4530b57cec5SDimitry Andric 
// Static description of one timer or counter: the entry type of the
// timeStat::timerInfo and counter::counterInfo tables.
struct statInfo {
  const char *name; // printable name
  uint32_t flags; // bit set tested against stats_flags_e values
};
4580b57cec5SDimitry Andric 
4590b57cec5SDimitry Andric class timeStat : public statistic {
4600b57cec5SDimitry Andric   static statInfo timerInfo[];
4610b57cec5SDimitry Andric 
4620b57cec5SDimitry Andric public:
timeStat()4630b57cec5SDimitry Andric   timeStat() : statistic() {}
name(timer_e e)4640b57cec5SDimitry Andric   static const char *name(timer_e e) { return timerInfo[e].name; }
noTotal(timer_e e)4650b57cec5SDimitry Andric   static bool noTotal(timer_e e) {
4660b57cec5SDimitry Andric     return timerInfo[e].flags & stats_flags_e::noTotal;
4670b57cec5SDimitry Andric   }
masterOnly(timer_e e)4680b57cec5SDimitry Andric   static bool masterOnly(timer_e e) {
4690b57cec5SDimitry Andric     return timerInfo[e].flags & stats_flags_e::onlyInMaster;
4700b57cec5SDimitry Andric   }
workerOnly(timer_e e)4710b57cec5SDimitry Andric   static bool workerOnly(timer_e e) {
4720b57cec5SDimitry Andric     return timerInfo[e].flags & stats_flags_e::notInMaster;
4730b57cec5SDimitry Andric   }
noUnits(timer_e e)4740b57cec5SDimitry Andric   static bool noUnits(timer_e e) {
4750b57cec5SDimitry Andric     return timerInfo[e].flags & stats_flags_e::noUnits;
4760b57cec5SDimitry Andric   }
logEvent(timer_e e)4770b57cec5SDimitry Andric   static bool logEvent(timer_e e) {
4780b57cec5SDimitry Andric     return timerInfo[e].flags & stats_flags_e::logEvent;
4790b57cec5SDimitry Andric   }
clearEventFlags()4800b57cec5SDimitry Andric   static void clearEventFlags() {
4810b57cec5SDimitry Andric     for (int i = 0; i < TIMER_LAST; i++) {
4820b57cec5SDimitry Andric       timerInfo[i].flags &= (~(stats_flags_e::logEvent));
4830b57cec5SDimitry Andric     }
4840b57cec5SDimitry Andric   }
4850b57cec5SDimitry Andric };
4860b57cec5SDimitry Andric 
// Where we need to start and stop the timer explicitly, this version can be
// used. Since these timers normally aren't nicely scoped, and so don't have a
// good place to live on the stack of the thread, they're more work to use.
4900b57cec5SDimitry Andric class explicitTimer {
4910b57cec5SDimitry Andric   timeStat *stat;
4920b57cec5SDimitry Andric   timer_e timerEnumValue;
4930b57cec5SDimitry Andric   tsc_tick_count startTime;
4940b57cec5SDimitry Andric   tsc_tick_count pauseStartTime;
4950b57cec5SDimitry Andric   tsc_tick_count::tsc_interval_t totalPauseTime;
4960b57cec5SDimitry Andric 
4970b57cec5SDimitry Andric public:
explicitTimer(timeStat * s,timer_e te)4980b57cec5SDimitry Andric   explicitTimer(timeStat *s, timer_e te)
4990b57cec5SDimitry Andric       : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
5000b57cec5SDimitry Andric         totalPauseTime() {}
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric   // void setStat(timeStat *s) { stat = s; }
5030b57cec5SDimitry Andric   void start(tsc_tick_count tick);
pause(tsc_tick_count tick)5040b57cec5SDimitry Andric   void pause(tsc_tick_count tick) { pauseStartTime = tick; }
resume(tsc_tick_count tick)5050b57cec5SDimitry Andric   void resume(tsc_tick_count tick) {
5060b57cec5SDimitry Andric     totalPauseTime += (tick - pauseStartTime);
5070b57cec5SDimitry Andric   }
5080b57cec5SDimitry Andric   void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr);
reset()5090b57cec5SDimitry Andric   void reset() {
5100b57cec5SDimitry Andric     startTime = 0;
5110b57cec5SDimitry Andric     pauseStartTime = 0;
5120b57cec5SDimitry Andric     totalPauseTime = 0;
5130b57cec5SDimitry Andric   }
get_type()5140b57cec5SDimitry Andric   timer_e get_type() const { return timerEnumValue; }
5150b57cec5SDimitry Andric };
5160b57cec5SDimitry Andric 
// Where you need to partition a thread's clock ticks into separate states
5180b57cec5SDimitry Andric // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
5190b57cec5SDimitry Andric // DOING_NOTHING would render these conditions:
5200b57cec5SDimitry Andric // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
5210b57cec5SDimitry Andric // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
5220b57cec5SDimitry Andric // versa
// Holds a stack of explicitTimer objects. All member functions are only
// declared here; their definitions are outside this header.
class partitionedTimers {
private:
  std::vector<explicitTimer> timer_stack;

public:
  partitionedTimers();
  void init(explicitTimer timer);
  void exchange(explicitTimer timer);
  // push()/pop() are the pair used by blockPartitionedTimer's constructor and
  // destructor.
  void push(explicitTimer timer);
  void pop();
  void windup();
};
5350b57cec5SDimitry Andric 
5365ffd83dbSDimitry Andric // Special wrapper around the partitioned timers to aid timing code blocks
5370b57cec5SDimitry Andric // It avoids the need to have an explicit end, leaving the scope suffices.
5380b57cec5SDimitry Andric class blockPartitionedTimer {
5390b57cec5SDimitry Andric   partitionedTimers *part_timers;
5400b57cec5SDimitry Andric 
5410b57cec5SDimitry Andric public:
blockPartitionedTimer(partitionedTimers * pt,explicitTimer timer)5420b57cec5SDimitry Andric   blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
5430b57cec5SDimitry Andric       : part_timers(pt) {
5440b57cec5SDimitry Andric     part_timers->push(timer);
5450b57cec5SDimitry Andric   }
~blockPartitionedTimer()5460b57cec5SDimitry Andric   ~blockPartitionedTimer() { part_timers->pop(); }
5470b57cec5SDimitry Andric };
5480b57cec5SDimitry Andric 
// Special wrapper around the thread state to aid in keeping state in code
// blocks. It avoids the need to have an explicit end; leaving the scope
// suffices.
5520b57cec5SDimitry Andric class blockThreadState {
5530b57cec5SDimitry Andric   stats_state_e *state_pointer;
5540b57cec5SDimitry Andric   stats_state_e old_state;
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric public:
blockThreadState(stats_state_e * thread_state_pointer,stats_state_e new_state)5570b57cec5SDimitry Andric   blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
5580b57cec5SDimitry Andric       : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
5590b57cec5SDimitry Andric     *state_pointer = new_state;
5600b57cec5SDimitry Andric   }
~blockThreadState()5610b57cec5SDimitry Andric   ~blockThreadState() { *state_pointer = old_state; }
5620b57cec5SDimitry Andric };
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric // If all you want is a count, then you can use this...
5650b57cec5SDimitry Andric // The individual per-thread counts will be aggregated into a statistic at
5660b57cec5SDimitry Andric // program exit.
5670b57cec5SDimitry Andric class counter {
5680b57cec5SDimitry Andric   uint64_t value;
5690b57cec5SDimitry Andric   static const statInfo counterInfo[];
5700b57cec5SDimitry Andric 
5710b57cec5SDimitry Andric public:
counter()5720b57cec5SDimitry Andric   counter() : value(0) {}
increment()5730b57cec5SDimitry Andric   void increment() { value++; }
getValue()5740b57cec5SDimitry Andric   uint64_t getValue() const { return value; }
reset()5750b57cec5SDimitry Andric   void reset() { value = 0; }
name(counter_e e)5760b57cec5SDimitry Andric   static const char *name(counter_e e) { return counterInfo[e].name; }
masterOnly(counter_e e)5770b57cec5SDimitry Andric   static bool masterOnly(counter_e e) {
5780b57cec5SDimitry Andric     return counterInfo[e].flags & stats_flags_e::onlyInMaster;
5790b57cec5SDimitry Andric   }
5800b57cec5SDimitry Andric };
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric /* ****************************************************************
5830b57cec5SDimitry Andric     Class to implement an event
5840b57cec5SDimitry Andric 
5850b57cec5SDimitry Andric     There are four components to an event: start time, stop time
5860b57cec5SDimitry Andric     nest_level, and timer_name.
5870b57cec5SDimitry Andric     The start and stop time should be obvious (recorded in clock ticks).
5880b57cec5SDimitry Andric     The nest_level relates to the bar width in the timeline graph.
5890b57cec5SDimitry Andric     The timer_name is used to determine which timer event triggered this event.
5900b57cec5SDimitry Andric 
5910b57cec5SDimitry Andric     the interface to this class is through four read-only operations:
5920b57cec5SDimitry Andric     1) getStart()     -- returns the start time as 64 bit integer
5930b57cec5SDimitry Andric     2) getStop()      -- returns the stop time as 64 bit integer
5940b57cec5SDimitry Andric     3) getNestLevel() -- returns the nest level of the event
5950b57cec5SDimitry Andric     4) getTimerName() -- returns the timer name that triggered event
5960b57cec5SDimitry Andric 
5970b57cec5SDimitry Andric     *MORE ON NEST_LEVEL*
5980b57cec5SDimitry Andric     The nest level is used in the bar graph that represents the timeline.
    Its main purpose is for showing how events are nested inside each other.
6000b57cec5SDimitry Andric     For example, say events, A, B, and C are recorded.  If the timeline
6010b57cec5SDimitry Andric     looks like this:
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric Begin -------------------------------------------------------------> Time
6040b57cec5SDimitry Andric          |    |          |        |          |              |
6050b57cec5SDimitry Andric          A    B          C        C          B              A
6060b57cec5SDimitry Andric        start start     start     end        end            end
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric        Then A, B, C will have a nest level of 1, 2, 3 respectively.
6090b57cec5SDimitry Andric        These values are then used to calculate the barwidth so you can
6100b57cec5SDimitry Andric        see that inside A, B has occurred, and inside B, C has occurred.
6110b57cec5SDimitry Andric        Currently, this is shown with A's bar width being larger than B's
6120b57cec5SDimitry Andric        bar width, and B's bar width being larger than C's bar width.
6130b57cec5SDimitry Andric 
6140b57cec5SDimitry Andric **************************************************************** */
6150b57cec5SDimitry Andric class kmp_stats_event {
6160b57cec5SDimitry Andric   uint64_t start;
6170b57cec5SDimitry Andric   uint64_t stop;
6180b57cec5SDimitry Andric   int nest_level;
6190b57cec5SDimitry Andric   timer_e timer_name;
6200b57cec5SDimitry Andric 
6210b57cec5SDimitry Andric public:
kmp_stats_event()6220b57cec5SDimitry Andric   kmp_stats_event()
6230b57cec5SDimitry Andric       : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
kmp_stats_event(uint64_t strt,uint64_t stp,int nst,timer_e nme)6240b57cec5SDimitry Andric   kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
6250b57cec5SDimitry Andric       : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
getStart()6260b57cec5SDimitry Andric   inline uint64_t getStart() const { return start; }
getStop()6270b57cec5SDimitry Andric   inline uint64_t getStop() const { return stop; }
getNestLevel()6280b57cec5SDimitry Andric   inline int getNestLevel() const { return nest_level; }
getTimerName()6290b57cec5SDimitry Andric   inline timer_e getTimerName() const { return timer_name; }
6300b57cec5SDimitry Andric };
6310b57cec5SDimitry Andric 
6320b57cec5SDimitry Andric /* ****************************************************************
6330b57cec5SDimitry Andric     Class to implement a dynamically expandable array of events
6340b57cec5SDimitry Andric 
6350b57cec5SDimitry Andric     ---------------------------------------------------------
6360b57cec5SDimitry Andric     | event 1 | event 2 | event 3 | event 4 | ... | event N |
6370b57cec5SDimitry Andric     ---------------------------------------------------------
6380b57cec5SDimitry Andric 
    An event is pushed onto the back of this array at every
    explicitTimer->stop() call.  The event records the start time,
    stop time, nest level (related to the bar width), and timer name;
    the thread is implied by the list node that owns the vector.
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric     The event vector starts at size INIT_SIZE and grows (doubles in size)
6440b57cec5SDimitry Andric     if needed.  An implication of this behavior is that log(N)
6450b57cec5SDimitry Andric     reallocations are needed (where N is number of events).  If you want
6460b57cec5SDimitry Andric     to avoid reallocations, then set INIT_SIZE to a large value.
6470b57cec5SDimitry Andric 
6480b57cec5SDimitry Andric     the interface to this class is through six operations:
6490b57cec5SDimitry Andric     1) reset() -- sets the internal_size back to 0 but does not deallocate any
6500b57cec5SDimitry Andric        memory
6510b57cec5SDimitry Andric     2) size()  -- returns the number of valid elements in the vector
6520b57cec5SDimitry Andric     3) push_back(start, stop, nest, timer_name) -- pushes an event onto
6530b57cec5SDimitry Andric        the back of the array
6540b57cec5SDimitry Andric     4) deallocate() -- frees all memory associated with the vector
6550b57cec5SDimitry Andric     5) sort() -- sorts the vector by start time
6560b57cec5SDimitry Andric     6) operator[index] or at(index) -- returns event reference at that index
6570b57cec5SDimitry Andric **************************************************************** */
6580b57cec5SDimitry Andric class kmp_stats_event_vector {
6590b57cec5SDimitry Andric   kmp_stats_event *events;
6600b57cec5SDimitry Andric   int internal_size;
6610b57cec5SDimitry Andric   int allocated_size;
6620b57cec5SDimitry Andric   static const int INIT_SIZE = 1024;
6630b57cec5SDimitry Andric 
6640b57cec5SDimitry Andric public:
kmp_stats_event_vector()6650b57cec5SDimitry Andric   kmp_stats_event_vector() {
6660b57cec5SDimitry Andric     events =
6670b57cec5SDimitry Andric         (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
6680b57cec5SDimitry Andric     internal_size = 0;
6690b57cec5SDimitry Andric     allocated_size = INIT_SIZE;
6700b57cec5SDimitry Andric   }
~kmp_stats_event_vector()6710b57cec5SDimitry Andric   ~kmp_stats_event_vector() {}
reset()6720b57cec5SDimitry Andric   inline void reset() { internal_size = 0; }
size()6730b57cec5SDimitry Andric   inline int size() const { return internal_size; }
push_back(uint64_t start_time,uint64_t stop_time,int nest_level,timer_e name)6740b57cec5SDimitry Andric   void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
6750b57cec5SDimitry Andric                  timer_e name) {
6760b57cec5SDimitry Andric     int i;
6770b57cec5SDimitry Andric     if (internal_size == allocated_size) {
6780b57cec5SDimitry Andric       kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
6790b57cec5SDimitry Andric           sizeof(kmp_stats_event) * allocated_size * 2);
6800b57cec5SDimitry Andric       for (i = 0; i < internal_size; i++)
6810b57cec5SDimitry Andric         tmp[i] = events[i];
6820b57cec5SDimitry Andric       __kmp_free(events);
6830b57cec5SDimitry Andric       events = tmp;
6840b57cec5SDimitry Andric       allocated_size *= 2;
6850b57cec5SDimitry Andric     }
6860b57cec5SDimitry Andric     events[internal_size] =
6870b57cec5SDimitry Andric         kmp_stats_event(start_time, stop_time, nest_level, name);
6880b57cec5SDimitry Andric     internal_size++;
6890b57cec5SDimitry Andric     return;
6900b57cec5SDimitry Andric   }
6910b57cec5SDimitry Andric   void deallocate();
6920b57cec5SDimitry Andric   void sort();
6930b57cec5SDimitry Andric   const kmp_stats_event &operator[](int index) const { return events[index]; }
6940b57cec5SDimitry Andric   kmp_stats_event &operator[](int index) { return events[index]; }
at(int index)6950b57cec5SDimitry Andric   const kmp_stats_event &at(int index) const { return events[index]; }
at(int index)6960b57cec5SDimitry Andric   kmp_stats_event &at(int index) { return events[index]; }
6970b57cec5SDimitry Andric };
6980b57cec5SDimitry Andric 
6990b57cec5SDimitry Andric /* ****************************************************************
7000b57cec5SDimitry Andric     Class to implement a doubly-linked, circular, statistics list
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric     |---| ---> |---| ---> |---| ---> |---| ---> ... next
7030b57cec5SDimitry Andric     |   |      |   |      |   |      |   |
7040b57cec5SDimitry Andric     |---| <--- |---| <--- |---| <--- |---| <--- ... prev
7050b57cec5SDimitry Andric     Sentinel   first      second     third
7060b57cec5SDimitry Andric     Node       node       node       node
7070b57cec5SDimitry Andric 
7080b57cec5SDimitry Andric     The Sentinel Node is the user handle on the list.
7090b57cec5SDimitry Andric     The first node corresponds to thread 0's statistics.
7100b57cec5SDimitry Andric     The second node corresponds to thread 1's statistics and so on...
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric     Each node has a _timers, _counters, and _explicitTimers array to hold that
7130b57cec5SDimitry Andric     thread's statistics. The _explicitTimers point to the correct _timer and
7140b57cec5SDimitry Andric     update its statistics at every stop() call. The explicitTimers' pointers are
7150b57cec5SDimitry Andric     set up in the constructor. Each node also has an event vector to hold that
7160b57cec5SDimitry Andric     thread's timing events. The event vector expands as necessary and records
7170b57cec5SDimitry Andric     the start-stop times for each timer.
7180b57cec5SDimitry Andric 
7190b57cec5SDimitry Andric     The nestLevel variable is for plotting events and is related
7200b57cec5SDimitry Andric     to the bar width in the timeline graph.
7210b57cec5SDimitry Andric 
7220b57cec5SDimitry Andric     Every thread will have a thread local pointer to its node in
723fe6060f1SDimitry Andric     the list.  The sentinel node is used by the primary thread to
7240b57cec5SDimitry Andric     store "dummy" statistics before __kmp_create_worker() is called.
7250b57cec5SDimitry Andric **************************************************************** */
7260b57cec5SDimitry Andric class kmp_stats_list {
7270b57cec5SDimitry Andric   int gtid;
7280b57cec5SDimitry Andric   timeStat _timers[TIMER_LAST + 1];
7290b57cec5SDimitry Andric   counter _counters[COUNTER_LAST + 1];
7300b57cec5SDimitry Andric   explicitTimer thread_life_timer;
7310b57cec5SDimitry Andric   partitionedTimers _partitionedTimers;
7320b57cec5SDimitry Andric   int _nestLevel; // one per thread
7330b57cec5SDimitry Andric   kmp_stats_event_vector _event_vector;
7340b57cec5SDimitry Andric   kmp_stats_list *next;
7350b57cec5SDimitry Andric   kmp_stats_list *prev;
7360b57cec5SDimitry Andric   stats_state_e state;
7370b57cec5SDimitry Andric   int thread_is_idle_flag;
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric public:
kmp_stats_list()7400b57cec5SDimitry Andric   kmp_stats_list()
7410b57cec5SDimitry Andric       : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life],
7420b57cec5SDimitry Andric                           TIMER_OMP_worker_thread_life),
7430b57cec5SDimitry Andric         _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
7440b57cec5SDimitry Andric         thread_is_idle_flag(0) {}
~kmp_stats_list()7450b57cec5SDimitry Andric   ~kmp_stats_list() {}
getTimer(timer_e idx)7460b57cec5SDimitry Andric   inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
getCounter(counter_e idx)7470b57cec5SDimitry Andric   inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
getPartitionedTimers()7480b57cec5SDimitry Andric   inline partitionedTimers *getPartitionedTimers() {
7490b57cec5SDimitry Andric     return &_partitionedTimers;
7500b57cec5SDimitry Andric   }
getTimers()7510b57cec5SDimitry Andric   inline timeStat *getTimers() { return _timers; }
getCounters()7520b57cec5SDimitry Andric   inline counter *getCounters() { return _counters; }
getEventVector()7530b57cec5SDimitry Andric   inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
startLife()7540b57cec5SDimitry Andric   inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); }
endLife()7550b57cec5SDimitry Andric   inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); }
resetEventVector()7560b57cec5SDimitry Andric   inline void resetEventVector() { _event_vector.reset(); }
incrementNestValue()7570b57cec5SDimitry Andric   inline void incrementNestValue() { _nestLevel++; }
getNestValue()7580b57cec5SDimitry Andric   inline int getNestValue() { return _nestLevel; }
decrementNestValue()7590b57cec5SDimitry Andric   inline void decrementNestValue() { _nestLevel--; }
getGtid()7600b57cec5SDimitry Andric   inline int getGtid() const { return gtid; }
setGtid(int newgtid)7610b57cec5SDimitry Andric   inline void setGtid(int newgtid) { gtid = newgtid; }
setState(stats_state_e newstate)7620b57cec5SDimitry Andric   inline void setState(stats_state_e newstate) { state = newstate; }
getState()7630b57cec5SDimitry Andric   inline stats_state_e getState() const { return state; }
getStatePointer()7640b57cec5SDimitry Andric   inline stats_state_e *getStatePointer() { return &state; }
isIdle()7650b57cec5SDimitry Andric   inline bool isIdle() { return thread_is_idle_flag == 1; }
setIdleFlag()7660b57cec5SDimitry Andric   inline void setIdleFlag() { thread_is_idle_flag = 1; }
resetIdleFlag()7670b57cec5SDimitry Andric   inline void resetIdleFlag() { thread_is_idle_flag = 0; }
7680b57cec5SDimitry Andric   kmp_stats_list *push_back(int gtid); // returns newly created list node
push_event(uint64_t start_time,uint64_t stop_time,int nest_level,timer_e name)7690b57cec5SDimitry Andric   inline void push_event(uint64_t start_time, uint64_t stop_time,
7700b57cec5SDimitry Andric                          int nest_level, timer_e name) {
7710b57cec5SDimitry Andric     _event_vector.push_back(start_time, stop_time, nest_level, name);
7720b57cec5SDimitry Andric   }
7730b57cec5SDimitry Andric   void deallocate();
7740b57cec5SDimitry Andric   class iterator;
7750b57cec5SDimitry Andric   kmp_stats_list::iterator begin();
7760b57cec5SDimitry Andric   kmp_stats_list::iterator end();
7770b57cec5SDimitry Andric   int size();
7780b57cec5SDimitry Andric   class iterator {
7790b57cec5SDimitry Andric     kmp_stats_list *ptr;
7800b57cec5SDimitry Andric     friend kmp_stats_list::iterator kmp_stats_list::begin();
7810b57cec5SDimitry Andric     friend kmp_stats_list::iterator kmp_stats_list::end();
7820b57cec5SDimitry Andric 
7830b57cec5SDimitry Andric   public:
7840b57cec5SDimitry Andric     iterator();
7850b57cec5SDimitry Andric     ~iterator();
7860b57cec5SDimitry Andric     iterator operator++();
7870b57cec5SDimitry Andric     iterator operator++(int dummy);
7880b57cec5SDimitry Andric     iterator operator--();
7890b57cec5SDimitry Andric     iterator operator--(int dummy);
7900b57cec5SDimitry Andric     bool operator!=(const iterator &rhs);
7910b57cec5SDimitry Andric     bool operator==(const iterator &rhs);
7920b57cec5SDimitry Andric     kmp_stats_list *operator*() const; // dereference operator
7930b57cec5SDimitry Andric   };
7940b57cec5SDimitry Andric };
7950b57cec5SDimitry Andric 
7960b57cec5SDimitry Andric /* ****************************************************************
7970b57cec5SDimitry Andric    Class to encapsulate all output functions and the environment variables
7980b57cec5SDimitry Andric 
7990b57cec5SDimitry Andric    This module holds filenames for various outputs (normal stats, events, plot
8000b57cec5SDimitry Andric    file), as well as coloring information for the plot file.
8010b57cec5SDimitry Andric 
8020b57cec5SDimitry Andric    The filenames and flags variables are read from environment variables.
8030b57cec5SDimitry Andric    These are read once by the constructor of the global variable
8040b57cec5SDimitry Andric    __kmp_stats_output which calls init().
8050b57cec5SDimitry Andric 
8060b57cec5SDimitry Andric    During this init() call, event flags for the timeStat::timerInfo[] global
8070b57cec5SDimitry Andric    array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
8080b57cec5SDimitry Andric 
8090b57cec5SDimitry Andric    The only interface function that is public is outputStats(heading).  This
8100b57cec5SDimitry Andric    function should print out everything it needs to, either to files or stderr,
8110b57cec5SDimitry Andric    depending on the environment variables described below
8120b57cec5SDimitry Andric 
8130b57cec5SDimitry Andric    ENVIRONMENT VARIABLES:
8140b57cec5SDimitry Andric    KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
8150b57cec5SDimitry Andric                      file, otherwise, print to stderr
8160b57cec5SDimitry Andric    KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
8170b57cec5SDimitry Andric                         either KMP_STATS_FILE or stderr
8180b57cec5SDimitry Andric    KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
8190b57cec5SDimitry Andric                           otherwise, the plot file is sent to "events.plt"
8200b57cec5SDimitry Andric    KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
8210b57cec5SDimitry Andric                        events
8220b57cec5SDimitry Andric    KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
8230b57cec5SDimitry Andric                             otherwise, output is sent to "events.dat"
8240b57cec5SDimitry Andric **************************************************************** */
8250b57cec5SDimitry Andric class kmp_stats_output_module {
8260b57cec5SDimitry Andric 
8270b57cec5SDimitry Andric public:
8280b57cec5SDimitry Andric   struct rgb_color {
8290b57cec5SDimitry Andric     float r;
8300b57cec5SDimitry Andric     float g;
8310b57cec5SDimitry Andric     float b;
8320b57cec5SDimitry Andric   };
8330b57cec5SDimitry Andric 
8340b57cec5SDimitry Andric private:
8350b57cec5SDimitry Andric   std::string outputFileName;
8360b57cec5SDimitry Andric   static const char *eventsFileName;
8370b57cec5SDimitry Andric   static const char *plotFileName;
8380b57cec5SDimitry Andric   static int printPerThreadFlag;
8390b57cec5SDimitry Andric   static int printPerThreadEventsFlag;
8400b57cec5SDimitry Andric   static const rgb_color globalColorArray[];
8410b57cec5SDimitry Andric   static rgb_color timerColorInfo[];
8420b57cec5SDimitry Andric 
8430b57cec5SDimitry Andric   void init();
8440b57cec5SDimitry Andric   static void setupEventColors();
8450b57cec5SDimitry Andric   static void printPloticusFile();
8460b57cec5SDimitry Andric   static void printHeaderInfo(FILE *statsOut);
8470b57cec5SDimitry Andric   static void printTimerStats(FILE *statsOut, statistic const *theStats,
8480b57cec5SDimitry Andric                               statistic const *totalStats);
8490b57cec5SDimitry Andric   static void printCounterStats(FILE *statsOut, statistic const *theStats);
8500b57cec5SDimitry Andric   static void printCounters(FILE *statsOut, counter const *theCounters);
8510b57cec5SDimitry Andric   static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
8520b57cec5SDimitry Andric                           int gtid);
getEventColor(timer_e e)8530b57cec5SDimitry Andric   static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
8540b57cec5SDimitry Andric   static void windupExplicitTimers();
eventPrintingEnabled()8550b57cec5SDimitry Andric   bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
8560b57cec5SDimitry Andric 
8570b57cec5SDimitry Andric public:
kmp_stats_output_module()8580b57cec5SDimitry Andric   kmp_stats_output_module() { init(); }
8590b57cec5SDimitry Andric   void outputStats(const char *heading);
8600b57cec5SDimitry Andric };
8610b57cec5SDimitry Andric 
// Entry points and global objects of the statistics subsystem. They are only
// declared here (with C linkage when compiled as C++); the definitions are
// outside this header.
#ifdef __cplusplus
extern "C" {
#endif
void __kmp_stats_init();
void __kmp_stats_fini();
void __kmp_reset_stats();
// Target of the KMP_OUTPUT_STATS(heading_string) macro.
void __kmp_output_stats(const char *);
void __kmp_accumulate_stats_at_exit(void);
// thread local pointer to stats node within list
extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
// head to stats list.
extern kmp_stats_list *__kmp_stats_list;
// lock for __kmp_stats_list
extern kmp_tas_lock_t __kmp_stats_lock;
// reference start time
extern tsc_tick_count __kmp_stats_start_time;
// interface to output
extern kmp_stats_output_module __kmp_stats_output;

#ifdef __cplusplus
}
#endif
8840b57cec5SDimitry Andric 
8850b57cec5SDimitry Andric // Simple, standard interfaces that drop out completely if stats aren't enabled
8860b57cec5SDimitry Andric 
/*!
 * \brief Adds value to specified timer (name).
 *
 * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
 * @param value sample value to add to statistics for the timer; the whole
 * expression is converted to double after it has been evaluated
 *
 * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to
 * a timer statistics. The value argument is parenthesized inside the macro, so
 * a compound expression such as `a / b` is evaluated first and only then cast,
 * instead of the cast binding to its first operand.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_COUNT_VALUE(name, value)                                           \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample((double)(value))
9000b57cec5SDimitry Andric 
/*!
 * \brief Increments specified counter (name).
 *
 * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
 *
 * \details Use KMP_COUNT_BLOCK(name) macro to increment a statistics
 * counter for the executing thread. The counter is reached through the
 * thread-local __kmp_stats_thread_ptr.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_COUNT_BLOCK(name)                                                  \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
9130b57cec5SDimitry Andric 
/*!
 * \brief Outputs the current thread statistics and resets them.
 *
 * @param heading_string heading put above the final stats output
 *
 * \details Forwards to __kmp_output_stats(). Explicitly stops all timers and
 * outputs all stats. Setting the environment variable
 * `OMPTB_STATSFILE=filename` sends the output to that file instead of stderr.
 * Defining `OMPTB_STATSTHREADS` (with any value) additionally prints thread
 * specific stats; leaving it undefined (not specified in the environment)
 * suppresses them. It should be noted that all statistics are reset when this
 * macro is called.
 *
 * NOTE(review): the environment variable names are carried over from the
 * earlier version of this comment and cannot be checked from this header;
 * confirm them against the code that parses the statistics settings.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_OUTPUT_STATS(heading_string)                                       \
  (__kmp_output_stats(heading_string))
9320b57cec5SDimitry Andric 
/*!
 * \brief Initializes the partitioned timers to begin with name.
 *
 * @param name timer which you want this thread to begin with
 *
 * \details Builds an explicitTimer from the TIMER_##name timer (and its
 * TIMER_##name identifier) and hands it to init() on the partitioned timers
 * reached through __kmp_stats_thread_ptr.
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
  (__kmp_stats_thread_ptr->getPartitionedTimers()->init(                       \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
                    TIMER_##name)))
9430b57cec5SDimitry Andric 
// Declares a blockPartitionedTimer object named __PBLOCKTIME__, constructed
// from this thread's partitioned timers and an explicitTimer for
// TIMER_##name. Because the object name is fixed, the macro can be used at
// most once per scope.
// NOTE(review): presumably the object times the enclosing block and stops at
// scope exit - confirm against the blockPartitionedTimer definition.
#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
  blockPartitionedTimer __PBLOCKTIME__(                                        \
      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
      explicitTimer(                                                           \
          __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
9490b57cec5SDimitry Andric 
// Calls push() on this thread's partitioned timers with an explicitTimer
// built from the TIMER_##name timer and its TIMER_##name identifier.
#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
  (__kmp_stats_thread_ptr->getPartitionedTimers()->push(                       \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
                    TIMER_##name)))
9530b57cec5SDimitry Andric 
// Calls pop() on this thread's partitioned timers. Takes no arguments.
#define KMP_POP_PARTITIONED_TIMER()                                            \
  (__kmp_stats_thread_ptr->getPartitionedTimers()->pop())
9560b57cec5SDimitry Andric 
// Calls exchange() on this thread's partitioned timers with an explicitTimer
// built from the TIMER_##name timer and its TIMER_##name identifier.
// NOTE(review): presumably swaps the currently running timer for the named
// one - confirm against the partitioned timers implementation.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name)                                   \
  (__kmp_stats_thread_ptr->getPartitionedTimers()->exchange(                   \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
                    TIMER_##name)))
9600b57cec5SDimitry Andric 
// Passes state_name to setState() on the object behind
// __kmp_stats_thread_ptr.
#define KMP_SET_THREAD_STATE(state_name)                                       \
  (__kmp_stats_thread_ptr->setState(state_name))
9630b57cec5SDimitry Andric 
// Evaluates to the result of getState() on the object behind
// __kmp_stats_thread_ptr.
#define KMP_GET_THREAD_STATE() (__kmp_stats_thread_ptr->getState())
9650b57cec5SDimitry Andric 
// Declares a blockThreadState object named __BTHREADSTATE__, constructed from
// this thread's state pointer and state_name. Because the object name is
// fixed, the macro can be used at most once per scope.
// NOTE(review): presumably the previous state is restored when the object
// goes out of scope - confirm against the blockThreadState definition.
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
  blockThreadState __BTHREADSTATE__(                                           \
      __kmp_stats_thread_ptr->getStatePointer(), state_name)
9690b57cec5SDimitry Andric 
/*!
 * \brief Resets all stats (counters to 0, timers to 0 elapsed ticks).
 *
 * \details Resets all stats for all threads by forwarding to
 * __kmp_reset_stats().
 *
 * @ingroup STATS_GATHERING
 */
#define KMP_RESET_STATS() (__kmp_reset_stats())
9780b57cec5SDimitry Andric 
// Developer-level variants of the statistics macros. When KMP_DEVELOPER_STATS
// is non-zero they forward to the regular macros; otherwise they expand to a
// no-op expression and their arguments are discarded without being evaluated.
#if (KMP_DEVELOPER_STATS)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) KMP_PUSH_PARTITIONED_TIMER(n)
// KMP_POP_PARTITIONED_TIMER() takes no parameters, so the argument accepted
// here is dropped rather than forwarded. Forwarding it made every non-empty
// invocation a preprocessing error in this configuration, while the same call
// compiled cleanly with developer stats disabled.
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) KMP_POP_PARTITIONED_TIMER()
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n)                            \
  KMP_EXCHANGE_PARTITIONED_TIMER(n)
#else
// Null definitions
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#endif
9960b57cec5SDimitry Andric 
9970b57cec5SDimitry Andric #else // KMP_STATS_ENABLED
9980b57cec5SDimitry Andric 
// Null definitions: with statistics disabled every gathering macro expands to
// a no-op expression, so call sites compile unchanged and their arguments are
// never evaluated.
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)

#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)
// Counterpart of the stats-enabled KMP_EXCHANGE_PARTITIONED_TIMER, which
// previously had no null definition and so could not be used in code that
// must also build with statistics disabled.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name) ((void)0)
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
// Expands to a void expression: the result cannot be stored or compared when
// statistics are disabled.
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
10190b57cec5SDimitry Andric #endif // KMP_STATS_ENABLED
10200b57cec5SDimitry Andric 
10210b57cec5SDimitry Andric #endif // KMP_STATS_H
1022