xref: /llvm-project/openmp/runtime/test/ompt/callback.h (revision 7a72856af8b30f50e546b8368596fdc5f44cbca9)
1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
15 
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
18 
// _TOOL_PREFIX is pasted into the format string of every event line printed by
// the callbacks below. A file that includes this header may define it first to
// tag its own output.
#ifndef _TOOL_PREFIX
#define _TOOL_PREFIX ""
// If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
#define _OMPT_TESTS
#endif
24 
// Printable names for ompt_thread_t. The table is laid out so that the numeric
// enum value can be used as the index; slot 0 is a filler. The entry for
// ompt_thread_unknown (value 4 in the OpenMP specification) is included so
// that indexing with any valid thread type stays inside the array.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED",
    "ompt_thread_initial", // 1
    "ompt_thread_worker", // 2
    "ompt_thread_other", // 3
    "ompt_thread_unknown" // 4
};
28 
/* Names of the task status values. Each string sits at the slot given in the
 * leading comment; slot 0 is a filler entry. */
static const char *ompt_task_status_t_values[] = {
    /* 0 */ "ompt_task_UNDEFINED",
    /* 1 */ "ompt_task_complete",
    /* 2 */ "ompt_task_yield",
    /* 3 */ "ompt_task_cancel",
    /* 4 */ "ompt_task_detach",
    /* 5 */ "ompt_task_early_fulfill",
    /* 6 */ "ompt_task_late_fulfill",
    /* 7 */ "ompt_task_switch",
    /* 8 */ "ompt_taskwait_complete"};
/* Names of the cancel flags. The cancel callback in this file picks entries by
 * fixed position: slots 0-3 name the cancelled construct, slots 4-6 name the
 * cancellation state. */
static const char *ompt_cancel_flag_t_values[] = {
    /* 0 */ "ompt_cancel_parallel",
    /* 1 */ "ompt_cancel_sections",
    /* 2 */ "ompt_cancel_loop",
    /* 3 */ "ompt_cancel_taskgroup",
    /* 4 */ "ompt_cancel_activated",
    /* 5 */ "ompt_cancel_detected",
    /* 6 */ "ompt_cancel_discarded_task"};
49 
/* Display names for work-sharing kinds, one per slot; slot 0 is a filler.
 * NOTE(review): presumably indexed by the numeric ompt_work_t value - the
 * consumer is not visible in this part of the file. */
static const char *ompt_work_t_values[] = {
    /*  0 */ "undefined",
    /*  1 */ "ompt_work_loop",
    /*  2 */ "ompt_work_sections",
    /*  3 */ "ompt_work_single_executor",
    /*  4 */ "ompt_work_single_other",
    /*  5 */ "ompt_work_workshare",
    /*  6 */ "ompt_work_distribute",
    /*  7 */ "ompt_work_taskloop",
    /*  8 */ "ompt_work_scope",
    /*  9 */ "ompt_work_workdistribute",
    /* 10 */ "ompt_work_loop_static",
    /* 11 */ "ompt_work_loop_dynamic",
    /* 12 */ "ompt_work_loop_guided",
    /* 13 */ "ompt_work_loop_other"};
64 
/* Event-name stems for work-sharing kinds, laid out slot for slot like
 * ompt_work_t_values; slot 0 is a filler.
 * NOTE(review): presumably indexed by the numeric ompt_work_t value - the
 * consumer is not visible in this part of the file. */
static const char *ompt_work_events_t_values[] = {
    /*  0 */ "undefined",
    /*  1 */ "ompt_event_loop",
    /*  2 */ "ompt_event_sections",
    /*  3 */ "ompt_event_single_in_block",
    /*  4 */ "ompt_event_single_others",
    /*  5 */ "ompt_event_workshare",
    /*  6 */ "ompt_event_distribute",
    /*  7 */ "ompt_event_taskloop",
    /*  8 */ "ompt_event_scope",
    /*  9 */ "ompt_event_workdistribute",
    /* 10 */ "ompt_event_loop_static",
    /* 11 */ "ompt_event_loop_dynamic",
    /* 12 */ "ompt_event_loop_guided",
    /* 13 */ "ompt_event_loop_other"};
79 
/* Names of the dependence types. Slots 1-7 and 34-35 carry names; every slot
 * in between holds an empty string (not NULL) so that the two *_all_memory
 * entries land on slots 34 and 35. */
static const char *ompt_dependence_type_t_values[36] = {
    /*  0 */ "ompt_dependence_type_UNDEFINED",
    /*  1 */ "ompt_dependence_type_in",
    /*  2 */ "ompt_dependence_type_out",
    /*  3 */ "ompt_dependence_type_inout",
    /*  4 */ "ompt_dependence_type_mutexinoutset",
    /*  5 */ "ompt_dependence_type_source",
    /*  6 */ "ompt_dependence_type_sink",
    /*  7 */ "ompt_dependence_type_inoutset",
    /*  8-13 */ "", "", "", "", "", "",
    /* 14-18 */ "", "", "", "", "",
    /* 19-23 */ "", "", "", "", "",
    /* 24-28 */ "", "", "", "", "",
    /* 29-33 */ "", "", "", "", "",
    /* 34 */ "ompt_dependence_type_out_all_memory",
    /* 35 */ "ompt_dependence_type_inout_all_memory"};
95 
/* Short names for synchronization-region kinds. The sync_region callbacks in
 * this file index the table directly with the kind they receive and splice
 * the string into the printed event name; slot 0 is a filler. */
static const char *ompt_sync_region_t_values[] = {
    /*  0 */ "undefined",
    /*  1 */ "barrier",
    /*  2 */ "barrier_implicit",
    /*  3 */ "barrier_explicit",
    /*  4 */ "barrier_implementation",
    /*  5 */ "taskwait",
    /*  6 */ "taskgroup",
    /*  7 */ "reduction",
    /*  8 */ "barrier_implicit_workshare",
    /*  9 */ "barrier_implicit_parallel",
    /* 10 */ "barrier_teams"};
107 
108 static void format_task_type(int type, char *buffer) {
109   char *progress = buffer;
110   if (type & ompt_task_initial)
111     progress += sprintf(progress, "ompt_task_initial");
112   if (type & ompt_task_implicit)
113     progress += sprintf(progress, "ompt_task_implicit");
114   if (type & ompt_task_explicit)
115     progress += sprintf(progress, "ompt_task_explicit");
116   if (type & ompt_task_target)
117     progress += sprintf(progress, "ompt_task_target");
118   if (type & ompt_task_taskwait)
119     progress += sprintf(progress, "ompt_task_taskwait");
120   if (type & ompt_task_undeferred)
121     progress += sprintf(progress, "|ompt_task_undeferred");
122   if (type & ompt_task_untied)
123     progress += sprintf(progress, "|ompt_task_untied");
124   if (type & ompt_task_final)
125     progress += sprintf(progress, "|ompt_task_final");
126   if (type & ompt_task_mergeable)
127     progress += sprintf(progress, "|ompt_task_mergeable");
128   if (type & ompt_task_merged)
129     progress += sprintf(progress, "|ompt_task_merged");
130 }
131 
// Function pointers for the OMPT runtime entry points used by the helpers and
// callbacks in this file. They are only declared here.
// NOTE(review): presumably they are filled in during tool initialization from
// the runtime's lookup function - that code is outside this part of the file.
static ompt_set_callback_t ompt_set_callback;
static ompt_get_callback_t ompt_get_callback;
static ompt_get_state_t ompt_get_state;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_task_memory_t ompt_get_task_memory;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_finalize_tool_t ompt_finalize_tool;
static ompt_get_num_procs_t ompt_get_num_procs;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
149 
150 static void print_ids(int level)
151 {
152   int task_type, thread_num;
153   ompt_frame_t *frame;
154   ompt_data_t *task_parallel_data;
155   ompt_data_t *task_data;
156   int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
157                                        &task_parallel_data, &thread_num);
158   char buffer[2048];
159   format_task_type(task_type, buffer);
160   if (frame)
161     printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
162            ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
163            "task_type=%s=%d, thread_num=%d\n",
164            ompt_get_thread_data()->value, level,
165            exists_task ? task_parallel_data->value : 0,
166            exists_task ? task_data->value : 0, frame->exit_frame.ptr,
167            frame->enter_frame.ptr, buffer, task_type, thread_num);
168 }
169 
// get_frame_address(level) forwards to the compiler builtin
// __builtin_frame_address, so `level` has to be a compile-time constant.
#define get_frame_address(level) __builtin_frame_address(level)

// print_frame(level) prints the calling thread's id (the value stored in its
// OMPT thread data) followed by the frame address at the given level.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))
175 
// clang (version 5.0 and above) adds an intermediate function call with debug flag (-g)
//
// print_frame_from_outlined_fn(level) is only defined when the including file
// defines TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN. With DEBUG defined on
// clang >= 5 it prints the frame one level further up to skip that
// intermediate call; otherwise it behaves like print_frame(level). The two
// warnings remind the user that DEBUG has to be kept in sync with -g by hand.
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
  #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
    #define print_frame_from_outlined_fn(level) print_frame(level+1)
  #else
    #define print_frame_from_outlined_fn(level) print_frame(level)
  #endif

  #if defined(__clang__) && __clang_major__ >= 5
    #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
    #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
  #endif
#endif
189 
// This macro helps to define a label at the current position that can be used
// to get the current address in the code.
//
// For print_current_address():
//   To reliably determine the offset between the address of the label and the
//   actual return address, we insert a NOP instruction as a jump target as the
//   compiler would otherwise insert an instruction that we can't control. The
//   instruction length is target dependent and is explained below.
//
// (The empty block between "#pragma omp ..." and the __asm__ statement is a
// workaround for a bug in the Intel Compiler.)
//
// The expansion ends with the label "ompt_label_<id>:" itself, so the macro
// must be followed directly by a statement, and each id can be used only once
// per function.
#define define_ompt_label(id) \
  {} \
  __asm__("nop"); \
ompt_label_##id:
205 
// This macro helps to get the address of a label that is inserted by the above
// macro define_ompt_label(). The address is obtained with a GNU extension
// (&&label) that has been tested with gcc, clang and icc. `id` must be the
// same token that was passed to define_ompt_label() in the same function.
#define get_ompt_label_address(id) (&& ompt_label_##id)
210 
// This macro prints the exact address that a previously called runtime function
// returns to. It places the label for `id` at the current position and hands
// the label's address to print_possible_return_addresses(), which subtracts
// the architecture-specific instruction offsets and prints the candidates.
#define print_current_address(id) \
  define_ompt_label(id) \
  print_possible_return_addresses(get_ompt_label_address(id))
216 
217 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
// a MOV instruction for non-void runtime functions which is 3 bytes long.
// Prints both candidates: label - 1 and label - 4. The argument is
// parenthesized so that any pointer expression can be passed safely.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
         ompt_get_thread_data()->value, ((char *)(addr)) - 1, \
         ((char *)(addr)) - 4)
223 #elif KMP_ARCH_PPC64
// On Power the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a second NOP instruction (another 4 bytes). For non-void runtime
// functions Clang inserts a STW instruction (but only if compiling under
// -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
// Prints both candidates: label - 8 and label - 12. The argument is
// parenthesized so that any pointer expression can be passed safely.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)(addr)) - 8, \
         ((char *)(addr)) - 12)
231 #elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
// On AArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted store instruction (another 4 bytes long).
// FIXME: PR #65696 added a third possibility (12 byte offset) to make the
// tests pass on Darwin. Adding the same for other OSes. However, the proper
// fix for this is to remove the extra branch instruction being generated by
// the AArch64 backend. See issue #69627.
// The argument is parenthesized so that any pointer expression can be passed
// safely.
#define print_possible_return_addresses(addr)                                  \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
         ompt_get_thread_data()->value, ((char *)(addr)) - 4,                  \
         ((char *)(addr)) - 8, ((char *)(addr)) - 12)
242 #elif KMP_ARCH_RISCV64
243 #if __riscv_compressed
// On RV64GC the C.NOP instruction is 2 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (i.e.
// another branch).
// Prints both candidates: label - 6 and label - 10. The argument is
// parenthesized so that any pointer expression can be passed safely.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)(addr)) - 6, \
         ((char *)(addr)) - 10)
252 #else
// On RV64G the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the NOP is referenced elsewhere (i.e.
// another branch).
// Prints both candidates: label - 8 and label - 12. The argument is
// parenthesized so that any pointer expression can be passed safely.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)(addr)) - 8, \
         ((char *)(addr)) - 12)
261 #endif
262 #elif KMP_ARCH_LOONGARCH64
// On LoongArch64 the NOP instruction is 4 bytes long and can be followed by
// an inserted jump instruction (another 4 bytes long). An additional jump
// instruction may appear (adding 4 more bytes) when the NOP is referenced
// elsewhere (i.e. another branch).
// The argument is parenthesized so that any pointer expression can be passed
// safely.
#define print_possible_return_addresses(addr)                                  \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
         ompt_get_thread_data()->value, ((char *)(addr)) - 4,                  \
         ((char *)(addr)) - 8, ((char *)(addr)) - 12)
271 #elif KMP_ARCH_VE
// On VE the NOP instruction is 8 bytes long. In addition, the compiler inserts
// a ??? instruction for non-void runtime functions which is ? bytes long.
// Because that second offset is not known, both printed candidates are
// currently the same address (label - 8). The argument is parenthesized so
// that any pointer expression can be passed safely.
#define print_possible_return_addresses(addr)                                  \
  printf("%" PRIu64 ": current_address=%p or %p\n",                            \
         ompt_get_thread_data()->value, ((char *)(addr)) - 8,                  \
         ((char *)(addr)) - 8)
278 #elif KMP_ARCH_S390X
// On s390x the NOP instruction is 2 bytes long. For non-void runtime
// functions Clang inserts a STY instruction (but only if compiling under
// -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
//
// Another possibility is:
//
//                brasl %r14,__kmpc_end_master@plt
//   a7 f4 00 02  j 0f
//   47 00 00 00  0: nop
//   a7 f4 00 02  j addr
//                addr:
//
// The argument is parenthesized so that any pointer expression can be passed
// safely.
#define print_possible_return_addresses(addr)                                  \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n",                      \
         ompt_get_thread_data()->value, ((char *)(addr)) - 2,                  \
         ((char *)(addr)) - 8, ((char *)(addr)) - 12)
294 #else
295 #error Unsupported target architecture, cannot determine address offset!
296 #endif
297 
298 
// This macro performs a somewhat similar job to print_current_address(), except
// that it discards a certain number of nibbles from the address and only prints
// the most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed as we are a few bytes above the relevant power of two) the addresses
// of neighbouring blocks are printed next to the "current" one; see
// print_fuzzy_address_blocks() for the exact set.
#define print_fuzzy_address(id) \
  define_ompt_label(id) \
  print_fuzzy_address_blocks(get_ompt_label_address(id))
310 
// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
// Size in bytes of the address block that shares the same remaining nibbles.
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
// Prints the block number of `addr` (the address with the low nibbles dropped)
// together with the previous block and the two following blocks, and finally
// the exact address. The pointer is converted through uintptr_t so that the
// conversion is well-defined on targets where pointers are narrower than 64
// bits, and the argument is parenthesized so that any pointer expression can
// be passed safely.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
323 
// Registers the handler on_<name> for the OMPT event <name>. The handler is
// first stored in a local variable of the callback type `type`, which lets the
// compiler flag a signature mismatch before the pointer is cast to the generic
// ompt_callback_t. Only an ompt_set_never result is reported (on stdout); any
// other return value of ompt_set_callback() is ignored.
#define register_ompt_callback_t(name, type)                                   \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never)  \
      printf("0: Could not register callback '" #name "'\n");                  \
  } while (0)

// Shorthand for the common case where the callback type is named <name>_t.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
332 
333 #ifndef USE_PRIVATE_TOOL
334 static void
335 on_ompt_callback_mutex_acquire(
336   ompt_mutex_t kind,
337   unsigned int hint,
338   unsigned int impl,
339   ompt_wait_id_t wait_id,
340   const void *codeptr_ra)
341 {
342   switch(kind)
343   {
344     case ompt_mutex_lock:
345       printf("%" PRIu64 ":" _TOOL_PREFIX
346              " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
347              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
348              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
349       break;
350     case ompt_mutex_test_lock:
351       printf("%" PRIu64 ":" _TOOL_PREFIX
352              " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
353              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
354              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
355       break;
356     case ompt_mutex_nest_lock:
357       printf("%" PRIu64 ":" _TOOL_PREFIX
358              " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
359              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
360              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
361       break;
362     case ompt_mutex_test_nest_lock:
363       printf("%" PRIu64 ":" _TOOL_PREFIX
364              " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64
365              ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
366              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
367       break;
368     case ompt_mutex_critical:
369       printf("%" PRIu64 ":" _TOOL_PREFIX
370              " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
371              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
372              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
373       break;
374     case ompt_mutex_atomic:
375       printf("%" PRIu64 ":" _TOOL_PREFIX
376              " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
377              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
378              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
379       break;
380     case ompt_mutex_ordered:
381       printf("%" PRIu64 ":" _TOOL_PREFIX
382              " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
383              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
384              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
385       break;
386     default:
387       break;
388   }
389 }
390 
391 static void
392 on_ompt_callback_mutex_acquired(
393   ompt_mutex_t kind,
394   ompt_wait_id_t wait_id,
395   const void *codeptr_ra)
396 {
397   switch(kind)
398   {
399     case ompt_mutex_lock:
400       printf("%" PRIu64 ":" _TOOL_PREFIX
401              " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
402              ompt_get_thread_data()->value, wait_id, codeptr_ra);
403       break;
404     case ompt_mutex_test_lock:
405       printf("%" PRIu64 ":" _TOOL_PREFIX
406              " ompt_event_acquired_test_lock: wait_id=%" PRIu64
407              ", codeptr_ra=%p \n",
408              ompt_get_thread_data()->value, wait_id, codeptr_ra);
409       break;
410     case ompt_mutex_nest_lock:
411       printf("%" PRIu64 ":" _TOOL_PREFIX
412              " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
413              ", codeptr_ra=%p \n",
414              ompt_get_thread_data()->value, wait_id, codeptr_ra);
415       break;
416     case ompt_mutex_test_nest_lock:
417       printf("%" PRIu64 ":" _TOOL_PREFIX
418              " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
419              ", codeptr_ra=%p \n",
420              ompt_get_thread_data()->value, wait_id, codeptr_ra);
421       break;
422     case ompt_mutex_critical:
423       printf("%" PRIu64 ":" _TOOL_PREFIX
424              " ompt_event_acquired_critical: wait_id=%" PRIu64
425              ", codeptr_ra=%p \n",
426              ompt_get_thread_data()->value, wait_id, codeptr_ra);
427       break;
428     case ompt_mutex_atomic:
429       printf("%" PRIu64 ":" _TOOL_PREFIX
430              " ompt_event_acquired_atomic: wait_id=%" PRIu64
431              ", codeptr_ra=%p \n",
432              ompt_get_thread_data()->value, wait_id, codeptr_ra);
433       break;
434     case ompt_mutex_ordered:
435       printf("%" PRIu64 ":" _TOOL_PREFIX
436              " ompt_event_acquired_ordered: wait_id=%" PRIu64
437              ", codeptr_ra=%p \n",
438              ompt_get_thread_data()->value, wait_id, codeptr_ra);
439       break;
440     default:
441       break;
442   }
443 }
444 
445 static void
446 on_ompt_callback_mutex_released(
447   ompt_mutex_t kind,
448   ompt_wait_id_t wait_id,
449   const void *codeptr_ra)
450 {
451   switch(kind)
452   {
453     case ompt_mutex_lock:
454       printf("%" PRIu64 ":" _TOOL_PREFIX
455              " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
456              ompt_get_thread_data()->value, wait_id, codeptr_ra);
457       break;
458     case ompt_mutex_nest_lock:
459       printf("%" PRIu64 ":" _TOOL_PREFIX
460              " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
461              ", codeptr_ra=%p \n",
462              ompt_get_thread_data()->value, wait_id, codeptr_ra);
463       break;
464     case ompt_mutex_critical:
465       printf("%" PRIu64 ":" _TOOL_PREFIX
466              " ompt_event_release_critical: wait_id=%" PRIu64
467              ", codeptr_ra=%p \n",
468              ompt_get_thread_data()->value, wait_id, codeptr_ra);
469       break;
470     case ompt_mutex_atomic:
471       printf("%" PRIu64 ":" _TOOL_PREFIX
472              " ompt_event_release_atomic: wait_id=%" PRIu64
473              ", codeptr_ra=%p \n",
474              ompt_get_thread_data()->value, wait_id, codeptr_ra);
475       break;
476     case ompt_mutex_ordered:
477       printf("%" PRIu64 ":" _TOOL_PREFIX
478              " ompt_event_release_ordered: wait_id=%" PRIu64
479              ", codeptr_ra=%p \n",
480              ompt_get_thread_data()->value, wait_id, codeptr_ra);
481       break;
482     default:
483       break;
484   }
485 }
486 
487 static void
488 on_ompt_callback_nest_lock(
489     ompt_scope_endpoint_t endpoint,
490     ompt_wait_id_t wait_id,
491     const void *codeptr_ra)
492 {
493   switch(endpoint)
494   {
495     case ompt_scope_begin:
496       printf("%" PRIu64 ":" _TOOL_PREFIX
497              " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
498              ", codeptr_ra=%p \n",
499              ompt_get_thread_data()->value, wait_id, codeptr_ra);
500       break;
501     case ompt_scope_end:
502       printf("%" PRIu64 ":" _TOOL_PREFIX
503              " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
504              ", codeptr_ra=%p \n",
505              ompt_get_thread_data()->value, wait_id, codeptr_ra);
506       break;
507     case ompt_scope_beginend:
508       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
509       exit(-1);
510   }
511 }
512 
513 static void
514 on_ompt_callback_sync_region(
515   ompt_sync_region_t kind,
516   ompt_scope_endpoint_t endpoint,
517   ompt_data_t *parallel_data,
518   ompt_data_t *task_data,
519   const void *codeptr_ra)
520 {
521   if (endpoint == ompt_scope_beginend) {
522     printf("ompt_scope_beginend should never be passed to %s\n", __func__);
523     exit(-1);
524   }
525   if (kind == ompt_sync_region_reduction) {
526     printf("ompt_sync_region_reduction should never be passed to %s\n",
527            __func__);
528     exit(-1);
529   }
530   uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
531   const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
532   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_%s: parallel_id=%" PRIu64
533          ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
534          ompt_get_thread_data()->value, ompt_sync_region_t_values[kind],
535          begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
536   switch (kind) {
537   case ompt_sync_region_barrier:
538   case ompt_sync_region_barrier_implicit:
539   case ompt_sync_region_barrier_implicit_workshare:
540   case ompt_sync_region_barrier_implicit_parallel:
541   case ompt_sync_region_barrier_teams:
542   case ompt_sync_region_barrier_explicit:
543   case ompt_sync_region_barrier_implementation:
544     if (endpoint == ompt_scope_begin)
545       print_ids(0);
546   default:;
547   }
548 }
549 
550 static void
551 on_ompt_callback_sync_region_wait(
552   ompt_sync_region_t kind,
553   ompt_scope_endpoint_t endpoint,
554   ompt_data_t *parallel_data,
555   ompt_data_t *task_data,
556   const void *codeptr_ra)
557 {
558   if (endpoint == ompt_scope_beginend) {
559     printf("ompt_scope_beginend should never be passed to %s\n", __func__);
560     exit(-1);
561   }
562   if (kind == ompt_sync_region_reduction) {
563     printf("ompt_sync_region_reduction should never be passed to %s\n",
564            __func__);
565     exit(-1);
566   }
567   uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0;
568   const char *begin_or_end = (endpoint == ompt_scope_begin) ? "begin" : "end";
569   printf("%" PRIu64 ":" _TOOL_PREFIX
570          " ompt_event_wait_%s_%s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
571          ", codeptr_ra=%p\n",
572          ompt_get_thread_data()->value, ompt_sync_region_t_values[kind],
573          begin_or_end, parallel_data_value, task_data->value, codeptr_ra);
574 }
575 
576 static void on_ompt_callback_reduction(ompt_sync_region_t kind,
577                                        ompt_scope_endpoint_t endpoint,
578                                        ompt_data_t *parallel_data,
579                                        ompt_data_t *task_data,
580                                        const void *codeptr_ra) {
581   switch (endpoint) {
582   case ompt_scope_begin:
583     printf("%" PRIu64 ":" _TOOL_PREFIX
584            " ompt_event_reduction_begin: parallel_id=%" PRIu64
585            ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
586            ompt_get_thread_data()->value,
587            (parallel_data) ? parallel_data->value : 0, task_data->value,
588            codeptr_ra);
589     break;
590   case ompt_scope_end:
591     printf("%" PRIu64 ":" _TOOL_PREFIX
592            " ompt_event_reduction_end: parallel_id=%" PRIu64
593            ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
594            ompt_get_thread_data()->value,
595            (parallel_data) ? parallel_data->value : 0, task_data->value,
596            codeptr_ra);
597     break;
598   case ompt_scope_beginend:
599     printf("ompt_scope_beginend should never be passed to %s\n", __func__);
600     exit(-1);
601   }
602 }
603 
604 static void
605 on_ompt_callback_flush(
606     ompt_data_t *thread_data,
607     const void *codeptr_ra)
608 {
609   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
610          thread_data->value, codeptr_ra);
611 }
612 
613 static void
614 on_ompt_callback_cancel(
615     ompt_data_t *task_data,
616     int flags,
617     const void *codeptr_ra)
618 {
619   const char* first_flag_value;
620   const char* second_flag_value;
621   if(flags & ompt_cancel_parallel)
622     first_flag_value = ompt_cancel_flag_t_values[0];
623   else if(flags & ompt_cancel_sections)
624     first_flag_value = ompt_cancel_flag_t_values[1];
625   else if(flags & ompt_cancel_loop)
626     first_flag_value = ompt_cancel_flag_t_values[2];
627   else if(flags & ompt_cancel_taskgroup)
628     first_flag_value = ompt_cancel_flag_t_values[3];
629 
630   if(flags & ompt_cancel_activated)
631     second_flag_value = ompt_cancel_flag_t_values[4];
632   else if(flags & ompt_cancel_detected)
633     second_flag_value = ompt_cancel_flag_t_values[5];
634   else if(flags & ompt_cancel_discarded_task)
635     second_flag_value = ompt_cancel_flag_t_values[6];
636 
637   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
638          ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
639          ompt_get_thread_data()->value, task_data->value, first_flag_value,
640          second_flag_value, flags, codeptr_ra);
641 }
642 
643 static void
644 on_ompt_callback_implicit_task(
645     ompt_scope_endpoint_t endpoint,
646     ompt_data_t *parallel_data,
647     ompt_data_t *task_data,
648     unsigned int team_size,
649     unsigned int thread_num,
650     int flags)
651 {
652   switch(endpoint)
653   {
654     case ompt_scope_begin:
655       if(task_data->ptr)
656         printf("%s\n", "0: task_data initially not null");
657       task_data->value = ompt_get_unique_id();
658 
659       //there is no parallel_begin callback for implicit parallel region
660       //thus it is initialized in initial task
661       if(flags & ompt_task_initial)
662       {
663         char buffer[2048];
664 
665         format_task_type(flags, buffer);
666         // Only check initial task not created by teams construct
667         if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
668           printf("%s\n", "0: parallel_data initially not null");
669         parallel_data->value = ompt_get_unique_id();
670         printf("%" PRIu64 ":" _TOOL_PREFIX
671                " ompt_event_initial_task_begin: parallel_id=%" PRIu64
672                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
673                ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
674                ompt_get_thread_data()->value, parallel_data->value,
675                task_data->value, team_size, thread_num, flags);
676       } else {
677         printf("%" PRIu64 ":" _TOOL_PREFIX
678                " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
679                ", task_id=%" PRIu64 ", team_size=%" PRIu32
680                ", thread_num=%" PRIu32 "\n",
681                ompt_get_thread_data()->value, parallel_data->value,
682                task_data->value, team_size, thread_num);
683       }
684 
685       break;
686     case ompt_scope_end:
687       if(flags & ompt_task_initial){
688         printf("%" PRIu64 ":" _TOOL_PREFIX
689                " ompt_event_initial_task_end: parallel_id=%" PRIu64
690                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
691                ", index=%" PRIu32 "\n",
692                ompt_get_thread_data()->value,
693                (parallel_data) ? parallel_data->value : 0, task_data->value,
694                team_size, thread_num);
695       } else {
696         printf("%" PRIu64 ":" _TOOL_PREFIX
697                " ompt_event_implicit_task_end: parallel_id=%" PRIu64
698                ", task_id=%" PRIu64 ", team_size=%" PRIu32
699                ", thread_num=%" PRIu32 "\n",
700                ompt_get_thread_data()->value,
701                (parallel_data) ? parallel_data->value : 0, task_data->value,
702                team_size, thread_num);
703       }
704       break;
705     case ompt_scope_beginend:
706       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
707       exit(-1);
708   }
709 }
710 
711 static void
712 on_ompt_callback_lock_init(
713   ompt_mutex_t kind,
714   unsigned int hint,
715   unsigned int impl,
716   ompt_wait_id_t wait_id,
717   const void *codeptr_ra)
718 {
719   switch(kind)
720   {
721     case ompt_mutex_lock:
722       printf("%" PRIu64 ":" _TOOL_PREFIX
723              " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
724              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
725              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
726       break;
727     case ompt_mutex_nest_lock:
728       printf("%" PRIu64 ":" _TOOL_PREFIX
729              " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
730              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
731              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
732       break;
733     default:
734       break;
735   }
736 }
737 
738 static void
739 on_ompt_callback_lock_destroy(
740   ompt_mutex_t kind,
741   ompt_wait_id_t wait_id,
742   const void *codeptr_ra)
743 {
744   switch(kind)
745   {
746     case ompt_mutex_lock:
747       printf("%" PRIu64 ":" _TOOL_PREFIX
748              " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
749              ompt_get_thread_data()->value, wait_id, codeptr_ra);
750       break;
751     case ompt_mutex_nest_lock:
752       printf("%" PRIu64 ":" _TOOL_PREFIX
753              " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
754              ", codeptr_ra=%p \n",
755              ompt_get_thread_data()->value, wait_id, codeptr_ra);
756       break;
757     default:
758       break;
759   }
760 }
761 
762 static void
763 on_ompt_callback_work(
764   ompt_work_t wstype,
765   ompt_scope_endpoint_t endpoint,
766   ompt_data_t *parallel_data,
767   ompt_data_t *task_data,
768   uint64_t count,
769   const void *codeptr_ra)
770 {
771   switch(endpoint)
772   {
773   case ompt_scope_begin:
774     printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIu64
775            ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
776            ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
777            parallel_data->value, task_data->value, codeptr_ra, count);
778     break;
779   case ompt_scope_end:
780     printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIu64
781            ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
782            ompt_get_thread_data()->value, ompt_work_events_t_values[wstype],
783            parallel_data->value, task_data->value, codeptr_ra, count);
784     break;
785   case ompt_scope_beginend:
786     printf("ompt_scope_beginend should never be passed to %s\n", __func__);
787     exit(-1);
788   }
789 }
790 
791 static void on_ompt_callback_dispatch(
792     ompt_data_t *parallel_data,
793     ompt_data_t *task_data,
794     ompt_dispatch_t kind,
795     ompt_data_t instance) {
796   char *event_name = NULL;
797   void *codeptr_ra = NULL;
798   ompt_dispatch_chunk_t *dispatch_chunk = NULL;
799   switch (kind) {
800   case ompt_dispatch_section:
801     event_name = "ompt_event_section_begin";
802     codeptr_ra = instance.ptr;
803     break;
804   case ompt_dispatch_ws_loop_chunk:
805     event_name = "ompt_event_ws_loop_chunk_begin";
806     dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
807     break;
808   case ompt_dispatch_taskloop_chunk:
809     event_name = "ompt_event_taskloop_chunk_begin";
810     dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
811     break;
812   case ompt_dispatch_distribute_chunk:
813     event_name = "ompt_event_distribute_chunk_begin";
814     dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
815     break;
816   default:
817     event_name = "ompt_ws_loop_iteration_begin";
818   }
819   printf("%" PRIu64 ":" _TOOL_PREFIX
820          " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
821          ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
822          "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
823          task_data->value, codeptr_ra,
824          dispatch_chunk ? dispatch_chunk->start : 0,
825          dispatch_chunk ? dispatch_chunk->iterations : 0);
826 }
827 
828 static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
829                                     ompt_data_t *parallel_data,
830                                     ompt_data_t *task_data,
831                                     const void *codeptr_ra) {
832   switch(endpoint)
833   {
834     case ompt_scope_begin:
835       printf("%" PRIu64 ":" _TOOL_PREFIX
836              " ompt_event_masked_begin: parallel_id=%" PRIu64
837              ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
838              ompt_get_thread_data()->value, parallel_data->value,
839              task_data->value, codeptr_ra);
840       break;
841     case ompt_scope_end:
842       printf("%" PRIu64 ":" _TOOL_PREFIX
843              " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
844              ", codeptr_ra=%p\n",
845              ompt_get_thread_data()->value, parallel_data->value,
846              task_data->value, codeptr_ra);
847       break;
848     case ompt_scope_beginend:
849       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
850       exit(-1);
851   }
852 }
853 
854 static void on_ompt_callback_parallel_begin(
855     ompt_data_t *encountering_task_data,
856     const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
857     uint32_t requested_team_size, int flag, const void *codeptr_ra) {
858   if(parallel_data->ptr)
859     printf("0: parallel_data initially not null\n");
860   parallel_data->value = ompt_get_unique_id();
861   int invoker = flag & 0xF;
862   const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
863   const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
864   printf("%" PRIu64 ":" _TOOL_PREFIX
865          " ompt_event_%s_begin: parent_task_id=%" PRIu64
866          ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
867          "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
868          ", codeptr_ra=%p, invoker=%d\n",
869          ompt_get_thread_data()->value, event, encountering_task_data->value,
870          encountering_task_frame->exit_frame.ptr,
871          encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
872          requested_team_size, codeptr_ra, invoker);
873 }
874 
875 static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
876                                           ompt_data_t *encountering_task_data,
877                                           int flag, const void *codeptr_ra) {
878   int invoker = flag & 0xF;
879   const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
880   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64
881          ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
882          ompt_get_thread_data()->value, event, parallel_data->value,
883          encountering_task_data->value, invoker, codeptr_ra);
884 }
885 
886 static void
887 on_ompt_callback_task_create(
888     ompt_data_t *encountering_task_data,
889     const ompt_frame_t *encountering_task_frame,
890     ompt_data_t* new_task_data,
891     int type,
892     int has_dependences,
893     const void *codeptr_ra)
894 {
895   if(new_task_data->ptr)
896     printf("0: new_task_data initially not null\n");
897   new_task_data->value = ompt_get_unique_id();
898   char buffer[2048];
899 
900   format_task_type(type, buffer);
901 
902   printf(
903       "%" PRIu64 ":" _TOOL_PREFIX
904       " ompt_event_task_create: parent_task_id=%" PRIu64
905       ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
906       "new_task_id=%" PRIu64
907       ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
908       ompt_get_thread_data()->value,
909       encountering_task_data ? encountering_task_data->value : 0,
910       encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
911       encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
912       new_task_data->value, codeptr_ra, buffer, type,
913       has_dependences ? "yes" : "no");
914 }
915 
916 static void
917 on_ompt_callback_task_schedule(
918     ompt_data_t *first_task_data,
919     ompt_task_status_t prior_task_status,
920     ompt_data_t *second_task_data)
921 {
922   printf("%" PRIu64 ":" _TOOL_PREFIX
923          " ompt_event_task_schedule: first_task_id=%" PRIu64
924          ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
925          ompt_get_thread_data()->value, first_task_data->value,
926          (second_task_data ? second_task_data->value : -1),
927          ompt_task_status_t_values[prior_task_status], prior_task_status);
928   if (prior_task_status == ompt_task_complete ||
929       prior_task_status == ompt_task_late_fulfill ||
930       prior_task_status == ompt_taskwait_complete) {
931     printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
932            "\n", ompt_get_thread_data()->value, first_task_data->value);
933   }
934 }
935 
936 static void
937 on_ompt_callback_dependences(
938   ompt_data_t *task_data,
939   const ompt_dependence_t *deps,
940   int ndeps)
941 {
942   char buffer[2048];
943   char *progress = buffer;
944   int i;
945   for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
946     if (deps[i].dependence_type == ompt_dependence_type_source ||
947         deps[i].dependence_type == ompt_dependence_type_sink)
948       progress +=
949           sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
950                   ompt_dependence_type_t_values[deps[i].dependence_type]);
951     else
952       progress +=
953           sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
954                   ompt_dependence_type_t_values[deps[i].dependence_type]);
955   }
956   if (ndeps > 0)
957     progress[-2] = 0;
958   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
959          ", deps=[%s], ndeps=%d\n",
960          ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
961 }
962 
963 static void
964 on_ompt_callback_task_dependence(
965   ompt_data_t *first_task_data,
966   ompt_data_t *second_task_data)
967 {
968   printf("%" PRIu64 ":" _TOOL_PREFIX
969          " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
970          ", second_task_id=%" PRIu64 "\n",
971          ompt_get_thread_data()->value, first_task_data->value,
972          second_task_data->value);
973 }
974 
975 static void
976 on_ompt_callback_thread_begin(
977   ompt_thread_t thread_type,
978   ompt_data_t *thread_data)
979 {
980   if(thread_data->ptr)
981     printf("%s\n", "0: thread_data initially not null");
982   thread_data->value = ompt_get_unique_id();
983   printf("%" PRIu64 ":" _TOOL_PREFIX
984          " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
985          ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
986          thread_type, thread_data->value);
987 }
988 
989 static void
990 on_ompt_callback_thread_end(
991   ompt_data_t *thread_data)
992 {
993   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
994          "\n",
995          ompt_get_thread_data()->value, thread_data->value);
996 }
997 
998 static int
999 on_ompt_callback_control_tool(
1000   uint64_t command,
1001   uint64_t modifier,
1002   void *arg,
1003   const void *codeptr_ra)
1004 {
1005   ompt_frame_t* omptTaskFrame;
1006   ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1007   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1008          ", modifier=%" PRIu64
1009          ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1010          "current_task_frame.reenter=%p \n",
1011          ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1012          omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1013 
1014   // the following would interfere with expected output for OMPT tests, so skip
1015 #ifndef _OMPT_TESTS
1016   // print task data
1017   int task_level = 0;
1018   ompt_data_t *task_data;
1019   while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1020                             NULL, NULL)) {
1021     printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1022            ompt_get_thread_data()->value, task_level, task_data->value);
1023     task_level++;
1024   }
1025 
1026   // print parallel data
1027   int parallel_level = 0;
1028   ompt_data_t *parallel_data;
1029   while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1030                                 NULL)) {
1031     printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1032            "\n",
1033            ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1034     parallel_level++;
1035   }
1036 #endif
1037   return 0; //success
1038 }
1039 
1040 static void on_ompt_callback_error(ompt_severity_t severity,
1041                                    const char *message, size_t length,
1042                                    const void *codeptr_ra) {
1043   printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
1044          ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
1045          ompt_get_thread_data()->value, severity, message, (uint64_t)length,
1046          codeptr_ra);
1047 }
1048 
1049 int ompt_initialize(
1050   ompt_function_lookup_t lookup,
1051   int initial_device_num,
1052   ompt_data_t *tool_data)
1053 {
1054   ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
1055   ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
1056   ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
1057   ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
1058   ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
1059   ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
1060   ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
1061   ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
1062   ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");
1063 
1064   ompt_get_unique_id();
1065 
1066   ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
1067   ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
1068   ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
1069   ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
1070   ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
1071   ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
1072   ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
1073   ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
1074 
1075   register_ompt_callback(ompt_callback_mutex_acquire);
1076   register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
1077   register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
1078   register_ompt_callback(ompt_callback_nest_lock);
1079   register_ompt_callback(ompt_callback_sync_region);
1080   register_ompt_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
1081   register_ompt_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
1082   register_ompt_callback(ompt_callback_control_tool);
1083   register_ompt_callback(ompt_callback_flush);
1084   register_ompt_callback(ompt_callback_cancel);
1085   register_ompt_callback(ompt_callback_implicit_task);
1086   register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
1087   register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
1088   register_ompt_callback(ompt_callback_work);
1089   register_ompt_callback(ompt_callback_dispatch);
1090   register_ompt_callback(ompt_callback_masked);
1091   register_ompt_callback(ompt_callback_parallel_begin);
1092   register_ompt_callback(ompt_callback_parallel_end);
1093   register_ompt_callback(ompt_callback_task_create);
1094   register_ompt_callback(ompt_callback_task_schedule);
1095   register_ompt_callback(ompt_callback_dependences);
1096   register_ompt_callback(ompt_callback_task_dependence);
1097   register_ompt_callback(ompt_callback_thread_begin);
1098   register_ompt_callback(ompt_callback_thread_end);
1099   register_ompt_callback(ompt_callback_error);
1100   printf("0: NULL_POINTER=%p\n", (void*)NULL);
1101   return 1; //success
1102 }
1103 
1104 void ompt_finalize(ompt_data_t *tool_data)
1105 {
1106   printf("0: ompt_event_runtime_shutdown\n");
1107 }
1108 
1109 #ifdef __cplusplus
1110 extern "C" {
1111 #endif
1112 ompt_start_tool_result_t* ompt_start_tool(
1113   unsigned int omp_version,
1114   const char *runtime_version)
1115 {
1116   static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1117   return &ompt_start_tool_result;
1118 }
1119 #ifdef __cplusplus
1120 }
1121 #endif
1122 #endif // ifndef USE_PRIVATE_TOOL
1123 #ifdef _OMPT_TESTS
1124 #undef _OMPT_TESTS
1125 #endif
1126