1 #ifndef _BSD_SOURCE 2 #define _BSD_SOURCE 3 #endif 4 #ifndef _DEFAULT_SOURCE 5 #define _DEFAULT_SOURCE 6 #endif 7 #include <stdio.h> 8 #ifndef __STDC_FORMAT_MACROS 9 #define __STDC_FORMAT_MACROS 10 #endif 11 #include <inttypes.h> 12 #include <omp.h> 13 #include <omp-tools.h> 14 #include "ompt-signal.h" 15 16 // Used to detect architecture 17 #include "../../src/kmp_platform.h" 18 19 #ifndef _TOOL_PREFIX 20 #define _TOOL_PREFIX "" 21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test 22 #define _OMPT_TESTS 23 #endif 24 25 static const char *ompt_thread_t_values[] = { 26 "ompt_thread_UNDEFINED", "ompt_thread_initial", "ompt_thread_worker", 27 "ompt_thread_other"}; 28 29 static const char *ompt_task_status_t_values[] = { 30 "ompt_task_UNDEFINED", 31 "ompt_task_complete", // 1 32 "ompt_task_yield", // 2 33 "ompt_task_cancel", // 3 34 "ompt_task_detach", // 4 35 "ompt_task_early_fulfill", // 5 36 "ompt_task_late_fulfill", // 6 37 "ompt_task_switch", // 7 38 "ompt_taskwait_complete" // 8 39 }; 40 static const char* ompt_cancel_flag_t_values[] = { 41 "ompt_cancel_parallel", 42 "ompt_cancel_sections", 43 "ompt_cancel_loop", 44 "ompt_cancel_taskgroup", 45 "ompt_cancel_activated", 46 "ompt_cancel_detected", 47 "ompt_cancel_discarded_task" 48 }; 49 50 static const char *ompt_work_t_values[] = {"undefined", 51 "ompt_work_loop", 52 "ompt_work_sections", 53 "ompt_work_single_executor", 54 "ompt_work_single_other", 55 "ompt_work_workshare", 56 "ompt_work_distribute", 57 "ompt_work_taskloop", 58 "ompt_work_scope", 59 "ompt_work_workdistribute", 60 "ompt_work_loop_static", 61 "ompt_work_loop_dynamic", 62 "ompt_work_loop_guided", 63 "ompt_work_loop_other"}; 64 65 static const char *ompt_work_events_t_values[] = {"undefined", 66 "ompt_event_loop", 67 "ompt_event_sections", 68 "ompt_event_single_in_block", 69 "ompt_event_single_others", 70 "ompt_event_workshare", 71 "ompt_event_distribute", 72 "ompt_event_taskloop", 73 "ompt_event_scope", 74 
"ompt_event_workdistribute", 75 "ompt_event_loop_static", 76 "ompt_event_loop_dynamic", 77 "ompt_event_loop_guided", 78 "ompt_event_loop_other"}; 79 80 static const char *ompt_dependence_type_t_values[36] = { 81 "ompt_dependence_type_UNDEFINED", 82 "ompt_dependence_type_in", // 1 83 "ompt_dependence_type_out", // 2 84 "ompt_dependence_type_inout", // 3 85 "ompt_dependence_type_mutexinoutset", // 4 86 "ompt_dependence_type_source", // 5 87 "ompt_dependence_type_sink", // 6 88 "ompt_dependence_type_inoutset", // 7 89 "", "", "", "", "", "", // 8-13 90 "", "", "", "", "", "", "", "", "", "", // 14-23 91 "", "", "", "", "", "", "", "", "", "", // 24-33 92 "ompt_dependence_type_out_all_memory", // 34 93 "ompt_dependence_type_inout_all_memory" // 35 94 }; 95 96 static const char *ompt_sync_region_t_values[] = {"undefined", 97 "barrier", 98 "barrier_implicit", 99 "barrier_explicit", 100 "barrier_implementation", 101 "taskwait", 102 "taskgroup", 103 "reduction", 104 "barrier_implicit_workshare", 105 "barrier_implicit_parallel", 106 "barrier_teams"}; 107 108 static void format_task_type(int type, char *buffer) { 109 char *progress = buffer; 110 if (type & ompt_task_initial) 111 progress += sprintf(progress, "ompt_task_initial"); 112 if (type & ompt_task_implicit) 113 progress += sprintf(progress, "ompt_task_implicit"); 114 if (type & ompt_task_explicit) 115 progress += sprintf(progress, "ompt_task_explicit"); 116 if (type & ompt_task_target) 117 progress += sprintf(progress, "ompt_task_target"); 118 if (type & ompt_task_taskwait) 119 progress += sprintf(progress, "ompt_task_taskwait"); 120 if (type & ompt_task_undeferred) 121 progress += sprintf(progress, "|ompt_task_undeferred"); 122 if (type & ompt_task_untied) 123 progress += sprintf(progress, "|ompt_task_untied"); 124 if (type & ompt_task_final) 125 progress += sprintf(progress, "|ompt_task_final"); 126 if (type & ompt_task_mergeable) 127 progress += sprintf(progress, "|ompt_task_mergeable"); 128 if (type & 
ompt_task_merged) 129 progress += sprintf(progress, "|ompt_task_merged"); 130 } 131 132 static ompt_set_callback_t ompt_set_callback; 133 static ompt_get_callback_t ompt_get_callback; 134 static ompt_get_state_t ompt_get_state; 135 static ompt_get_task_info_t ompt_get_task_info; 136 static ompt_get_task_memory_t ompt_get_task_memory; 137 static ompt_get_thread_data_t ompt_get_thread_data; 138 static ompt_get_parallel_info_t ompt_get_parallel_info; 139 static ompt_get_unique_id_t ompt_get_unique_id; 140 static ompt_finalize_tool_t ompt_finalize_tool; 141 static ompt_get_num_procs_t ompt_get_num_procs; 142 static ompt_get_num_places_t ompt_get_num_places; 143 static ompt_get_place_proc_ids_t ompt_get_place_proc_ids; 144 static ompt_get_place_num_t ompt_get_place_num; 145 static ompt_get_partition_place_nums_t ompt_get_partition_place_nums; 146 static ompt_get_proc_id_t ompt_get_proc_id; 147 static ompt_enumerate_states_t ompt_enumerate_states; 148 static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; 149 150 static void print_ids(int level) 151 { 152 int task_type, thread_num; 153 ompt_frame_t *frame; 154 ompt_data_t *task_parallel_data; 155 ompt_data_t *task_data; 156 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame, 157 &task_parallel_data, &thread_num); 158 char buffer[2048]; 159 format_task_type(task_type, buffer); 160 if (frame) 161 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64 162 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, " 163 "task_type=%s=%d, thread_num=%d\n", 164 ompt_get_thread_data()->value, level, 165 exists_task ? task_parallel_data->value : 0, 166 exists_task ? 
task_data->value : 0, frame->exit_frame.ptr, 167 frame->enter_frame.ptr, buffer, task_type, thread_num); 168 } 169 170 #define get_frame_address(level) __builtin_frame_address(level) 171 172 #define print_frame(level) \ 173 printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", \ 174 ompt_get_thread_data()->value, level, get_frame_address(level)) 175 176 // clang (version 5.0 and above) adds an intermediate function call with debug flag (-g) 177 #if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN) 178 #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5 179 #define print_frame_from_outlined_fn(level) print_frame(level+1) 180 #else 181 #define print_frame_from_outlined_fn(level) print_frame(level) 182 #endif 183 184 #if defined(__clang__) && __clang_major__ >= 5 185 #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information." 186 #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!" 187 #endif 188 #endif 189 190 // This macro helps to define a label at the current position that can be used 191 // to get the current address in the code. 192 // 193 // For print_current_address(): 194 // To reliably determine the offset between the address of the label and the 195 // actual return address, we insert a NOP instruction as a jump target as the 196 // compiler would otherwise insert an instruction that we can't control. The 197 // instruction length is target dependent and is explained below. 198 // 199 // (The empty block between "#pragma omp ..." and the __asm__ statement is a 200 // workaround for a bug in the Intel Compiler.) 201 #define define_ompt_label(id) \ 202 {} \ 203 __asm__("nop"); \ 204 ompt_label_##id: 205 206 // This macro helps to get the address of a label that is inserted by the above 207 // macro define_ompt_label(). The address is obtained with a GNU extension 208 // (&&label) that has been tested with gcc, clang and icc. 
#define get_ompt_label_address(id) (&& ompt_label_##id)

// This macro prints the exact address that a previously called runtime function
// returns to. It places a label via define_ompt_label(id) and passes the
// label's address to print_possible_return_addresses().
#define print_current_address(id) \
  define_ompt_label(id) \
  print_possible_return_addresses(get_ompt_label_address(id))

// print_possible_return_addresses(addr) prints the candidate return addresses
// for the label address `addr`. Each candidate is `addr` minus an
// architecture-specific byte offset; the offsets are explained per target
// below. Unsupported targets fail the build with #error.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
// a MOV instruction for non-void runtime functions which is 3 bytes long.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
#elif KMP_ARCH_PPC64
// On Power the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a second NOP instruction (another 4 bytes). For non-void runtime
// functions Clang inserts a STW instruction (but only if compiling under
// -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
// On AArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted store instruction (another 4 bytes long).
// FIXME: PR #65696 added a third possibility (12 byte offset) to make the
// tests pass on Darwin. Adding the same for other OSes. However, the proper
// fix for this is to remove the extra branch instruction being generated by
// the AArch64 backend. See issue #69627.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 4, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_RISCV64
#if __riscv_compressed
// On RV64GC the C.NOP instruction is 2 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (i.e.
// by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
#else
// On RV64G the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the NOP is referenced elsewhere (i.e.
// by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
#endif
#elif KMP_ARCH_LOONGARCH64
// On LoongArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted jump instruction (another 4 bytes long). An additional jump
// instruction may appear (adding 4 more bytes) when the NOP is referenced
// elsewhere (i.e. by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 4, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_VE
// On VE the NOP instruction is 8 bytes long.
// NOTE(review): the original comment left the instruction that the compiler
// inserts for non-void runtime functions, and its length, as placeholders
// ("???" / "?"). Both printed candidates currently use the same offset (8), so
// the second one carries no extra information -- confirm the real offset.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 8, \
         ((char *)addr) - 8)
#elif KMP_ARCH_S390X
// On s390x the NOP instruction is 2 bytes long. For non-void runtime
// functions Clang inserts a STY instruction (but only if compiling under
// -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
//
// Another possibility is:
//
// brasl %r14,__kmpc_end_master@plt
// a7 f4 00 02 j 0f
// 47 00 00 00 0: nop
// a7 f4 00 02 j addr
// addr:
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 2, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#else
#error Unsupported target architecture, cannot determine address offset!
#endif


// This macro performs a somewhat similar job to print_current_address(), except
// that it discards a certain number of nibbles from the address and only prints
// the most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed as we are a few bytes above the relevant power of two) the addresses
// of the "current" and of the "previous block" are printed.
#define print_fuzzy_address(id) \
  define_ompt_label(id) \
  print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
313 #define FUZZY_ADDRESS_DISCARD_NIBBLES 2 314 #define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4)) 315 #define print_fuzzy_address_blocks(addr) \ 316 printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 \ 317 " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n", \ 318 ompt_get_thread_data()->value, \ 319 ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1, \ 320 ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES, \ 321 ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1, \ 322 ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr) 323 324 #define register_ompt_callback_t(name, type) \ 325 do { \ 326 type f_##name = &on_##name; \ 327 if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ 328 printf("0: Could not register callback '" #name "'\n"); \ 329 } while (0) 330 331 #define register_ompt_callback(name) register_ompt_callback_t(name, name##_t) 332 333 #ifndef USE_PRIVATE_TOOL 334 static void 335 on_ompt_callback_mutex_acquire( 336 ompt_mutex_t kind, 337 unsigned int hint, 338 unsigned int impl, 339 ompt_wait_id_t wait_id, 340 const void *codeptr_ra) 341 { 342 switch(kind) 343 { 344 case ompt_mutex_lock: 345 printf("%" PRIu64 ":" _TOOL_PREFIX 346 " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 347 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 348 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 349 break; 350 case ompt_mutex_test_lock: 351 printf("%" PRIu64 ":" _TOOL_PREFIX 352 " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 353 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 354 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 355 break; 356 case ompt_mutex_nest_lock: 357 printf("%" PRIu64 ":" _TOOL_PREFIX 358 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 359 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 360 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 361 break; 362 case ompt_mutex_test_nest_lock: 363 printf("%" PRIu64 ":" 
_TOOL_PREFIX 364 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64 365 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 366 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 367 break; 368 case ompt_mutex_critical: 369 printf("%" PRIu64 ":" _TOOL_PREFIX 370 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32 371 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 372 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 373 break; 374 case ompt_mutex_atomic: 375 printf("%" PRIu64 ":" _TOOL_PREFIX 376 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32 377 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 378 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 379 break; 380 case ompt_mutex_ordered: 381 printf("%" PRIu64 ":" _TOOL_PREFIX 382 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32 383 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 384 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 385 break; 386 default: 387 break; 388 } 389 } 390 391 static void 392 on_ompt_callback_mutex_acquired( 393 ompt_mutex_t kind, 394 ompt_wait_id_t wait_id, 395 const void *codeptr_ra) 396 { 397 switch(kind) 398 { 399 case ompt_mutex_lock: 400 printf("%" PRIu64 ":" _TOOL_PREFIX 401 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", 402 ompt_get_thread_data()->value, wait_id, codeptr_ra); 403 break; 404 case ompt_mutex_test_lock: 405 printf("%" PRIu64 ":" _TOOL_PREFIX 406 " ompt_event_acquired_test_lock: wait_id=%" PRIu64 407 ", codeptr_ra=%p \n", 408 ompt_get_thread_data()->value, wait_id, codeptr_ra); 409 break; 410 case ompt_mutex_nest_lock: 411 printf("%" PRIu64 ":" _TOOL_PREFIX 412 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64 413 ", codeptr_ra=%p \n", 414 ompt_get_thread_data()->value, wait_id, codeptr_ra); 415 break; 416 case ompt_mutex_test_nest_lock: 417 printf("%" PRIu64 ":" _TOOL_PREFIX 418 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64 419 ", codeptr_ra=%p 
\n", 420 ompt_get_thread_data()->value, wait_id, codeptr_ra); 421 break; 422 case ompt_mutex_critical: 423 printf("%" PRIu64 ":" _TOOL_PREFIX 424 " ompt_event_acquired_critical: wait_id=%" PRIu64 425 ", codeptr_ra=%p \n", 426 ompt_get_thread_data()->value, wait_id, codeptr_ra); 427 break; 428 case ompt_mutex_atomic: 429 printf("%" PRIu64 ":" _TOOL_PREFIX 430 " ompt_event_acquired_atomic: wait_id=%" PRIu64 431 ", codeptr_ra=%p \n", 432 ompt_get_thread_data()->value, wait_id, codeptr_ra); 433 break; 434 case ompt_mutex_ordered: 435 printf("%" PRIu64 ":" _TOOL_PREFIX 436 " ompt_event_acquired_ordered: wait_id=%" PRIu64 437 ", codeptr_ra=%p \n", 438 ompt_get_thread_data()->value, wait_id, codeptr_ra); 439 break; 440 default: 441 break; 442 } 443 } 444 445 static void 446 on_ompt_callback_mutex_released( 447 ompt_mutex_t kind, 448 ompt_wait_id_t wait_id, 449 const void *codeptr_ra) 450 { 451 switch(kind) 452 { 453 case ompt_mutex_lock: 454 printf("%" PRIu64 ":" _TOOL_PREFIX 455 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", 456 ompt_get_thread_data()->value, wait_id, codeptr_ra); 457 break; 458 case ompt_mutex_nest_lock: 459 printf("%" PRIu64 ":" _TOOL_PREFIX 460 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64 461 ", codeptr_ra=%p \n", 462 ompt_get_thread_data()->value, wait_id, codeptr_ra); 463 break; 464 case ompt_mutex_critical: 465 printf("%" PRIu64 ":" _TOOL_PREFIX 466 " ompt_event_release_critical: wait_id=%" PRIu64 467 ", codeptr_ra=%p \n", 468 ompt_get_thread_data()->value, wait_id, codeptr_ra); 469 break; 470 case ompt_mutex_atomic: 471 printf("%" PRIu64 ":" _TOOL_PREFIX 472 " ompt_event_release_atomic: wait_id=%" PRIu64 473 ", codeptr_ra=%p \n", 474 ompt_get_thread_data()->value, wait_id, codeptr_ra); 475 break; 476 case ompt_mutex_ordered: 477 printf("%" PRIu64 ":" _TOOL_PREFIX 478 " ompt_event_release_ordered: wait_id=%" PRIu64 479 ", codeptr_ra=%p \n", 480 ompt_get_thread_data()->value, wait_id, codeptr_ra); 481 break; 482 
default: 483 break; 484 } 485 } 486 487 static void 488 on_ompt_callback_nest_lock( 489 ompt_scope_endpoint_t endpoint, 490 ompt_wait_id_t wait_id, 491 const void *codeptr_ra) 492 { 493 switch(endpoint) 494 { 495 case ompt_scope_begin: 496 printf("%" PRIu64 ":" _TOOL_PREFIX 497 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64 498 ", codeptr_ra=%p \n", 499 ompt_get_thread_data()->value, wait_id, codeptr_ra); 500 break; 501 case ompt_scope_end: 502 printf("%" PRIu64 ":" _TOOL_PREFIX 503 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64 504 ", codeptr_ra=%p \n", 505 ompt_get_thread_data()->value, wait_id, codeptr_ra); 506 break; 507 case ompt_scope_beginend: 508 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 509 exit(-1); 510 } 511 } 512 513 static void 514 on_ompt_callback_sync_region( 515 ompt_sync_region_t kind, 516 ompt_scope_endpoint_t endpoint, 517 ompt_data_t *parallel_data, 518 ompt_data_t *task_data, 519 const void *codeptr_ra) 520 { 521 if (endpoint == ompt_scope_beginend) { 522 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 523 exit(-1); 524 } 525 if (kind == ompt_sync_region_reduction) { 526 printf("ompt_sync_region_reduction should never be passed to %s\n", 527 __func__); 528 exit(-1); 529 } 530 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0; 531 const char *begin_or_end = (endpoint == ompt_scope_begin) ? 
"begin" : "end"; 532 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_%s: parallel_id=%" PRIu64 533 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", 534 ompt_get_thread_data()->value, ompt_sync_region_t_values[kind], 535 begin_or_end, parallel_data_value, task_data->value, codeptr_ra); 536 switch (kind) { 537 case ompt_sync_region_barrier: 538 case ompt_sync_region_barrier_implicit: 539 case ompt_sync_region_barrier_implicit_workshare: 540 case ompt_sync_region_barrier_implicit_parallel: 541 case ompt_sync_region_barrier_teams: 542 case ompt_sync_region_barrier_explicit: 543 case ompt_sync_region_barrier_implementation: 544 if (endpoint == ompt_scope_begin) 545 print_ids(0); 546 default:; 547 } 548 } 549 550 static void 551 on_ompt_callback_sync_region_wait( 552 ompt_sync_region_t kind, 553 ompt_scope_endpoint_t endpoint, 554 ompt_data_t *parallel_data, 555 ompt_data_t *task_data, 556 const void *codeptr_ra) 557 { 558 if (endpoint == ompt_scope_beginend) { 559 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 560 exit(-1); 561 } 562 if (kind == ompt_sync_region_reduction) { 563 printf("ompt_sync_region_reduction should never be passed to %s\n", 564 __func__); 565 exit(-1); 566 } 567 uint64_t parallel_data_value = parallel_data ? parallel_data->value : 0; 568 const char *begin_or_end = (endpoint == ompt_scope_begin) ? 
"begin" : "end"; 569 printf("%" PRIu64 ":" _TOOL_PREFIX 570 " ompt_event_wait_%s_%s: parallel_id=%" PRIu64 ", task_id=%" PRIu64 571 ", codeptr_ra=%p\n", 572 ompt_get_thread_data()->value, ompt_sync_region_t_values[kind], 573 begin_or_end, parallel_data_value, task_data->value, codeptr_ra); 574 } 575 576 static void on_ompt_callback_reduction(ompt_sync_region_t kind, 577 ompt_scope_endpoint_t endpoint, 578 ompt_data_t *parallel_data, 579 ompt_data_t *task_data, 580 const void *codeptr_ra) { 581 switch (endpoint) { 582 case ompt_scope_begin: 583 printf("%" PRIu64 ":" _TOOL_PREFIX 584 " ompt_event_reduction_begin: parallel_id=%" PRIu64 585 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", 586 ompt_get_thread_data()->value, 587 (parallel_data) ? parallel_data->value : 0, task_data->value, 588 codeptr_ra); 589 break; 590 case ompt_scope_end: 591 printf("%" PRIu64 ":" _TOOL_PREFIX 592 " ompt_event_reduction_end: parallel_id=%" PRIu64 593 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", 594 ompt_get_thread_data()->value, 595 (parallel_data) ? 
parallel_data->value : 0, task_data->value, 596 codeptr_ra); 597 break; 598 case ompt_scope_beginend: 599 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 600 exit(-1); 601 } 602 } 603 604 static void 605 on_ompt_callback_flush( 606 ompt_data_t *thread_data, 607 const void *codeptr_ra) 608 { 609 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n", 610 thread_data->value, codeptr_ra); 611 } 612 613 static void 614 on_ompt_callback_cancel( 615 ompt_data_t *task_data, 616 int flags, 617 const void *codeptr_ra) 618 { 619 const char* first_flag_value; 620 const char* second_flag_value; 621 if(flags & ompt_cancel_parallel) 622 first_flag_value = ompt_cancel_flag_t_values[0]; 623 else if(flags & ompt_cancel_sections) 624 first_flag_value = ompt_cancel_flag_t_values[1]; 625 else if(flags & ompt_cancel_loop) 626 first_flag_value = ompt_cancel_flag_t_values[2]; 627 else if(flags & ompt_cancel_taskgroup) 628 first_flag_value = ompt_cancel_flag_t_values[3]; 629 630 if(flags & ompt_cancel_activated) 631 second_flag_value = ompt_cancel_flag_t_values[4]; 632 else if(flags & ompt_cancel_detected) 633 second_flag_value = ompt_cancel_flag_t_values[5]; 634 else if(flags & ompt_cancel_discarded_task) 635 second_flag_value = ompt_cancel_flag_t_values[6]; 636 637 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64 638 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n", 639 ompt_get_thread_data()->value, task_data->value, first_flag_value, 640 second_flag_value, flags, codeptr_ra); 641 } 642 643 static void 644 on_ompt_callback_implicit_task( 645 ompt_scope_endpoint_t endpoint, 646 ompt_data_t *parallel_data, 647 ompt_data_t *task_data, 648 unsigned int team_size, 649 unsigned int thread_num, 650 int flags) 651 { 652 switch(endpoint) 653 { 654 case ompt_scope_begin: 655 if(task_data->ptr) 656 printf("%s\n", "0: task_data initially not null"); 657 task_data->value = ompt_get_unique_id(); 658 659 //there is no parallel_begin 
callback for implicit parallel region 660 //thus it is initialized in initial task 661 if(flags & ompt_task_initial) 662 { 663 char buffer[2048]; 664 665 format_task_type(flags, buffer); 666 // Only check initial task not created by teams construct 667 if (team_size == 1 && thread_num == 1 && parallel_data->ptr) 668 printf("%s\n", "0: parallel_data initially not null"); 669 parallel_data->value = ompt_get_unique_id(); 670 printf("%" PRIu64 ":" _TOOL_PREFIX 671 " ompt_event_initial_task_begin: parallel_id=%" PRIu64 672 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32 673 ", index=%" PRIu32 ", flags=%" PRIu32 "\n", 674 ompt_get_thread_data()->value, parallel_data->value, 675 task_data->value, team_size, thread_num, flags); 676 } else { 677 printf("%" PRIu64 ":" _TOOL_PREFIX 678 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64 679 ", task_id=%" PRIu64 ", team_size=%" PRIu32 680 ", thread_num=%" PRIu32 "\n", 681 ompt_get_thread_data()->value, parallel_data->value, 682 task_data->value, team_size, thread_num); 683 } 684 685 break; 686 case ompt_scope_end: 687 if(flags & ompt_task_initial){ 688 printf("%" PRIu64 ":" _TOOL_PREFIX 689 " ompt_event_initial_task_end: parallel_id=%" PRIu64 690 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32 691 ", index=%" PRIu32 "\n", 692 ompt_get_thread_data()->value, 693 (parallel_data) ? parallel_data->value : 0, task_data->value, 694 team_size, thread_num); 695 } else { 696 printf("%" PRIu64 ":" _TOOL_PREFIX 697 " ompt_event_implicit_task_end: parallel_id=%" PRIu64 698 ", task_id=%" PRIu64 ", team_size=%" PRIu32 699 ", thread_num=%" PRIu32 "\n", 700 ompt_get_thread_data()->value, 701 (parallel_data) ? 
parallel_data->value : 0, task_data->value, 702 team_size, thread_num); 703 } 704 break; 705 case ompt_scope_beginend: 706 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 707 exit(-1); 708 } 709 } 710 711 static void 712 on_ompt_callback_lock_init( 713 ompt_mutex_t kind, 714 unsigned int hint, 715 unsigned int impl, 716 ompt_wait_id_t wait_id, 717 const void *codeptr_ra) 718 { 719 switch(kind) 720 { 721 case ompt_mutex_lock: 722 printf("%" PRIu64 ":" _TOOL_PREFIX 723 " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 724 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 725 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 726 break; 727 case ompt_mutex_nest_lock: 728 printf("%" PRIu64 ":" _TOOL_PREFIX 729 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32 730 ", impl=%" PRIu32 ", codeptr_ra=%p \n", 731 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra); 732 break; 733 default: 734 break; 735 } 736 } 737 738 static void 739 on_ompt_callback_lock_destroy( 740 ompt_mutex_t kind, 741 ompt_wait_id_t wait_id, 742 const void *codeptr_ra) 743 { 744 switch(kind) 745 { 746 case ompt_mutex_lock: 747 printf("%" PRIu64 ":" _TOOL_PREFIX 748 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n", 749 ompt_get_thread_data()->value, wait_id, codeptr_ra); 750 break; 751 case ompt_mutex_nest_lock: 752 printf("%" PRIu64 ":" _TOOL_PREFIX 753 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64 754 ", codeptr_ra=%p \n", 755 ompt_get_thread_data()->value, wait_id, codeptr_ra); 756 break; 757 default: 758 break; 759 } 760 } 761 762 static void 763 on_ompt_callback_work( 764 ompt_work_t wstype, 765 ompt_scope_endpoint_t endpoint, 766 ompt_data_t *parallel_data, 767 ompt_data_t *task_data, 768 uint64_t count, 769 const void *codeptr_ra) 770 { 771 switch(endpoint) 772 { 773 case ompt_scope_begin: 774 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_begin: parallel_id=%" PRIu64 775 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" 
PRIu64 "\n", 776 ompt_get_thread_data()->value, ompt_work_events_t_values[wstype], 777 parallel_data->value, task_data->value, codeptr_ra, count); 778 break; 779 case ompt_scope_end: 780 printf("%" PRIu64 ":" _TOOL_PREFIX " %s_end: parallel_id=%" PRIu64 781 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n", 782 ompt_get_thread_data()->value, ompt_work_events_t_values[wstype], 783 parallel_data->value, task_data->value, codeptr_ra, count); 784 break; 785 case ompt_scope_beginend: 786 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 787 exit(-1); 788 } 789 } 790 791 static void on_ompt_callback_dispatch( 792 ompt_data_t *parallel_data, 793 ompt_data_t *task_data, 794 ompt_dispatch_t kind, 795 ompt_data_t instance) { 796 char *event_name = NULL; 797 void *codeptr_ra = NULL; 798 ompt_dispatch_chunk_t *dispatch_chunk = NULL; 799 switch (kind) { 800 case ompt_dispatch_section: 801 event_name = "ompt_event_section_begin"; 802 codeptr_ra = instance.ptr; 803 break; 804 case ompt_dispatch_ws_loop_chunk: 805 event_name = "ompt_event_ws_loop_chunk_begin"; 806 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; 807 break; 808 case ompt_dispatch_taskloop_chunk: 809 event_name = "ompt_event_taskloop_chunk_begin"; 810 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; 811 break; 812 case ompt_dispatch_distribute_chunk: 813 event_name = "ompt_event_distribute_chunk_begin"; 814 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr; 815 break; 816 default: 817 event_name = "ompt_ws_loop_iteration_begin"; 818 } 819 printf("%" PRIu64 ":" _TOOL_PREFIX 820 " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64 821 ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64 822 "\n", ompt_get_thread_data()->value, event_name, parallel_data->value, 823 task_data->value, codeptr_ra, 824 dispatch_chunk ? dispatch_chunk->start : 0, 825 dispatch_chunk ? 
dispatch_chunk->iterations : 0); 826 } 827 828 static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint, 829 ompt_data_t *parallel_data, 830 ompt_data_t *task_data, 831 const void *codeptr_ra) { 832 switch(endpoint) 833 { 834 case ompt_scope_begin: 835 printf("%" PRIu64 ":" _TOOL_PREFIX 836 " ompt_event_masked_begin: parallel_id=%" PRIu64 837 ", task_id=%" PRIu64 ", codeptr_ra=%p\n", 838 ompt_get_thread_data()->value, parallel_data->value, 839 task_data->value, codeptr_ra); 840 break; 841 case ompt_scope_end: 842 printf("%" PRIu64 ":" _TOOL_PREFIX 843 " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 844 ", codeptr_ra=%p\n", 845 ompt_get_thread_data()->value, parallel_data->value, 846 task_data->value, codeptr_ra); 847 break; 848 case ompt_scope_beginend: 849 printf("ompt_scope_beginend should never be passed to %s\n", __func__); 850 exit(-1); 851 } 852 } 853 854 static void on_ompt_callback_parallel_begin( 855 ompt_data_t *encountering_task_data, 856 const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, 857 uint32_t requested_team_size, int flag, const void *codeptr_ra) { 858 if(parallel_data->ptr) 859 printf("0: parallel_data initially not null\n"); 860 parallel_data->value = ompt_get_unique_id(); 861 int invoker = flag & 0xF; 862 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams"; 863 const char *size = (flag & ompt_parallel_team) ? 
"team_size" : "num_teams"; 864 printf("%" PRIu64 ":" _TOOL_PREFIX 865 " ompt_event_%s_begin: parent_task_id=%" PRIu64 866 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, " 867 "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32 868 ", codeptr_ra=%p, invoker=%d\n", 869 ompt_get_thread_data()->value, event, encountering_task_data->value, 870 encountering_task_frame->exit_frame.ptr, 871 encountering_task_frame->enter_frame.ptr, parallel_data->value, size, 872 requested_team_size, codeptr_ra, invoker); 873 } 874 875 static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, 876 ompt_data_t *encountering_task_data, 877 int flag, const void *codeptr_ra) { 878 int invoker = flag & 0xF; 879 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams"; 880 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64 881 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n", 882 ompt_get_thread_data()->value, event, parallel_data->value, 883 encountering_task_data->value, invoker, codeptr_ra); 884 } 885 886 static void 887 on_ompt_callback_task_create( 888 ompt_data_t *encountering_task_data, 889 const ompt_frame_t *encountering_task_frame, 890 ompt_data_t* new_task_data, 891 int type, 892 int has_dependences, 893 const void *codeptr_ra) 894 { 895 if(new_task_data->ptr) 896 printf("0: new_task_data initially not null\n"); 897 new_task_data->value = ompt_get_unique_id(); 898 char buffer[2048]; 899 900 format_task_type(type, buffer); 901 902 printf( 903 "%" PRIu64 ":" _TOOL_PREFIX 904 " ompt_event_task_create: parent_task_id=%" PRIu64 905 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, " 906 "new_task_id=%" PRIu64 907 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", 908 ompt_get_thread_data()->value, 909 encountering_task_data ? encountering_task_data->value : 0, 910 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, 911 encountering_task_frame ? 
encountering_task_frame->enter_frame.ptr : NULL, 912 new_task_data->value, codeptr_ra, buffer, type, 913 has_dependences ? "yes" : "no"); 914 } 915 916 static void 917 on_ompt_callback_task_schedule( 918 ompt_data_t *first_task_data, 919 ompt_task_status_t prior_task_status, 920 ompt_data_t *second_task_data) 921 { 922 printf("%" PRIu64 ":" _TOOL_PREFIX 923 " ompt_event_task_schedule: first_task_id=%" PRIu64 924 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n", 925 ompt_get_thread_data()->value, first_task_data->value, 926 (second_task_data ? second_task_data->value : -1), 927 ompt_task_status_t_values[prior_task_status], prior_task_status); 928 if (prior_task_status == ompt_task_complete || 929 prior_task_status == ompt_task_late_fulfill || 930 prior_task_status == ompt_taskwait_complete) { 931 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64 932 "\n", ompt_get_thread_data()->value, first_task_data->value); 933 } 934 } 935 936 static void 937 on_ompt_callback_dependences( 938 ompt_data_t *task_data, 939 const ompt_dependence_t *deps, 940 int ndeps) 941 { 942 char buffer[2048]; 943 char *progress = buffer; 944 int i; 945 for (i = 0; i < ndeps && progress < buffer + 2000; i++) { 946 if (deps[i].dependence_type == ompt_dependence_type_source || 947 deps[i].dependence_type == ompt_dependence_type_sink) 948 progress += 949 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value, 950 ompt_dependence_type_t_values[deps[i].dependence_type]); 951 else 952 progress += 953 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr, 954 ompt_dependence_type_t_values[deps[i].dependence_type]); 955 } 956 if (ndeps > 0) 957 progress[-2] = 0; 958 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64 959 ", deps=[%s], ndeps=%d\n", 960 ompt_get_thread_data()->value, task_data->value, buffer, ndeps); 961 } 962 963 static void 964 on_ompt_callback_task_dependence( 965 ompt_data_t *first_task_data, 966 ompt_data_t 
*second_task_data) 967 { 968 printf("%" PRIu64 ":" _TOOL_PREFIX 969 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64 970 ", second_task_id=%" PRIu64 "\n", 971 ompt_get_thread_data()->value, first_task_data->value, 972 second_task_data->value); 973 } 974 975 static void 976 on_ompt_callback_thread_begin( 977 ompt_thread_t thread_type, 978 ompt_data_t *thread_data) 979 { 980 if(thread_data->ptr) 981 printf("%s\n", "0: thread_data initially not null"); 982 thread_data->value = ompt_get_unique_id(); 983 printf("%" PRIu64 ":" _TOOL_PREFIX 984 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n", 985 ompt_get_thread_data()->value, ompt_thread_t_values[thread_type], 986 thread_type, thread_data->value); 987 } 988 989 static void 990 on_ompt_callback_thread_end( 991 ompt_data_t *thread_data) 992 { 993 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64 994 "\n", 995 ompt_get_thread_data()->value, thread_data->value); 996 } 997 998 static int 999 on_ompt_callback_control_tool( 1000 uint64_t command, 1001 uint64_t modifier, 1002 void *arg, 1003 const void *codeptr_ra) 1004 { 1005 ompt_frame_t* omptTaskFrame; 1006 ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL); 1007 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64 1008 ", modifier=%" PRIu64 1009 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, " 1010 "current_task_frame.reenter=%p \n", 1011 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, 1012 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr); 1013 1014 // the following would interfere with expected output for OMPT tests, so skip 1015 #ifndef _OMPT_TESTS 1016 // print task data 1017 int task_level = 0; 1018 ompt_data_t *task_data; 1019 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL, 1020 NULL, NULL)) { 1021 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n", 1022 
ompt_get_thread_data()->value, task_level, task_data->value); 1023 task_level++; 1024 } 1025 1026 // print parallel data 1027 int parallel_level = 0; 1028 ompt_data_t *parallel_data; 1029 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)¶llel_data, 1030 NULL)) { 1031 printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64 1032 "\n", 1033 ompt_get_thread_data()->value, parallel_level, parallel_data->value); 1034 parallel_level++; 1035 } 1036 #endif 1037 return 0; //success 1038 } 1039 1040 static void on_ompt_callback_error(ompt_severity_t severity, 1041 const char *message, size_t length, 1042 const void *codeptr_ra) { 1043 printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32 1044 ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n", 1045 ompt_get_thread_data()->value, severity, message, (uint64_t)length, 1046 codeptr_ra); 1047 } 1048 1049 int ompt_initialize( 1050 ompt_function_lookup_t lookup, 1051 int initial_device_num, 1052 ompt_data_t *tool_data) 1053 { 1054 ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); 1055 ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback"); 1056 ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state"); 1057 ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info"); 1058 ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory"); 1059 ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); 1060 ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info"); 1061 ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); 1062 ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool"); 1063 1064 ompt_get_unique_id(); 1065 1066 ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs"); 1067 ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places"); 1068 ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) 
lookup("ompt_get_place_proc_ids"); 1069 ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num"); 1070 ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums"); 1071 ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id"); 1072 ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states"); 1073 ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls"); 1074 1075 register_ompt_callback(ompt_callback_mutex_acquire); 1076 register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t); 1077 register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t); 1078 register_ompt_callback(ompt_callback_nest_lock); 1079 register_ompt_callback(ompt_callback_sync_region); 1080 register_ompt_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t); 1081 register_ompt_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t); 1082 register_ompt_callback(ompt_callback_control_tool); 1083 register_ompt_callback(ompt_callback_flush); 1084 register_ompt_callback(ompt_callback_cancel); 1085 register_ompt_callback(ompt_callback_implicit_task); 1086 register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); 1087 register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); 1088 register_ompt_callback(ompt_callback_work); 1089 register_ompt_callback(ompt_callback_dispatch); 1090 register_ompt_callback(ompt_callback_masked); 1091 register_ompt_callback(ompt_callback_parallel_begin); 1092 register_ompt_callback(ompt_callback_parallel_end); 1093 register_ompt_callback(ompt_callback_task_create); 1094 register_ompt_callback(ompt_callback_task_schedule); 1095 register_ompt_callback(ompt_callback_dependences); 1096 register_ompt_callback(ompt_callback_task_dependence); 1097 register_ompt_callback(ompt_callback_thread_begin); 1098 
register_ompt_callback(ompt_callback_thread_end); 1099 register_ompt_callback(ompt_callback_error); 1100 printf("0: NULL_POINTER=%p\n", (void*)NULL); 1101 return 1; //success 1102 } 1103 1104 void ompt_finalize(ompt_data_t *tool_data) 1105 { 1106 printf("0: ompt_event_runtime_shutdown\n"); 1107 } 1108 1109 #ifdef __cplusplus 1110 extern "C" { 1111 #endif 1112 ompt_start_tool_result_t* ompt_start_tool( 1113 unsigned int omp_version, 1114 const char *runtime_version) 1115 { 1116 static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; 1117 return &ompt_start_tool_result; 1118 } 1119 #ifdef __cplusplus 1120 } 1121 #endif 1122 #endif // ifndef USE_PRIVATE_TOOL 1123 #ifdef _OMPT_TESTS 1124 #undef _OMPT_TESTS 1125 #endif 1126