xref: /llvm-project/openmp/tools/multiplex/ompt-multiplex.h (revision 820be30ad96591de2d7e651b3ec9cc0253ca6344)
1 //===--- ompt-multiplex.h - header-only multiplexing of OMPT tools -- C -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header file enables an OMPT tool to load another OMPT tool and
10 // automatically forwards OMPT event-callbacks to the nested tool.
11 //
12 // For details see openmp/tools/multiplex/README.md
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef OMPT_MULTIPLEX_H
17 #define OMPT_MULTIPLEX_H
18 
19 #ifndef _GNU_SOURCE
20 #define _GNU_SOURCE
21 #endif
22 #include <dlfcn.h>
23 #include <errno.h>
24 #include <execinfo.h>
25 #include <inttypes.h>
26 #include <omp-tools.h>
27 #include <omp.h>
28 #include <stdio.h>
29 #include <string.h>
30 
// Pointers to runtime entry points used by the multiplexer itself. They are
// only declared here; the code that assigns them is outside this section
// (presumably they are resolved through the runtime's lookup function during
// tool initialization -- confirm against the initializer).
static ompt_set_callback_t ompt_multiplex_set_callback;
static ompt_get_task_info_t ompt_multiplex_get_task_info;
static ompt_get_thread_data_t ompt_multiplex_get_thread_data;
// Called with ancestor level 0 in ompt_multiplex_callback_task_create to find
// the parallel data an initial task belongs to.
static ompt_get_parallel_info_t ompt_multiplex_get_parallel_info;
35 
// If OMPT_MULTIPLEX_TOOL_NAME is defined, use the tool name as prefix for the
// names of the TOOL_LIBRARIES and TOOL_VERBOSE_INIT environment variables
// (the former is the variable in which the path of the tool is specified).
// Only define them here if they are not explicitly defined already.
40 #ifdef OMPT_MULTIPLEX_TOOL_NAME
41 #ifndef CLIENT_TOOL_LIBRARIES_VAR
42 #define CLIENT_TOOL_LIBRARIES_VAR OMPT_MULTIPLEX_TOOL_NAME "_TOOL_LIBRARIES"
43 #endif
44 #ifndef CLIENT_TOOL_VERBOSE_INIT_VAR
45 #define CLIENT_TOOL_VERBOSE_INIT_VAR                                           \
46   OMPT_MULTIPLEX_TOOL_NAME "_TOOL_VERBOSE_INIT"
47 #endif
48 #endif
49 
50 // If CLIENT_TOOL_VERBOSE_INIT_VAR is still not defined, use the OMPT
51 // env var.
52 #ifndef CLIENT_TOOL_VERBOSE_INIT_VAR
53 #warning CLIENT_TOOL_VERBOSE_INIT_VAR redefined to OMP_TOOL_VERBOSE_INIT
54 #define CLIENT_TOOL_VERBOSE_INIT_VAR "OMP_TOOL_VERBOSE_INIT"
55 #endif
56 
57 // contains name of the environment var in which the tool path is specified
58 #ifndef CLIENT_TOOL_LIBRARIES_VAR
59 #error CLIENT_TOOL_LIBRARIES_VAR should be defined before including of ompt-multiplex.h
60 #endif
61 
62 #if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) &&                         \
63     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
64 #error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA is set
65 #endif
66 
67 #if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) &&                     \
68     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
69 #error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA is set
70 #endif
71 
72 #if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) &&                       \
73     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
74 #error OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA must be set if OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA is set
75 #endif
76 
77 #define OMPT_API_ROUTINE static
78 
79 #ifndef OMPT_STR_MATCH
80 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
81 #endif
82 
// State of the verbose-init logging, written by setup_verbose_init():
// verbose_file is the stream the messages go to, verbose_init is non-zero
// while logging is enabled.
static FILE *verbose_file;
static int verbose_init;

// prints for an enabled OMP_TOOL_VERBOSE_INIT.
// In the future a prefix could be added in the first define, the second define
// omits the prefix to allow for continued lines. Example: "PREFIX: Start
// tool... Success." instead of "PREFIX: Start tool... PREFIX: Success."
//
// Both macros take printf-style arguments and must be used as a statement
// (followed by a semicolon). They are wrapped in do { } while (0) so that they
// expand to exactly one statement: a bare `if` would silently capture the
// `else` branch of an enclosing if/else.
#define OMPT_VERBOSE_INIT_PRINT(...)                                           \
  do {                                                                         \
    if (verbose_init)                                                          \
      fprintf(verbose_file, __VA_ARGS__);                                      \
  } while (0)
#define OMPT_VERBOSE_INIT_CONTINUED_PRINT(...)                                 \
  do {                                                                         \
    if (verbose_init)                                                          \
      fprintf(verbose_file, __VA_ARGS__);                                      \
  } while (0)
96 
setup_verbose_init()97 void setup_verbose_init() {
98   const char *ompt_env_verbose_init = getenv(CLIENT_TOOL_VERBOSE_INIT_VAR);
99   // possible options: disabled | stdout | stderr | <filename>
100   // if set, not empty and not disabled -> prepare for logging
101   if (ompt_env_verbose_init && strcmp(ompt_env_verbose_init, "") &&
102       !OMPT_STR_MATCH(ompt_env_verbose_init, "disabled")) {
103     verbose_init = 1;
104     if (OMPT_STR_MATCH(ompt_env_verbose_init, "STDERR"))
105       verbose_file = stderr;
106     else if (OMPT_STR_MATCH(ompt_env_verbose_init, "STDOUT"))
107       verbose_file = stdout;
108     else if (!OMPT_STR_MATCH(ompt_env_verbose_init,
109                              getenv("OMP_TOOL_VERBOSE_INIT")))
110       verbose_file = fopen(ompt_env_verbose_init, "w");
111     else {
112       verbose_init = 0;
113       printf("Multiplex: Can not open file defined in OMP_TOOL_VERBOSE_INIT "
114              "twice.");
115     }
116   } else
117     verbose_init = 0;
118 }
119 
// X-macro table of the OMPT events known to the multiplexer. It invokes
// macro(event, callback_type, eventid) once per event, each invocation
// followed by a semicolon:
//   event         - event name; the callback tables below derive their member
//                   names from it as ompt_<event>
//   callback_type - function pointer type of the callback for this event
//   eventid       - number of the event (presumably the matching
//                   ompt_callbacks_t value from omp-tools.h -- confirm)
#define OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(macro)                             \
  macro(callback_thread_begin, ompt_callback_thread_begin_t, 1);               \
  macro(callback_thread_end, ompt_callback_thread_end_t, 2);                   \
  macro(callback_parallel_begin, ompt_callback_parallel_begin_t, 3);           \
  macro(callback_parallel_end, ompt_callback_parallel_end_t, 4);               \
  macro(callback_task_create, ompt_callback_task_create_t, 5);                 \
  macro(callback_task_schedule, ompt_callback_task_schedule_t, 6);             \
  macro(callback_implicit_task, ompt_callback_implicit_task_t, 7);             \
  macro(callback_target, ompt_callback_target_t, 8);                           \
  macro(callback_target_data_op, ompt_callback_target_data_op_t, 9);           \
  macro(callback_target_submit, ompt_callback_target_submit_t, 10);            \
  macro(callback_control_tool, ompt_callback_control_tool_t, 11);              \
  macro(callback_device_initialize, ompt_callback_device_initialize_t, 12);    \
  macro(callback_device_finalize, ompt_callback_device_finalize_t, 13);        \
  macro(callback_device_load, ompt_callback_device_load_t, 14);                \
  macro(callback_device_unload, ompt_callback_device_unload_t, 15);            \
  macro(callback_sync_region_wait, ompt_callback_sync_region_t, 16);           \
  macro(callback_mutex_released, ompt_callback_mutex_t, 17);                   \
  macro(callback_dependences, ompt_callback_dependences_t, 18);                \
  macro(callback_task_dependence, ompt_callback_task_dependence_t, 19);        \
  macro(callback_work, ompt_callback_work_t, 20);                              \
  macro(callback_masked, ompt_callback_masked_t, 21);                          \
  macro(callback_target_map, ompt_callback_target_map_t, 22);                  \
  macro(callback_sync_region, ompt_callback_sync_region_t, 23);                \
  macro(callback_lock_init, ompt_callback_mutex_acquire_t, 24);                \
  macro(callback_lock_destroy, ompt_callback_mutex_t, 25);                     \
  macro(callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26);            \
  macro(callback_mutex_acquired, ompt_callback_mutex_t, 27);                   \
  macro(callback_nest_lock, ompt_callback_nest_lock_t, 28);                    \
  macro(callback_flush, ompt_callback_flush_t, 29);                            \
  macro(callback_cancel, ompt_callback_cancel_t, 30);                          \
  macro(callback_reduction, ompt_callback_sync_region_t, 31);                  \
  macro(callback_dispatch, ompt_callback_dispatch_t, 32);
153 
154 typedef struct ompt_multiplex_callbacks_s {
155 #define ompt_event_macro(event, callback, eventid) callback ompt_##event
156 
157   OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
158 
159 #undef ompt_event_macro
160 } ompt_multiplex_callbacks_t;
161 
162 typedef struct ompt_multiplex_callback_implementation_status_s {
163 #define ompt_event_macro(event, callback, eventid) int ompt_##event
164 
165   OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
166 
167 #undef ompt_event_macro
168 } ompt_multiplex_callback_implementation_status_t;
169 
// Global state of the multiplexer. "own" refers to the tool that includes this
// header, "client" to the nested tool it loads.
// NOTE(review): these objects have external linkage although they are defined
// in a header -- confirm that this is intended before including the header in
// more than one translation unit.

// Start-tool results of the own tool and of the client tool; both start out as
// NULL (the code assigning them is outside this section).
ompt_start_tool_result_t *ompt_multiplex_own_fns = NULL;
ompt_start_tool_result_t *ompt_multiplex_client_fns = NULL;
// Lookup function (presumably the one provided by the runtime -- confirm).
ompt_function_lookup_t ompt_multiplex_lookup_function;
// Callback tables consulted by the ompt_multiplex_callback_* forwarders: for
// each event the own entry is checked and called, then the client entry.
ompt_multiplex_callbacks_t ompt_multiplex_own_callbacks,
    ompt_multiplex_client_callbacks;
// Per-event status table (not used in this section of the file).
ompt_multiplex_callback_implementation_status_t
    ompt_multiplex_implementation_status;
177 
// Pair of data words hung off a runtime-provided ompt_data_t (via its ptr
// member) when no custom data accessor is configured, so that each tool gets
// its own ompt_data_t to work with.
typedef struct ompt_multiplex_data_pair_s {
  ompt_data_t own_data;    // handed to the callbacks of the own tool
  ompt_data_t client_data; // handed to the callbacks of the client tool
} ompt_multiplex_data_pair_t;
182 
183 #if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||                  \
184     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||                \
185     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
186 static ompt_multiplex_data_pair_t *
ompt_multiplex_allocate_data_pair(ompt_data_t * data_pointer)187 ompt_multiplex_allocate_data_pair(ompt_data_t *data_pointer) {
188   data_pointer->ptr = malloc(sizeof(ompt_multiplex_data_pair_t));
189   if (!data_pointer->ptr) {
190     printf("Malloc ERROR\n");
191     exit(-1);
192   }
193   ompt_multiplex_data_pair_t *data_pair =
194       (ompt_multiplex_data_pair_t *)data_pointer->ptr;
195   data_pair->own_data.ptr = NULL;
196   data_pair->client_data.ptr = NULL;
197   return data_pair;
198 }
199 
// Release the data pair referenced by data_pointer->ptr. The pointer stored
// in data_pointer itself is left untouched.
static void ompt_multiplex_free_data_pair(ompt_data_t *data_pointer) {
  free(data_pointer->ptr);
}
203 
// Return the address of the own tool's slot in the data pair attached to
// data, or NULL if data is NULL or no pair is attached (data->ptr is NULL).
static ompt_data_t *ompt_multiplex_get_own_ompt_data(ompt_data_t *data) {
  if (data == NULL || data->ptr == NULL)
    return NULL;
  return &((ompt_multiplex_data_pair_t *)data->ptr)->own_data;
}
213 
// Return the address of the client tool's slot in the data pair attached to
// data, or NULL if data is NULL or no pair is attached (data->ptr is NULL).
static ompt_data_t *ompt_multiplex_get_client_ompt_data(ompt_data_t *data) {
  if (data == NULL || data->ptr == NULL)
    return NULL;
  return &((ompt_multiplex_data_pair_t *)data->ptr)->client_data;
}
223 #endif //! defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||
224        //! !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||
225        //! !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
226 
// Thread data as seen by the own tool: with a custom client accessor the own
// tool uses the runtime's data word directly, otherwise it gets the own slot
// of the attached data pair.
static ompt_data_t *ompt_multiplex_get_own_thread_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}
234 
// Parallel data as seen by the own tool: with a custom client accessor the
// own tool uses the runtime's data word directly, otherwise it gets the own
// slot of the attached data pair.
static ompt_data_t *ompt_multiplex_get_own_parallel_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}
242 
// Task data as seen by the own tool: with a custom client accessor the own
// tool uses the runtime's data word directly, otherwise it gets the own slot
// of the attached data pair.
static ompt_data_t *ompt_multiplex_get_own_task_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  return data;
#else
  return ompt_multiplex_get_own_ompt_data(data);
#endif
}
250 
// Thread data as seen by the client tool: the result of the user-supplied
// OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA if that macro is defined,
// otherwise the client slot of the attached data pair.
static ompt_data_t *ompt_multiplex_get_client_thread_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}
258 
// Parallel data as seen by the client tool: the result of the user-supplied
// OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA if that macro is defined,
// otherwise the client slot of the attached data pair.
static ompt_data_t *ompt_multiplex_get_client_parallel_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}
266 
// Task data as seen by the client tool: the result of the user-supplied
// OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA if that macro is defined,
// otherwise the client slot of the attached data pair.
static ompt_data_t *ompt_multiplex_get_client_task_data(ompt_data_t *data) {
#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(data);
#else
  return ompt_multiplex_get_client_ompt_data(data);
#endif
}
274 
// Forward a mutex-acquire event with unchanged arguments: first to the own
// tool, then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_mutex_acquire(ompt_mutex_t kind,
                                                  unsigned int hint,
                                                  unsigned int impl,
                                                  ompt_wait_id_t wait_id,
                                                  const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire)(
        kind, hint, impl, wait_id, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire)(
        kind, hint, impl, wait_id, codeptr_ra);
  }
}
289 
// Forward a mutex-acquired event with unchanged arguments: first to the own
// tool, then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_mutex_acquired(ompt_mutex_t kind,
                                                   ompt_wait_id_t wait_id,
                                                   const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired)(
        kind, wait_id, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired)(
        kind, wait_id, codeptr_ra);
  }
}
302 
// Forward a mutex-released event with unchanged arguments: first to the own
// tool, then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_mutex_released(ompt_mutex_t kind,
                                                   ompt_wait_id_t wait_id,
                                                   const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_mutex_released != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_mutex_released)(
        kind, wait_id, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_mutex_released != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_mutex_released)(
        kind, wait_id, codeptr_ra);
  }
}
315 
// Forward a nest-lock event with unchanged arguments: first to the own tool,
// then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_nest_lock(ompt_scope_endpoint_t endpoint,
                                              ompt_wait_id_t wait_id,
                                              const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_nest_lock != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_nest_lock)(endpoint, wait_id,
                                                            codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_nest_lock != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_nest_lock)(
        endpoint, wait_id, codeptr_ra);
  }
}
328 
// Forward a sync-region event: first to the own tool, then to the client
// tool. Each tool receives its own view of the parallel and task data; a tool
// whose table entry is NULL is skipped.
static void ompt_multiplex_callback_sync_region(ompt_sync_region_t kind,
                                                ompt_scope_endpoint_t endpoint,
                                                ompt_data_t *parallel_data,
                                                ompt_data_t *task_data,
                                                const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_sync_region != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_sync_region)(
        kind, endpoint, own_parallel, own_task, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_sync_region != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_sync_region)(
        kind, endpoint, client_parallel, client_task, codeptr_ra);
  }
}
345 
// Forward a sync-region-wait event: first to the own tool, then to the client
// tool. Each tool receives its own view of the parallel and task data; a tool
// whose table entry is NULL is skipped.
static void ompt_multiplex_callback_sync_region_wait(
    ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
    ompt_data_t *parallel_data, ompt_data_t *task_data,
    const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait)(
        kind, endpoint, own_parallel, own_task, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait)(
        kind, endpoint, client_parallel, client_task, codeptr_ra);
  }
}
361 
// Forward a flush event: first to the own tool, then to the client tool. Each
// tool receives its own view of the thread data; a tool whose table entry is
// NULL is skipped.
static void ompt_multiplex_callback_flush(ompt_data_t *thread_data,
                                          const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_flush != NULL) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_flush)(own_thread,
                                                        codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_flush != NULL) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_flush)(client_thread,
                                                           codeptr_ra);
  }
}
373 
// Forward a cancel event: first to the own tool, then to the client tool.
// Each tool receives its own view of the task data; a tool whose table entry
// is NULL is skipped.
static void ompt_multiplex_callback_cancel(ompt_data_t *task_data, int flags,
                                           const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_cancel != NULL) {
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_cancel)(own_task, flags,
                                                         codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_cancel != NULL) {
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_cancel)(client_task, flags,
                                                            codeptr_ra);
  }
}
385 
// Forward an implicit-task event to the own and to the client tool and manage
// the data pairs attached to the task (and, for initial tasks, parallel) data.
//
// endpoint == ompt_scope_begin: unless custom accessors are configured, a data
// pair is allocated for task_data and, if flags contains ompt_task_initial,
// for parallel_data; afterwards the own tool and then the client tool are
// called.
//
// Any other endpoint: the client tool is called and the own tool is called
// either before or after it, depending on the user's defines (see below).
// Afterwards the task data pair is freed, or the custom delete macros are
// invoked if they are defined.
static void ompt_multiplex_callback_implicit_task(
    ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data,
    ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num,
    int flags) {
  if (endpoint != ompt_scope_begin) {
// The own tool is called before the client tool if a custom delete macro for
// task data is defined or no custom accessor for task data is in use ...
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task != NULL) {
      ompt_data_t *own_parallel =
          ompt_multiplex_get_own_parallel_data(parallel_data);
      ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
      (*ompt_multiplex_own_callbacks.ompt_callback_implicit_task)(
          endpoint, own_parallel, own_task, team_size, thread_num, flags);
    }
#endif

    if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task != NULL) {
      ompt_data_t *client_parallel =
          ompt_multiplex_get_client_parallel_data(parallel_data);
      ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
      (*ompt_multiplex_client_callbacks.ompt_callback_implicit_task)(
          endpoint, client_parallel, client_task, team_size, thread_num, flags);
    }

// ... and after the client tool if a custom accessor is in use without a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task != NULL) {
      ompt_data_t *own_parallel =
          ompt_multiplex_get_own_parallel_data(parallel_data);
      ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
      (*ompt_multiplex_own_callbacks.ompt_callback_implicit_task)(
          endpoint, own_parallel, own_task, team_size, thread_num, flags);
    }
#endif

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    ompt_multiplex_free_data_pair(task_data);
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
    if (flags & ompt_task_initial)
      OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
#endif
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
    OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(task_data);
#endif
    return;
  }

  // Begin of the implicit task: attach the data pairs before any tool is
  // called, so that both tools find their slot.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  ompt_multiplex_allocate_data_pair(task_data);
#endif
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  if (flags & ompt_task_initial)
    ompt_multiplex_allocate_data_pair(parallel_data);
#endif
  if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_implicit_task)(
        endpoint, own_parallel, own_task, team_size, thread_num, flags);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_implicit_task)(
        endpoint, client_parallel, client_task, team_size, thread_num, flags);
  }
}
453 
// Forward a lock-init event with unchanged arguments: first to the own tool,
// then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_lock_init(ompt_mutex_t kind,
                                              unsigned int hint,
                                              unsigned int impl,
                                              ompt_wait_id_t wait_id,
                                              const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_lock_init != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_lock_init)(
        kind, hint, impl, wait_id, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_lock_init != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_lock_init)(
        kind, hint, impl, wait_id, codeptr_ra);
  }
}
468 
// Forward a lock-destroy event with unchanged arguments: first to the own
// tool, then to the client tool. A tool whose table entry is NULL is skipped.
static void ompt_multiplex_callback_lock_destroy(ompt_mutex_t kind,
                                                 ompt_wait_id_t wait_id,
                                                 const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_lock_destroy != NULL) {
    (*ompt_multiplex_own_callbacks.ompt_callback_lock_destroy)(kind, wait_id,
                                                               codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_lock_destroy != NULL) {
    (*ompt_multiplex_client_callbacks.ompt_callback_lock_destroy)(
        kind, wait_id, codeptr_ra);
  }
}
481 
// Forward a work event: first to the own tool, then to the client tool. Each
// tool receives its own view of the parallel and task data; a tool whose
// table entry is NULL is skipped.
static void ompt_multiplex_callback_work(ompt_work_t wstype,
                                         ompt_scope_endpoint_t endpoint,
                                         ompt_data_t *parallel_data,
                                         ompt_data_t *task_data, uint64_t count,
                                         const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_work != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_work)(
        wstype, endpoint, own_parallel, own_task, count, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_work != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_work)(
        wstype, endpoint, client_parallel, client_task, count, codeptr_ra);
  }
}
499 
// Forward a masked event: first to the own tool, then to the client tool.
// Each tool receives its own view of the parallel and task data; a tool whose
// table entry is NULL is skipped.
static void ompt_multiplex_callback_masked(ompt_scope_endpoint_t endpoint,
                                           ompt_data_t *parallel_data,
                                           ompt_data_t *task_data,
                                           const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_masked != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_masked)(
        endpoint, own_parallel, own_task, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_masked != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_masked)(
        endpoint, client_parallel, client_task, codeptr_ra);
  }
}
515 
// Forward a parallel-begin event: first to the own tool, then to the client
// tool. Unless a custom accessor for parallel data is configured, a data pair
// is attached to parallel_data before any tool is called. Each tool receives
// its own view of the parent task data and the parallel data; a tool whose
// table entry is NULL is skipped.
static void ompt_multiplex_callback_parallel_begin(
    ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame,
    ompt_data_t *parallel_data, uint32_t requested_team_size, int flag,
    const void *codeptr_ra) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  ompt_multiplex_allocate_data_pair(parallel_data);
#endif
  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_begin != NULL) {
    ompt_data_t *own_parent_task =
        ompt_multiplex_get_own_task_data(parent_task_data);
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_parallel_begin)(
        own_parent_task, parent_task_frame, own_parallel, requested_team_size,
        flag, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_parallel_begin != NULL) {
    ompt_data_t *client_parent_task =
        ompt_multiplex_get_client_task_data(parent_task_data);
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_parallel_begin)(
        client_parent_task, parent_task_frame, client_parallel,
        requested_team_size, flag, codeptr_ra);
  }
}
537 
// Forward a parallel-end event to the own and to the client tool and release
// the parallel data afterwards. The client tool is called in any case (if its
// table entry is set); whether the own tool runs before or after it depends
// on the user's defines (see below). Finally the attached data pair is freed,
// or the custom delete macro is invoked if it is defined.
static void ompt_multiplex_callback_parallel_end(ompt_data_t *parallel_data,
                                                 ompt_data_t *task_data,
                                                 int flag,
                                                 const void *codeptr_ra) {
// The own tool is called before the client tool if a custom delete macro for
// parallel data is defined or no custom accessor for parallel data is in
// use ...
#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) ||                     \
    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_parallel_end)(
        own_parallel, own_task, flag, codeptr_ra);
  }
#endif

  if (ompt_multiplex_client_callbacks.ompt_callback_parallel_end != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    (*ompt_multiplex_client_callbacks.ompt_callback_parallel_end)(
        client_parallel, client_task, flag, codeptr_ra);
  }

// ... and after the client tool if a custom accessor is in use without a
// custom delete macro.
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) &&                 \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    (*ompt_multiplex_own_callbacks.ompt_callback_parallel_end)(
        own_parallel, own_task, flag, codeptr_ra);
  }
#endif

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  ompt_multiplex_free_data_pair(parallel_data);
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
  OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
#endif
}
576 
// Forward a task-create event: first to the own tool, then to the client
// tool. Unless custom accessors are configured, a data pair is attached to
// new_task_data and, for an initial task (type contains ompt_task_initial),
// to the parallel data reported by the runtime for ancestor level 0. Each
// tool receives its own view of the parent and the new task data; a tool
// whose table entry is NULL is skipped.
static void ompt_multiplex_callback_task_create(
    ompt_data_t *parent_task_data, const ompt_frame_t *parent_frame,
    ompt_data_t *new_task_data, int type, int has_dependences,
    const void *codeptr_ra) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  ompt_multiplex_allocate_data_pair(new_task_data);
#endif

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  if (type & ompt_task_initial) {
    // The runtime only writes parallel_data if it can provide the
    // information. Start from NULL and allocate only for a valid pointer
    // instead of dereferencing an uninitialized or NULL pointer.
    ompt_data_t *parallel_data = NULL;
    if (ompt_multiplex_get_parallel_info)
      ompt_multiplex_get_parallel_info(0, &parallel_data, NULL);
    if (parallel_data)
      ompt_multiplex_allocate_data_pair(parallel_data);
  }
#endif

  if (ompt_multiplex_own_callbacks.ompt_callback_task_create) {
    ompt_multiplex_own_callbacks.ompt_callback_task_create(
        ompt_multiplex_get_own_task_data(parent_task_data), parent_frame,
        ompt_multiplex_get_own_task_data(new_task_data), type, has_dependences,
        codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_task_create) {
    ompt_multiplex_client_callbacks.ompt_callback_task_create(
        ompt_multiplex_get_client_task_data(parent_task_data), parent_frame,
        ompt_multiplex_get_client_task_data(new_task_data), type,
        has_dependences, codeptr_ra);
  }
}
606 
607 static void
ompt_multiplex_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)608 ompt_multiplex_callback_task_schedule(ompt_data_t *first_task_data,
609                                       ompt_task_status_t prior_task_status,
610                                       ompt_data_t *second_task_data) {
611   if (prior_task_status != ompt_task_complete) {
612     if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
613       ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
614           ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
615           ompt_multiplex_get_own_task_data(second_task_data));
616     }
617     if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
618       ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
619           ompt_multiplex_get_client_task_data(first_task_data),
620           prior_task_status,
621           ompt_multiplex_get_client_task_data(second_task_data));
622     }
623   } else {
624 // defines to make sure, callbacks are called in correct order depending on
625 // defines set by the user
626 #if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
627     !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
628     if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
629       ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
630           ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
631           ompt_multiplex_get_own_task_data(second_task_data));
632     }
633 #endif
634 
635     if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
636       ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
637           ompt_multiplex_get_client_task_data(first_task_data),
638           prior_task_status,
639           ompt_multiplex_get_client_task_data(second_task_data));
640     }
641 
642 #if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
643     !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
644     if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
645       ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
646           ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
647           ompt_multiplex_get_own_task_data(second_task_data));
648     }
649 #endif
650 
651 #ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
652     ompt_multiplex_free_data_pair(first_task_data);
653 #endif
654 
655 #if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
656     OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(first_task_data);
657 #endif
658   }
659 }
660 
// Multiplexed handler for ompt_callback_dependences: hands the dependence
// list to the own tool and then to the client tool, each with its own view of
// task_data. deps and ndeps are passed through unchanged.
static void ompt_multiplex_callback_dependences(ompt_data_t *task_data,
                                                const ompt_dependence_t *deps,
                                                int ndeps) {
  if (ompt_multiplex_own_callbacks.ompt_callback_dependences != NULL) {
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_dependences(own_task, deps,
                                                           ndeps);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_dependences != NULL) {
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_dependences(client_task,
                                                              deps, ndeps);
  }
}
673 
674 static void
ompt_multiplex_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)675 ompt_multiplex_callback_task_dependence(ompt_data_t *first_task_data,
676                                         ompt_data_t *second_task_data) {
677   if (ompt_multiplex_own_callbacks.ompt_callback_task_dependence) {
678     ompt_multiplex_own_callbacks.ompt_callback_task_dependence(
679         ompt_multiplex_get_own_task_data(first_task_data),
680         ompt_multiplex_get_own_task_data(second_task_data));
681   }
682   if (ompt_multiplex_client_callbacks.ompt_callback_task_dependence) {
683     ompt_multiplex_client_callbacks.ompt_callback_task_dependence(
684         ompt_multiplex_get_client_task_data(first_task_data),
685         ompt_multiplex_get_client_task_data(second_task_data));
686   }
687 }
688 
// Multiplexed handler for ompt_callback_thread_begin. Allocates the data pair
// for the thread (unless a custom client-thread-data accessor is configured)
// and then reports the new thread to the own tool followed by the client
// tool.
static void ompt_multiplex_callback_thread_begin(ompt_thread_t thread_type,
                                                 ompt_data_t *thread_data) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  ompt_multiplex_allocate_data_pair(thread_data);
#endif
  if (ompt_multiplex_own_callbacks.ompt_callback_thread_begin != NULL) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    ompt_multiplex_own_callbacks.ompt_callback_thread_begin(thread_type,
                                                            own_thread);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_thread_begin != NULL) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    ompt_multiplex_client_callbacks.ompt_callback_thread_begin(thread_type,
                                                               client_thread);
  }
}
703 
// Multiplexed handler for ompt_callback_thread_end. Notifies both tools and
// then releases the data attached to the thread.
//
// Notification order depends on the user's configuration macros: with a
// custom client-thread-data getter but no custom delete hook the client tool
// is called before the own tool; otherwise the own tool is called first.
static void ompt_multiplex_callback_thread_end(ompt_data_t *thread_data) {
#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) &&                   \
    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
  if (ompt_multiplex_client_callbacks.ompt_callback_thread_end) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    ompt_multiplex_client_callbacks.ompt_callback_thread_end(client_thread);
  }
  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    ompt_multiplex_own_callbacks.ompt_callback_thread_end(own_thread);
  }
#else
  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
    ompt_multiplex_own_callbacks.ompt_callback_thread_end(own_thread);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_thread_end) {
    ompt_data_t *client_thread =
        ompt_multiplex_get_client_thread_data(thread_data);
    ompt_multiplex_client_callbacks.ompt_callback_thread_end(client_thread);
  }
#endif

  // Release what was attached to the ending thread.
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
  ompt_multiplex_free_data_pair(thread_data);
#endif

#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
  OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA(thread_data);
#endif
}
736 
ompt_multiplex_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)737 static int ompt_multiplex_callback_control_tool(uint64_t command,
738                                                 uint64_t modifier, void *arg,
739                                                 const void *codeptr_ra) {
740   int ownRet = 0, clientRet = 0;
741   if (ompt_multiplex_own_callbacks.ompt_callback_control_tool) {
742     ownRet = ompt_multiplex_own_callbacks.ompt_callback_control_tool(
743         command, modifier, arg, codeptr_ra);
744   }
745   if (ompt_multiplex_client_callbacks.ompt_callback_control_tool) {
746     clientRet = ompt_multiplex_client_callbacks.ompt_callback_control_tool(
747         command, modifier, arg, codeptr_ra);
748   }
749   return ownRet < clientRet ? ownRet : clientRet;
750 }
751 
// Multiplexed handler for ompt_callback_target: forwards the event to the own
// tool and then to the client tool. Only task_data is translated into the
// respective tool's view; all other arguments are passed through unchanged.
static void ompt_multiplex_callback_target(
    ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num,
    ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_target != NULL) {
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_target(
        kind, endpoint, device_num, own_task, target_id, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_target != NULL) {
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_target(
        kind, endpoint, device_num, client_task, target_id, codeptr_ra);
  }
}
766 
// Multiplexed handler for ompt_callback_target_data_op. No tool-specific data
// is involved, so the arguments go unchanged to the own tool and then to the
// client tool.
static void ompt_multiplex_callback_target_data_op(
    ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype,
    void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
    size_t bytes, const void *codeptr_ra) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_target_data_op != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_target_data_op(
        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
        dest_device_num, bytes, codeptr_ra);

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_target_data_op != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_target_data_op(
        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
        dest_device_num, bytes, codeptr_ra);
}
782 
783 static void
ompt_multiplex_callback_target_submit(ompt_id_t target_id,ompt_id_t host_op_id,unsigned int requested_num_teams)784 ompt_multiplex_callback_target_submit(ompt_id_t target_id, ompt_id_t host_op_id,
785                                       unsigned int requested_num_teams) {
786   if (ompt_multiplex_own_callbacks.ompt_callback_target_submit) {
787     ompt_multiplex_own_callbacks.ompt_callback_target_submit(
788         target_id, host_op_id, requested_num_teams);
789   }
790   if (ompt_multiplex_client_callbacks.ompt_callback_target_submit) {
791     ompt_multiplex_client_callbacks.ompt_callback_target_submit(
792         target_id, host_op_id, requested_num_teams);
793   }
794 }
795 
// Multiplexed handler for ompt_callback_device_initialize: the arguments are
// passed unchanged to the own tool and then to the client tool.
static void ompt_multiplex_callback_device_initialize(
    int device_num, const char *type, ompt_device_t *device,
    ompt_function_lookup_t lookup, const char *documentation) {
  // own tool
  if (ompt_multiplex_own_callbacks.ompt_callback_device_initialize != NULL)
    ompt_multiplex_own_callbacks.ompt_callback_device_initialize(
        device_num, type, device, lookup, documentation);

  // client tool
  if (ompt_multiplex_client_callbacks.ompt_callback_device_initialize != NULL)
    ompt_multiplex_client_callbacks.ompt_callback_device_initialize(
        device_num, type, device, lookup, documentation);
}
808 
ompt_multiplex_callback_device_finalize(int device_num)809 static void ompt_multiplex_callback_device_finalize(int device_num) {
810   if (ompt_multiplex_own_callbacks.ompt_callback_device_finalize) {
811     ompt_multiplex_own_callbacks.ompt_callback_device_finalize(device_num);
812   }
813   if (ompt_multiplex_client_callbacks.ompt_callback_device_finalize) {
814     ompt_multiplex_client_callbacks.ompt_callback_device_finalize(device_num);
815   }
816 }
817 
818 static void
ompt_multiplex_callback_device_load(int device_num,const char * filename,int64_t offset_in_file,void * vma_in_file,size_t bytes,void * host_addr,void * device_addr,uint64_t module_id)819 ompt_multiplex_callback_device_load(int device_num, const char *filename,
820                                     int64_t offset_in_file, void *vma_in_file,
821                                     size_t bytes, void *host_addr,
822                                     void *device_addr, uint64_t module_id) {
823   if (ompt_multiplex_own_callbacks.ompt_callback_device_load) {
824     ompt_multiplex_own_callbacks.ompt_callback_device_load(
825         device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
826         device_addr, module_id);
827   }
828   if (ompt_multiplex_client_callbacks.ompt_callback_device_load) {
829     ompt_multiplex_client_callbacks.ompt_callback_device_load(
830         device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
831         device_addr, module_id);
832   }
833 }
834 
ompt_multiplex_callback_device_unload(int device_num,uint64_t module_id)835 static void ompt_multiplex_callback_device_unload(int device_num,
836                                                   uint64_t module_id) {
837   if (ompt_multiplex_own_callbacks.ompt_callback_device_unload) {
838     ompt_multiplex_own_callbacks.ompt_callback_device_unload(device_num,
839                                                              module_id);
840   }
841   if (ompt_multiplex_client_callbacks.ompt_callback_device_unload) {
842     ompt_multiplex_client_callbacks.ompt_callback_device_unload(device_num,
843                                                                 module_id);
844   }
845 }
846 
847 static void
ompt_multiplex_callback_target_map(ompt_id_t target_id,unsigned int nitems,void ** host_addr,void ** device_addr,size_t * bytes,unsigned int * mapping_flags,const void * codeptr_ra)848 ompt_multiplex_callback_target_map(ompt_id_t target_id, unsigned int nitems,
849                                    void **host_addr, void **device_addr,
850                                    size_t *bytes, unsigned int *mapping_flags,
851                                    const void *codeptr_ra) {
852   if (ompt_multiplex_own_callbacks.ompt_callback_target_map) {
853     ompt_multiplex_own_callbacks.ompt_callback_target_map(
854         target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
855         codeptr_ra);
856   }
857   if (ompt_multiplex_client_callbacks.ompt_callback_target_map) {
858     ompt_multiplex_client_callbacks.ompt_callback_target_map(
859         target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
860         codeptr_ra);
861   }
862 }
863 
// Multiplexed handler for ompt_callback_reduction: forwards the event to the
// own tool and then to the client tool. parallel_data and task_data are
// translated into the respective tool's view; kind, endpoint and codeptr_ra
// are passed through unchanged.
static void ompt_multiplex_callback_reduction(ompt_sync_region_t kind,
                                              ompt_scope_endpoint_t endpoint,
                                              ompt_data_t *parallel_data,
                                              ompt_data_t *task_data,
                                              const void *codeptr_ra) {
  if (ompt_multiplex_own_callbacks.ompt_callback_reduction != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_reduction(
        kind, endpoint, own_parallel, own_task, codeptr_ra);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_reduction != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_reduction(
        kind, endpoint, client_parallel, client_task, codeptr_ra);
  }
}
880 
// Multiplexed handler for ompt_callback_dispatch: forwards the event to the
// own tool and then to the client tool. parallel_data and task_data are
// translated into the respective tool's view; kind and instance are passed
// through unchanged.
static void ompt_multiplex_callback_dispatch(ompt_data_t *parallel_data,
                                             ompt_data_t *task_data,
                                             ompt_dispatch_t kind,
                                             ompt_data_t instance) {
  if (ompt_multiplex_own_callbacks.ompt_callback_dispatch != NULL) {
    ompt_data_t *own_parallel =
        ompt_multiplex_get_own_parallel_data(parallel_data);
    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
    ompt_multiplex_own_callbacks.ompt_callback_dispatch(own_parallel, own_task,
                                                        kind, instance);
  }
  if (ompt_multiplex_client_callbacks.ompt_callback_dispatch != NULL) {
    ompt_data_t *client_parallel =
        ompt_multiplex_get_client_parallel_data(parallel_data);
    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
    ompt_multiplex_client_callbacks.ompt_callback_dispatch(
        client_parallel, client_task, kind, instance);
  }
}
896 
897 // runtime entry functions
898 
// Replacement for ompt_get_task_info handed to the own tool (see
// ompt_multiplex_own_lookup). Queries the runtime and, unless custom
// client-data accessors are configured, rewrites the returned task and
// parallel data pointers so that they refer to the own tool's part of the
// data.
//
// All parameters have the meaning of the runtime's ompt_get_task_info entry
// point; NULL output pointers are allowed and skipped.
// Returns the runtime's return value unchanged.
int ompt_multiplex_own_get_task_info(int ancestor_level, int *type,
                                     ompt_data_t **task_data,
                                     ompt_frame_t **task_frame,
                                     ompt_data_t **parallel_data,
                                     int *thread_num) {
  int ret = ompt_multiplex_get_task_info(ancestor_level, type, task_data,
                                         task_frame, parallel_data, thread_num);

  // The OMPT interface only promises valid outputs when the entry point
  // returns 2. For any other result the caller's variables may never have
  // been written, so they must not be dereferenced or translated.
  if (ret != 2)
    return ret;

#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
  if (task_data && *task_data)
    *task_data = ompt_multiplex_get_own_ompt_data(*task_data);
#endif
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
  if (parallel_data && *parallel_data)
    *parallel_data = ompt_multiplex_get_own_ompt_data(*parallel_data);
#endif
  return ret;
}
917 
// Replacement for ompt_get_task_info handed to the client tool (see
// ompt_multiplex_client_lookup). Queries the runtime and rewrites the
// returned task and parallel data pointers so that they refer to the client
// tool's data, using the user-supplied accessor macros when they are defined.
//
// All parameters have the meaning of the runtime's ompt_get_task_info entry
// point; NULL output pointers are allowed and skipped.
// Returns the runtime's return value unchanged.
int ompt_multiplex_client_get_task_info(int ancestor_level, int *type,
                                        ompt_data_t **task_data,
                                        ompt_frame_t **task_frame,
                                        ompt_data_t **parallel_data,
                                        int *thread_num) {
  int ret = ompt_multiplex_get_task_info(ancestor_level, type, task_data,
                                         task_frame, parallel_data, thread_num);

  // The OMPT interface only promises valid outputs when the entry point
  // returns 2. For any other result the caller's variables may never have
  // been written, so they must not be dereferenced or translated.
  if (ret != 2)
    return ret;

  if (task_data && *task_data) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
    *task_data = ompt_multiplex_get_client_ompt_data(*task_data);
#else
    *task_data = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(*task_data);
#endif
  }

  if (parallel_data && *parallel_data) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
#else
    *parallel_data =
        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
#endif
  }
  return ret;
}
942 
ompt_multiplex_own_get_thread_data()943 ompt_data_t *ompt_multiplex_own_get_thread_data() {
944   ompt_data_t *ret;
945 #ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
946   ret = ompt_multiplex_get_own_ompt_data(ompt_multiplex_get_thread_data());
947 #else
948   ret = ompt_multiplex_get_thread_data();
949 #endif
950   return ret;
951 }
952 
ompt_multiplex_client_get_thread_data()953 ompt_data_t *ompt_multiplex_client_get_thread_data() {
954   ompt_data_t *ret;
955 #ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
956   ret = ompt_multiplex_get_client_ompt_data(ompt_multiplex_get_thread_data());
957 #else
958   ret = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(
959       ompt_multiplex_get_thread_data());
960 #endif
961   return ret;
962 }
963 
// Replacement for ompt_get_parallel_info handed to the own tool (see
// ompt_multiplex_own_lookup). Queries the runtime and rewrites the returned
// parallel data pointer into the own tool's view of that data.
//
// All parameters have the meaning of the runtime's ompt_get_parallel_info
// entry point; a NULL parallel_data is allowed and skipped.
// Returns the runtime's return value unchanged.
int ompt_multiplex_own_get_parallel_info(int ancestor_level,
                                         ompt_data_t **parallel_data,
                                         int *team_size) {
  int ret = ompt_multiplex_get_parallel_info(ancestor_level, parallel_data,
                                             team_size);

  // The OMPT interface only promises valid outputs when the entry point
  // returns 2. For any other result the caller's variable may never have
  // been written, so it must not be dereferenced or translated.
  if (ret == 2 && parallel_data && *parallel_data)
    *parallel_data = ompt_multiplex_get_own_parallel_data(*parallel_data);
  return ret;
}
973 
// Replacement for ompt_get_parallel_info handed to the client tool (see
// ompt_multiplex_client_lookup). Queries the runtime and rewrites the
// returned parallel data pointer so that it refers to the client tool's data,
// using the user-supplied accessor macro when it is defined.
//
// All parameters have the meaning of the runtime's ompt_get_parallel_info
// entry point; a NULL parallel_data is allowed and skipped.
// Returns the runtime's return value unchanged.
int ompt_multiplex_client_get_parallel_info(int ancestor_level,
                                            ompt_data_t **parallel_data,
                                            int *team_size) {
  int ret = ompt_multiplex_get_parallel_info(ancestor_level, parallel_data,
                                             team_size);

  // The OMPT interface only promises valid outputs when the entry point
  // returns 2. For any other result the caller's variable may never have
  // been written, so it must not be dereferenced or translated.
  if (ret == 2 && parallel_data && *parallel_data) {
#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
#else
    *parallel_data =
        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
#endif
  }
  return ret;
}
988 
ompt_multiplex_own_set_callback(ompt_callbacks_t which,ompt_callback_t callback)989 OMPT_API_ROUTINE int ompt_multiplex_own_set_callback(ompt_callbacks_t which,
990                                                      ompt_callback_t callback) {
991   switch (which) {
992 
993 #define ompt_event_macro(event_name, callback_type, event_id)                  \
994   case ompt_##event_name:                                                      \
995     ompt_multiplex_own_callbacks.ompt_##event_name = (callback_type)callback;  \
996     if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
997       return ompt_multiplex_implementation_status.ompt_##event_name =          \
998                  ompt_multiplex_set_callback(                                  \
999                      ompt_##event_name,                                        \
1000                      (ompt_callback_t)&ompt_multiplex_##event_name);           \
1001     else                                                                       \
1002       return ompt_multiplex_implementation_status.ompt_##event_name
1003 
1004     OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
1005 
1006 #undef ompt_event_macro
1007 
1008   default:
1009     return ompt_set_error;
1010   }
1011 }
1012 
1013 OMPT_API_ROUTINE int
ompt_multiplex_client_set_callback(ompt_callbacks_t which,ompt_callback_t callback)1014 ompt_multiplex_client_set_callback(ompt_callbacks_t which,
1015                                    ompt_callback_t callback) {
1016   switch (which) {
1017 
1018 #define ompt_event_macro(event_name, callback_type, event_id)                  \
1019   case ompt_##event_name:                                                      \
1020     ompt_multiplex_client_callbacks.ompt_##event_name =                        \
1021         (callback_type)callback;                                               \
1022     if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
1023       return ompt_multiplex_implementation_status.ompt_##event_name =          \
1024                  ompt_multiplex_set_callback(                                  \
1025                      ompt_##event_name,                                        \
1026                      (ompt_callback_t)&ompt_multiplex_##event_name);           \
1027     else                                                                       \
1028       return ompt_multiplex_implementation_status.ompt_##event_name
1029 
1030     OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
1031 
1032 #undef ompt_event_macro
1033 
1034   default:
1035     return ompt_set_error;
1036   }
1037 }
1038 
ompt_multiplex_own_lookup(const char * name)1039 ompt_interface_fn_t ompt_multiplex_own_lookup(const char *name) {
1040   if (!strcmp(name, "ompt_set_callback"))
1041     return (ompt_interface_fn_t)&ompt_multiplex_own_set_callback;
1042   else if (!strcmp(name, "ompt_get_task_info"))
1043     return (ompt_interface_fn_t)&ompt_multiplex_own_get_task_info;
1044   else if (!strcmp(name, "ompt_get_thread_data"))
1045     return (ompt_interface_fn_t)&ompt_multiplex_own_get_thread_data;
1046   else if (!strcmp(name, "ompt_get_parallel_info"))
1047     return (ompt_interface_fn_t)&ompt_multiplex_own_get_parallel_info;
1048   else
1049     return ompt_multiplex_lookup_function(name);
1050 }
1051 
ompt_multiplex_client_lookup(const char * name)1052 ompt_interface_fn_t ompt_multiplex_client_lookup(const char *name) {
1053   if (!strcmp(name, "ompt_set_callback"))
1054     return (ompt_interface_fn_t)&ompt_multiplex_client_set_callback;
1055   else if (!strcmp(name, "ompt_get_task_info"))
1056     return (ompt_interface_fn_t)&ompt_multiplex_client_get_task_info;
1057   else if (!strcmp(name, "ompt_get_thread_data"))
1058     return (ompt_interface_fn_t)&ompt_multiplex_client_get_thread_data;
1059   else if (!strcmp(name, "ompt_get_parallel_info"))
1060     return (ompt_interface_fn_t)&ompt_multiplex_client_get_parallel_info;
1061   else
1062     return ompt_multiplex_lookup_function(name);
1063 }
1064 
// Initializer of the multiplexed tool.
//
// Remembers the runtime's lookup function and the runtime entry points the
// multiplexer wraps, marks every event as "not registered yet" (-1), and then
// initializes the own tool followed by the client tool (if one was loaded).
// Each tool gets its own lookup function and its own tool_data.
// Returns the larger of the two initializer results; a missing client tool
// contributes 0. The `data` argument is not used.
int ompt_multiplex_initialize(ompt_function_lookup_t lookup,
                              int initial_device_num, ompt_data_t *data) {
  // Runtime entry points used by the wrappers.
  ompt_multiplex_lookup_function = lookup;
  ompt_multiplex_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  ompt_multiplex_get_task_info =
      (ompt_get_task_info_t)lookup("ompt_get_task_info");
  ompt_multiplex_get_thread_data =
      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
  ompt_multiplex_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");

  // Reset the cached registration status of every event.
#define ompt_event_macro(event_name, callback_type, event_id)                  \
  ompt_multiplex_implementation_status.ompt_##event_name = -1

  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)

#undef ompt_event_macro

  int own_status = ompt_multiplex_own_fns->initialize(
      ompt_multiplex_own_lookup, initial_device_num,
      &(ompt_multiplex_own_fns->tool_data));

  int client_status = 0;
  if (ompt_multiplex_client_fns) {
    client_status = ompt_multiplex_client_fns->initialize(
        ompt_multiplex_client_lookup, initial_device_num,
        &(ompt_multiplex_client_fns->tool_data));
  }

  if (own_status > client_status)
    return own_status;
  return client_status;
}
1096 
// Finalizer of the multiplexed tool: finalizes the client tool first (if one
// was loaded) and the own tool afterwards, each with its own tool_data.
// The `fns` argument is not used.
void ompt_multiplex_finalize(ompt_data_t *fns) {
  if (ompt_multiplex_client_fns != NULL) {
    ompt_data_t *client_tool_data = &(ompt_multiplex_client_fns->tool_data);
    ompt_multiplex_client_fns->finalize(client_tool_data);
  }

  ompt_data_t *own_tool_data = &(ompt_multiplex_own_fns->tool_data);
  ompt_multiplex_own_fns->finalize(own_tool_data);
}
1103 
1104 #ifdef __cplusplus
1105 extern "C" {
1106 #endif
1107 
1108 // forward declaration because of name shifting from ompt_start_tool
1109 // to ompt_multiplex_own_start_tool below
1110 ompt_start_tool_result_t *
1111 ompt_multiplex_own_start_tool(unsigned int omp_version,
1112                               const char *runtime_version);
1113 
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1114 ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
1115                                           const char *runtime_version) {
1116   setup_verbose_init();
1117   OMPT_VERBOSE_INIT_PRINT(
1118       "----- START LOGGING OF CLIENT TOOL REGISTRATION -----\n");
1119   // try loading client tool
1120   OMPT_VERBOSE_INIT_PRINT("Search for " CLIENT_TOOL_LIBRARIES_VAR
1121                           " env var... ");
1122   const char *tool_libs = getenv(CLIENT_TOOL_LIBRARIES_VAR);
1123   if (tool_libs) {
1124     OMPT_VERBOSE_INIT_CONTINUED_PRINT("Sucess.\n");
1125     OMPT_VERBOSE_INIT_PRINT(CLIENT_TOOL_LIBRARIES_VAR " = %s\n", tool_libs);
1126     // copy environement variable
1127     char *tool_libs_buffer = strdup(tool_libs);
1128     if (!tool_libs_buffer) {
1129       printf("strdup Error (%i)\n", errno);
1130       exit(-1);
1131     }
1132 
1133     int progress = 0;
1134     // Reset dl-error
1135     dlerror();
1136     while (progress < strlen(tool_libs)) {
1137       ompt_multiplex_client_fns = NULL;
1138       ompt_start_tool_result_t *(*client_start_tool)(unsigned int,
1139                                                      const char *) = NULL;
1140       OMPT_VERBOSE_INIT_PRINT(
1141           "Look for candidates within " CLIENT_TOOL_LIBRARIES_VAR "...\n");
1142       int tmp_progress = progress;
1143       while (tmp_progress < strlen(tool_libs) &&
1144              tool_libs_buffer[tmp_progress] != ':')
1145         tmp_progress++;
1146       if (tmp_progress < strlen(tool_libs))
1147         tool_libs_buffer[tmp_progress] = 0;
1148       OMPT_VERBOSE_INIT_PRINT("Try out one candidate...\n");
1149       char *fname = tool_libs_buffer + progress;
1150       OMPT_VERBOSE_INIT_PRINT("Opening %s... ", fname);
1151       void *h = dlopen(fname, RTLD_LAZY);
1152       if (h) {
1153         client_start_tool =
1154             (ompt_start_tool_result_t * (*)(unsigned int, const char *))
1155                 dlsym(h, "ompt_start_tool");
1156         if (client_start_tool &&
1157             (ompt_multiplex_client_fns =
1158                  (*client_start_tool)(omp_version, runtime_version))) {
1159           OMPT_VERBOSE_INIT_CONTINUED_PRINT("Sucess.\n");
1160           OMPT_VERBOSE_INIT_PRINT(
1161               "Tool was started and is using the OMPT interface.\n");
1162           break;
1163         } else {
1164           OMPT_VERBOSE_INIT_CONTINUED_PRINT(
1165               "Failed: client_start_tool = %p, ompt_multiplex_client_fns = %p, "
1166               "%s\n",
1167               client_start_tool, ompt_multiplex_client_fns, dlerror());
1168         }
1169       } else {
1170         OMPT_VERBOSE_INIT_CONTINUED_PRINT("Failed: %s\n", dlerror());
1171         printf("Loading %s from %s failed with: %s\n",
1172                tool_libs_buffer + progress, CLIENT_TOOL_LIBRARIES_VAR,
1173                dlerror());
1174       }
1175       progress = tmp_progress + 1;
1176     }
1177     free(tool_libs_buffer);
1178     OMPT_VERBOSE_INIT_PRINT(
1179         "----- END LOGGING OF CLIENT TOOL REGISTRATION -----\n");
1180   }
1181   // load own tool
1182   OMPT_VERBOSE_INIT_PRINT(
1183       "----- START LOGGING OF OWN TOOL REGISTRATION -----\n");
1184   ompt_multiplex_own_fns =
1185       ompt_multiplex_own_start_tool(omp_version, runtime_version);
1186   OMPT_VERBOSE_INIT_PRINT("ompt_multiplex_own_fns = %p\n",
1187                           ompt_multiplex_own_fns);
1188   OMPT_VERBOSE_INIT_PRINT("----- END LOGGING OF OWN TOOL REGISTRATION -----\n");
1189   // return multiplexed versions
1190   static ompt_start_tool_result_t ompt_start_tool_result = {
1191       &ompt_multiplex_initialize, &ompt_multiplex_finalize, {0}};
1192   if (verbose_init && verbose_file != stderr && verbose_file != stdout)
1193     fclose(verbose_file);
1194   if (!ompt_multiplex_client_fns)
1195     return ompt_multiplex_own_fns;
1196   if (!ompt_multiplex_own_fns)
1197     return ompt_multiplex_client_fns;
1198   return &ompt_start_tool_result;
1199 }
1200 #ifdef __cplusplus
1201 }
1202 #endif
1203 
1204 // We rename the ompt_start_tool function of the OMPT tool and call the
1205 // renamed function from the ompt_start_tool function defined above.
1206 #define ompt_start_tool ompt_multiplex_own_start_tool
1207 
1208 #endif /* OMPT_MULTIPLEX_H */
1209