1*4c3eb207Smrg /* OpenACC Profiling Interface
2*4c3eb207Smrg
3*4c3eb207Smrg Copyright (C) 2019-2020 Free Software Foundation, Inc.
4*4c3eb207Smrg
5*4c3eb207Smrg Contributed by Mentor, a Siemens Business.
6*4c3eb207Smrg
7*4c3eb207Smrg This file is part of the GNU Offloading and Multi Processing Library
8*4c3eb207Smrg (libgomp).
9*4c3eb207Smrg
10*4c3eb207Smrg Libgomp is free software; you can redistribute it and/or modify it
11*4c3eb207Smrg under the terms of the GNU General Public License as published by
12*4c3eb207Smrg the Free Software Foundation; either version 3, or (at your option)
13*4c3eb207Smrg any later version.
14*4c3eb207Smrg
15*4c3eb207Smrg Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16*4c3eb207Smrg WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17*4c3eb207Smrg FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18*4c3eb207Smrg more details.
19*4c3eb207Smrg
20*4c3eb207Smrg Under Section 7 of GPL version 3, you are granted additional
21*4c3eb207Smrg permissions described in the GCC Runtime Library Exception, version
22*4c3eb207Smrg 3.1, as published by the Free Software Foundation.
23*4c3eb207Smrg
24*4c3eb207Smrg You should have received a copy of the GNU General Public License and
25*4c3eb207Smrg a copy of the GCC Runtime Library Exception along with this program;
26*4c3eb207Smrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27*4c3eb207Smrg <http://www.gnu.org/licenses/>. */
28*4c3eb207Smrg
29*4c3eb207Smrg #define _GNU_SOURCE
30*4c3eb207Smrg #include "libgomp.h"
31*4c3eb207Smrg #include "oacc-int.h"
32*4c3eb207Smrg #include "secure_getenv.h"
33*4c3eb207Smrg #include "acc_prof.h"
34*4c3eb207Smrg #include <assert.h>
35*4c3eb207Smrg #ifdef HAVE_STRING_H
36*4c3eb207Smrg # include <string.h>
37*4c3eb207Smrg #endif
38*4c3eb207Smrg #ifdef PLUGIN_SUPPORT
39*4c3eb207Smrg # include <dlfcn.h>
40*4c3eb207Smrg #endif
41*4c3eb207Smrg
42*4c3eb207Smrg #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43*4c3eb207Smrg
44*4c3eb207Smrg /* Statically assert that the layout of the common fields in the
45*4c3eb207Smrg 'acc_event_info' variants matches. */
46*4c3eb207Smrg /* 'event_type' */
47*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, event_type)
48*4c3eb207Smrg == offsetof (acc_event_info, data_event.event_type));
49*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50*4c3eb207Smrg == offsetof (acc_event_info, launch_event.event_type));
51*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52*4c3eb207Smrg == offsetof (acc_event_info, other_event.event_type));
53*4c3eb207Smrg /* 'valid_bytes' */
54*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55*4c3eb207Smrg == offsetof (acc_event_info, launch_event.valid_bytes));
56*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57*4c3eb207Smrg == offsetof (acc_event_info, other_event.valid_bytes));
58*4c3eb207Smrg /* 'parent_construct' */
59*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60*4c3eb207Smrg == offsetof (acc_event_info, launch_event.parent_construct));
61*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62*4c3eb207Smrg == offsetof (acc_event_info, other_event.parent_construct));
63*4c3eb207Smrg /* 'implicit' */
64*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65*4c3eb207Smrg == offsetof (acc_event_info, launch_event.implicit));
66*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67*4c3eb207Smrg == offsetof (acc_event_info, other_event.implicit));
68*4c3eb207Smrg /* 'tool_info' */
69*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70*4c3eb207Smrg == offsetof (acc_event_info, launch_event.tool_info));
71*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72*4c3eb207Smrg == offsetof (acc_event_info, other_event.tool_info));
73*4c3eb207Smrg
74*4c3eb207Smrg struct goacc_prof_callback_entry
75*4c3eb207Smrg {
76*4c3eb207Smrg acc_prof_callback cb;
77*4c3eb207Smrg int ref;
78*4c3eb207Smrg bool enabled;
79*4c3eb207Smrg struct goacc_prof_callback_entry *next;
80*4c3eb207Smrg };
81*4c3eb207Smrg
82*4c3eb207Smrg /* Use a separate flag to minimize run-time performance impact for the (very
83*4c3eb207Smrg common) case that profiling is not enabled.
84*4c3eb207Smrg
85*4c3eb207Smrg Once enabled, we're not going to disable this anymore, anywhere. We
86*4c3eb207Smrg probably could, by adding appropriate logic to 'acc_prof_register',
87*4c3eb207Smrg 'acc_prof_unregister'. */
88*4c3eb207Smrg bool goacc_prof_enabled = false;
89*4c3eb207Smrg
90*4c3eb207Smrg /* Global state for registered callbacks.
91*4c3eb207Smrg 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
92*4c3eb207Smrg static bool goacc_prof_callbacks_enabled[acc_ev_last];
93*4c3eb207Smrg static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
94*4c3eb207Smrg /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95*4c3eb207Smrg 'goacc_prof_callback_entries'. */
96*4c3eb207Smrg static gomp_mutex_t goacc_prof_lock;
97*4c3eb207Smrg
98*4c3eb207Smrg void
goacc_profiling_initialize(void)99*4c3eb207Smrg goacc_profiling_initialize (void)
100*4c3eb207Smrg {
101*4c3eb207Smrg gomp_mutex_init (&goacc_prof_lock);
102*4c3eb207Smrg
103*4c3eb207Smrg /* Initially, all callbacks for all events are enabled. */
104*4c3eb207Smrg for (int i = 0; i < acc_ev_last; ++i)
105*4c3eb207Smrg goacc_prof_callbacks_enabled[i] = true;
106*4c3eb207Smrg
107*4c3eb207Smrg
108*4c3eb207Smrg #ifdef PLUGIN_SUPPORT
109*4c3eb207Smrg char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110*4c3eb207Smrg while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111*4c3eb207Smrg {
112*4c3eb207Smrg char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113*4c3eb207Smrg char *acc_proflib;
114*4c3eb207Smrg if (acc_proflibs_sep == acc_proflibs)
115*4c3eb207Smrg {
116*4c3eb207Smrg /* Stray ';' separator: make sure we don't 'dlopen' the main
117*4c3eb207Smrg program. */
118*4c3eb207Smrg acc_proflib = NULL;
119*4c3eb207Smrg }
120*4c3eb207Smrg else
121*4c3eb207Smrg {
122*4c3eb207Smrg if (acc_proflibs_sep != NULL)
123*4c3eb207Smrg {
124*4c3eb207Smrg /* Single out the first library. */
125*4c3eb207Smrg acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126*4c3eb207Smrg memcpy (acc_proflib, acc_proflibs,
127*4c3eb207Smrg acc_proflibs_sep - acc_proflibs);
128*4c3eb207Smrg acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129*4c3eb207Smrg }
130*4c3eb207Smrg else
131*4c3eb207Smrg {
132*4c3eb207Smrg /* No ';' separator, so only one library. */
133*4c3eb207Smrg acc_proflib = acc_proflibs;
134*4c3eb207Smrg }
135*4c3eb207Smrg
136*4c3eb207Smrg gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137*4c3eb207Smrg void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138*4c3eb207Smrg if (dl_handle != NULL)
139*4c3eb207Smrg {
140*4c3eb207Smrg typeof (&acc_register_library) a_r_l
141*4c3eb207Smrg = dlsym (dl_handle, "acc_register_library");
142*4c3eb207Smrg if (a_r_l == NULL)
143*4c3eb207Smrg goto dl_fail;
144*4c3eb207Smrg gomp_debug (0, " %s: calling %s:acc_register_library\n",
145*4c3eb207Smrg __FUNCTION__, acc_proflib);
146*4c3eb207Smrg a_r_l (acc_prof_register, acc_prof_unregister,
147*4c3eb207Smrg acc_prof_lookup);
148*4c3eb207Smrg }
149*4c3eb207Smrg else
150*4c3eb207Smrg {
151*4c3eb207Smrg dl_fail:
152*4c3eb207Smrg gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153*4c3eb207Smrg acc_proflib, dlerror ());
154*4c3eb207Smrg if (dl_handle != NULL)
155*4c3eb207Smrg {
156*4c3eb207Smrg int err = dlclose (dl_handle);
157*4c3eb207Smrg dl_handle = NULL;
158*4c3eb207Smrg if (err != 0)
159*4c3eb207Smrg goto dl_fail;
160*4c3eb207Smrg }
161*4c3eb207Smrg }
162*4c3eb207Smrg }
163*4c3eb207Smrg
164*4c3eb207Smrg if (acc_proflib != acc_proflibs)
165*4c3eb207Smrg {
166*4c3eb207Smrg free (acc_proflib);
167*4c3eb207Smrg
168*4c3eb207Smrg acc_proflibs = acc_proflibs_sep + 1;
169*4c3eb207Smrg }
170*4c3eb207Smrg else
171*4c3eb207Smrg acc_proflibs = NULL;
172*4c3eb207Smrg }
173*4c3eb207Smrg #endif /* PLUGIN_SUPPORT */
174*4c3eb207Smrg }
175*4c3eb207Smrg
176*4c3eb207Smrg void
acc_prof_register(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)177*4c3eb207Smrg acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178*4c3eb207Smrg {
179*4c3eb207Smrg gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180*4c3eb207Smrg __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181*4c3eb207Smrg
182*4c3eb207Smrg
183*4c3eb207Smrg /* For any events to be dispatched, the user first has to register a
184*4c3eb207Smrg callback, which makes this here a good place for enabling the whole
185*4c3eb207Smrg machinery. */
186*4c3eb207Smrg if (!GOACC_PROF_ENABLED)
187*4c3eb207Smrg __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188*4c3eb207Smrg
189*4c3eb207Smrg
190*4c3eb207Smrg enum
191*4c3eb207Smrg {
192*4c3eb207Smrg EVENT_KIND_BOGUS,
193*4c3eb207Smrg EVENT_KIND_NORMAL,
194*4c3eb207Smrg /* As end events invoke callbacks in the reverse order, we register these
195*4c3eb207Smrg in the reverse order here. */
196*4c3eb207Smrg EVENT_KIND_END,
197*4c3eb207Smrg } event_kind = EVENT_KIND_BOGUS;
198*4c3eb207Smrg switch (ev)
199*4c3eb207Smrg {
200*4c3eb207Smrg case acc_ev_none:
201*4c3eb207Smrg case acc_ev_device_init_start:
202*4c3eb207Smrg case acc_ev_device_shutdown_start:
203*4c3eb207Smrg case acc_ev_runtime_shutdown:
204*4c3eb207Smrg case acc_ev_create:
205*4c3eb207Smrg case acc_ev_delete:
206*4c3eb207Smrg case acc_ev_alloc:
207*4c3eb207Smrg case acc_ev_free:
208*4c3eb207Smrg case acc_ev_enter_data_start:
209*4c3eb207Smrg case acc_ev_exit_data_start:
210*4c3eb207Smrg case acc_ev_update_start:
211*4c3eb207Smrg case acc_ev_compute_construct_start:
212*4c3eb207Smrg case acc_ev_enqueue_launch_start:
213*4c3eb207Smrg case acc_ev_enqueue_upload_start:
214*4c3eb207Smrg case acc_ev_enqueue_download_start:
215*4c3eb207Smrg case acc_ev_wait_start:
216*4c3eb207Smrg event_kind = EVENT_KIND_NORMAL;
217*4c3eb207Smrg break;
218*4c3eb207Smrg case acc_ev_device_init_end:
219*4c3eb207Smrg case acc_ev_device_shutdown_end:
220*4c3eb207Smrg case acc_ev_enter_data_end:
221*4c3eb207Smrg case acc_ev_exit_data_end:
222*4c3eb207Smrg case acc_ev_update_end:
223*4c3eb207Smrg case acc_ev_compute_construct_end:
224*4c3eb207Smrg case acc_ev_enqueue_launch_end:
225*4c3eb207Smrg case acc_ev_enqueue_upload_end:
226*4c3eb207Smrg case acc_ev_enqueue_download_end:
227*4c3eb207Smrg case acc_ev_wait_end:
228*4c3eb207Smrg event_kind = EVENT_KIND_END;
229*4c3eb207Smrg break;
230*4c3eb207Smrg case acc_ev_last:
231*4c3eb207Smrg break;
232*4c3eb207Smrg }
233*4c3eb207Smrg if (event_kind == EVENT_KIND_BOGUS)
234*4c3eb207Smrg {
235*4c3eb207Smrg /* Silently ignore. */
236*4c3eb207Smrg gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
237*4c3eb207Smrg return;
238*4c3eb207Smrg }
239*4c3eb207Smrg
240*4c3eb207Smrg bool bogus = true;
241*4c3eb207Smrg switch (reg)
242*4c3eb207Smrg {
243*4c3eb207Smrg case acc_reg:
244*4c3eb207Smrg case acc_toggle:
245*4c3eb207Smrg case acc_toggle_per_thread:
246*4c3eb207Smrg bogus = false;
247*4c3eb207Smrg break;
248*4c3eb207Smrg }
249*4c3eb207Smrg if (bogus)
250*4c3eb207Smrg {
251*4c3eb207Smrg /* Silently ignore. */
252*4c3eb207Smrg gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
253*4c3eb207Smrg return;
254*4c3eb207Smrg }
255*4c3eb207Smrg
256*4c3eb207Smrg /* Special cases. */
257*4c3eb207Smrg if (reg == acc_toggle)
258*4c3eb207Smrg {
259*4c3eb207Smrg if (cb == NULL)
260*4c3eb207Smrg {
261*4c3eb207Smrg gomp_debug (0, " globally enabling callbacks\n");
262*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
263*4c3eb207Smrg /* For 'acc_ev_none', this acts as a global toggle. */
264*4c3eb207Smrg goacc_prof_callbacks_enabled[ev] = true;
265*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
266*4c3eb207Smrg return;
267*4c3eb207Smrg }
268*4c3eb207Smrg else if (ev == acc_ev_none && cb != NULL)
269*4c3eb207Smrg {
270*4c3eb207Smrg gomp_debug (0, " ignoring request\n");
271*4c3eb207Smrg return;
272*4c3eb207Smrg }
273*4c3eb207Smrg }
274*4c3eb207Smrg else if (reg == acc_toggle_per_thread)
275*4c3eb207Smrg {
276*4c3eb207Smrg if (ev == acc_ev_none && cb == NULL)
277*4c3eb207Smrg {
278*4c3eb207Smrg gomp_debug (0, " thread: enabling callbacks\n");
279*4c3eb207Smrg goacc_lazy_initialize ();
280*4c3eb207Smrg struct goacc_thread *thr = goacc_thread ();
281*4c3eb207Smrg thr->prof_callbacks_enabled = true;
282*4c3eb207Smrg return;
283*4c3eb207Smrg }
284*4c3eb207Smrg /* Silently ignore. */
285*4c3eb207Smrg gomp_debug (0, " ignoring bogus request\n");
286*4c3eb207Smrg return;
287*4c3eb207Smrg }
288*4c3eb207Smrg
289*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
290*4c3eb207Smrg
291*4c3eb207Smrg struct goacc_prof_callback_entry *it, *it_p;
292*4c3eb207Smrg it = goacc_prof_callback_entries[ev];
293*4c3eb207Smrg it_p = NULL;
294*4c3eb207Smrg while (it)
295*4c3eb207Smrg {
296*4c3eb207Smrg if (it->cb == cb)
297*4c3eb207Smrg break;
298*4c3eb207Smrg it_p = it;
299*4c3eb207Smrg it = it->next;
300*4c3eb207Smrg }
301*4c3eb207Smrg
302*4c3eb207Smrg switch (reg)
303*4c3eb207Smrg {
304*4c3eb207Smrg case acc_reg:
305*4c3eb207Smrg /* If we already have this callback registered, just increment its
306*4c3eb207Smrg reference count. */
307*4c3eb207Smrg if (it != NULL)
308*4c3eb207Smrg {
309*4c3eb207Smrg it->ref++;
310*4c3eb207Smrg gomp_debug (0, " already registered;"
311*4c3eb207Smrg " incrementing reference count to: %d\n", it->ref);
312*4c3eb207Smrg }
313*4c3eb207Smrg else
314*4c3eb207Smrg {
315*4c3eb207Smrg struct goacc_prof_callback_entry *e
316*4c3eb207Smrg = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317*4c3eb207Smrg e->cb = cb;
318*4c3eb207Smrg e->ref = 1;
319*4c3eb207Smrg e->enabled = true;
320*4c3eb207Smrg bool prepend = (event_kind == EVENT_KIND_END);
321*4c3eb207Smrg /* If we don't have any callback registered yet, also use the
322*4c3eb207Smrg 'prepend' code path. */
323*4c3eb207Smrg if (it_p == NULL)
324*4c3eb207Smrg prepend = true;
325*4c3eb207Smrg if (prepend)
326*4c3eb207Smrg {
327*4c3eb207Smrg gomp_debug (0, " prepending\n");
328*4c3eb207Smrg e->next = goacc_prof_callback_entries[ev];
329*4c3eb207Smrg goacc_prof_callback_entries[ev] = e;
330*4c3eb207Smrg }
331*4c3eb207Smrg else
332*4c3eb207Smrg {
333*4c3eb207Smrg gomp_debug (0, " appending\n");
334*4c3eb207Smrg e->next = NULL;
335*4c3eb207Smrg it_p->next = e;
336*4c3eb207Smrg }
337*4c3eb207Smrg }
338*4c3eb207Smrg break;
339*4c3eb207Smrg
340*4c3eb207Smrg case acc_toggle:
341*4c3eb207Smrg if (it == NULL)
342*4c3eb207Smrg {
343*4c3eb207Smrg gomp_debug (0, " ignoring request: is not registered\n");
344*4c3eb207Smrg break;
345*4c3eb207Smrg }
346*4c3eb207Smrg else
347*4c3eb207Smrg {
348*4c3eb207Smrg gomp_debug (0, " enabling\n");
349*4c3eb207Smrg it->enabled = true;
350*4c3eb207Smrg }
351*4c3eb207Smrg break;
352*4c3eb207Smrg
353*4c3eb207Smrg case acc_toggle_per_thread:
354*4c3eb207Smrg __builtin_unreachable ();
355*4c3eb207Smrg }
356*4c3eb207Smrg
357*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
358*4c3eb207Smrg }
359*4c3eb207Smrg
360*4c3eb207Smrg void
acc_prof_unregister(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)361*4c3eb207Smrg acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362*4c3eb207Smrg {
363*4c3eb207Smrg gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364*4c3eb207Smrg __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365*4c3eb207Smrg
366*4c3eb207Smrg /* If profiling is not enabled, there cannot be anything to unregister. */
367*4c3eb207Smrg if (!GOACC_PROF_ENABLED)
368*4c3eb207Smrg return;
369*4c3eb207Smrg
370*4c3eb207Smrg if (ev < acc_ev_none
371*4c3eb207Smrg || ev >= acc_ev_last)
372*4c3eb207Smrg {
373*4c3eb207Smrg /* Silently ignore. */
374*4c3eb207Smrg gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
375*4c3eb207Smrg return;
376*4c3eb207Smrg }
377*4c3eb207Smrg
378*4c3eb207Smrg bool bogus = true;
379*4c3eb207Smrg switch (reg)
380*4c3eb207Smrg {
381*4c3eb207Smrg case acc_reg:
382*4c3eb207Smrg case acc_toggle:
383*4c3eb207Smrg case acc_toggle_per_thread:
384*4c3eb207Smrg bogus = false;
385*4c3eb207Smrg break;
386*4c3eb207Smrg }
387*4c3eb207Smrg if (bogus)
388*4c3eb207Smrg {
389*4c3eb207Smrg /* Silently ignore. */
390*4c3eb207Smrg gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
391*4c3eb207Smrg return;
392*4c3eb207Smrg }
393*4c3eb207Smrg
394*4c3eb207Smrg /* Special cases. */
395*4c3eb207Smrg if (reg == acc_toggle)
396*4c3eb207Smrg {
397*4c3eb207Smrg if (cb == NULL)
398*4c3eb207Smrg {
399*4c3eb207Smrg gomp_debug (0, " globally disabling callbacks\n");
400*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
401*4c3eb207Smrg /* For 'acc_ev_none', this acts as a global toggle. */
402*4c3eb207Smrg goacc_prof_callbacks_enabled[ev] = false;
403*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
404*4c3eb207Smrg return;
405*4c3eb207Smrg }
406*4c3eb207Smrg else if (ev == acc_ev_none && cb != NULL)
407*4c3eb207Smrg {
408*4c3eb207Smrg gomp_debug (0, " ignoring request\n");
409*4c3eb207Smrg return;
410*4c3eb207Smrg }
411*4c3eb207Smrg }
412*4c3eb207Smrg else if (reg == acc_toggle_per_thread)
413*4c3eb207Smrg {
414*4c3eb207Smrg if (ev == acc_ev_none && cb == NULL)
415*4c3eb207Smrg {
416*4c3eb207Smrg gomp_debug (0, " thread: disabling callbacks\n");
417*4c3eb207Smrg goacc_lazy_initialize ();
418*4c3eb207Smrg struct goacc_thread *thr = goacc_thread ();
419*4c3eb207Smrg thr->prof_callbacks_enabled = false;
420*4c3eb207Smrg return;
421*4c3eb207Smrg }
422*4c3eb207Smrg /* Silently ignore. */
423*4c3eb207Smrg gomp_debug (0, " ignoring bogus request\n");
424*4c3eb207Smrg return;
425*4c3eb207Smrg }
426*4c3eb207Smrg
427*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
428*4c3eb207Smrg
429*4c3eb207Smrg struct goacc_prof_callback_entry *it, *it_p;
430*4c3eb207Smrg it = goacc_prof_callback_entries[ev];
431*4c3eb207Smrg it_p = NULL;
432*4c3eb207Smrg while (it)
433*4c3eb207Smrg {
434*4c3eb207Smrg if (it->cb == cb)
435*4c3eb207Smrg break;
436*4c3eb207Smrg it_p = it;
437*4c3eb207Smrg it = it->next;
438*4c3eb207Smrg }
439*4c3eb207Smrg
440*4c3eb207Smrg switch (reg)
441*4c3eb207Smrg {
442*4c3eb207Smrg case acc_reg:
443*4c3eb207Smrg if (it == NULL)
444*4c3eb207Smrg {
445*4c3eb207Smrg /* Silently ignore. */
446*4c3eb207Smrg gomp_debug (0, " ignoring bogus request: is not registered\n");
447*4c3eb207Smrg break;
448*4c3eb207Smrg }
449*4c3eb207Smrg it->ref--;
450*4c3eb207Smrg gomp_debug (0, " decrementing reference count to: %d\n", it->ref);
451*4c3eb207Smrg if (it->ref == 0)
452*4c3eb207Smrg {
453*4c3eb207Smrg if (it_p == NULL)
454*4c3eb207Smrg goacc_prof_callback_entries[ev] = it->next;
455*4c3eb207Smrg else
456*4c3eb207Smrg it_p->next = it->next;
457*4c3eb207Smrg free (it);
458*4c3eb207Smrg }
459*4c3eb207Smrg break;
460*4c3eb207Smrg
461*4c3eb207Smrg case acc_toggle:
462*4c3eb207Smrg if (it == NULL)
463*4c3eb207Smrg {
464*4c3eb207Smrg gomp_debug (0, " ignoring request: is not registered\n");
465*4c3eb207Smrg break;
466*4c3eb207Smrg }
467*4c3eb207Smrg else
468*4c3eb207Smrg {
469*4c3eb207Smrg gomp_debug (0, " disabling\n");
470*4c3eb207Smrg it->enabled = false;
471*4c3eb207Smrg }
472*4c3eb207Smrg break;
473*4c3eb207Smrg
474*4c3eb207Smrg case acc_toggle_per_thread:
475*4c3eb207Smrg __builtin_unreachable ();
476*4c3eb207Smrg }
477*4c3eb207Smrg
478*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
479*4c3eb207Smrg }
480*4c3eb207Smrg
481*4c3eb207Smrg acc_query_fn
acc_prof_lookup(const char * name)482*4c3eb207Smrg acc_prof_lookup (const char *name)
483*4c3eb207Smrg {
484*4c3eb207Smrg gomp_debug (0, "%s (%s)\n",
485*4c3eb207Smrg __FUNCTION__, name ?: "NULL");
486*4c3eb207Smrg
487*4c3eb207Smrg return NULL;
488*4c3eb207Smrg }
489*4c3eb207Smrg
490*4c3eb207Smrg void
acc_register_library(acc_prof_reg reg,acc_prof_reg unreg,acc_prof_lookup_func lookup)491*4c3eb207Smrg acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
492*4c3eb207Smrg acc_prof_lookup_func lookup)
493*4c3eb207Smrg {
494*4c3eb207Smrg gomp_fatal ("TODO");
495*4c3eb207Smrg }
496*4c3eb207Smrg
497*4c3eb207Smrg /* Prepare to dispatch events? */
498*4c3eb207Smrg
499*4c3eb207Smrg bool
_goacc_profiling_dispatch_p(bool check_not_nested_p)500*4c3eb207Smrg _goacc_profiling_dispatch_p (bool check_not_nested_p)
501*4c3eb207Smrg {
502*4c3eb207Smrg gomp_debug (0, "%s\n", __FUNCTION__);
503*4c3eb207Smrg
504*4c3eb207Smrg bool ret;
505*4c3eb207Smrg
506*4c3eb207Smrg struct goacc_thread *thr = goacc_thread ();
507*4c3eb207Smrg if (__builtin_expect (thr == NULL, false))
508*4c3eb207Smrg {
509*4c3eb207Smrg /* If we don't have any per-thread state yet, that means that per-thread
510*4c3eb207Smrg callback dispatch has not been explicitly disabled (which only a call
511*4c3eb207Smrg to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512*4c3eb207Smrg that would have allocated per-thread state via
513*4c3eb207Smrg 'goacc_lazy_initialize'); initially, all callbacks for all events are
514*4c3eb207Smrg enabled. */
515*4c3eb207Smrg gomp_debug (0, " %s: don't have any per-thread state yet\n", __FUNCTION__);
516*4c3eb207Smrg }
517*4c3eb207Smrg else
518*4c3eb207Smrg {
519*4c3eb207Smrg if (check_not_nested_p)
520*4c3eb207Smrg {
521*4c3eb207Smrg /* No nesting. */
522*4c3eb207Smrg assert (thr->prof_info == NULL);
523*4c3eb207Smrg assert (thr->api_info == NULL);
524*4c3eb207Smrg }
525*4c3eb207Smrg
526*4c3eb207Smrg if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527*4c3eb207Smrg {
528*4c3eb207Smrg gomp_debug (0, " %s: disabled for this thread\n", __FUNCTION__);
529*4c3eb207Smrg ret = false;
530*4c3eb207Smrg goto out;
531*4c3eb207Smrg }
532*4c3eb207Smrg }
533*4c3eb207Smrg
534*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
535*4c3eb207Smrg
536*4c3eb207Smrg /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
537*4c3eb207Smrg if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538*4c3eb207Smrg {
539*4c3eb207Smrg gomp_debug (0, " %s: disabled globally\n", __FUNCTION__);
540*4c3eb207Smrg ret = false;
541*4c3eb207Smrg goto out_unlock;
542*4c3eb207Smrg }
543*4c3eb207Smrg else
544*4c3eb207Smrg ret = true;
545*4c3eb207Smrg
546*4c3eb207Smrg out_unlock:
547*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
548*4c3eb207Smrg
549*4c3eb207Smrg out:
550*4c3eb207Smrg return ret;
551*4c3eb207Smrg }
552*4c3eb207Smrg
553*4c3eb207Smrg /* Set up to dispatch events? */
554*4c3eb207Smrg
555*4c3eb207Smrg bool
_goacc_profiling_setup_p(struct goacc_thread * thr,acc_prof_info * prof_info,acc_api_info * api_info)556*4c3eb207Smrg _goacc_profiling_setup_p (struct goacc_thread *thr,
557*4c3eb207Smrg acc_prof_info *prof_info, acc_api_info *api_info)
558*4c3eb207Smrg {
559*4c3eb207Smrg gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560*4c3eb207Smrg
561*4c3eb207Smrg /* If we don't have any per-thread state yet, we can't register 'prof_info'
562*4c3eb207Smrg and 'api_info'. */
563*4c3eb207Smrg if (__builtin_expect (thr == NULL, false))
564*4c3eb207Smrg {
565*4c3eb207Smrg gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566*4c3eb207Smrg " the current call, construct, or directive\n");
567*4c3eb207Smrg return false;
568*4c3eb207Smrg }
569*4c3eb207Smrg
570*4c3eb207Smrg if (thr->prof_info != NULL)
571*4c3eb207Smrg {
572*4c3eb207Smrg /* Profiling has already been set up for an outer construct. In this
573*4c3eb207Smrg case, we continue to use the existing information, and thus return
574*4c3eb207Smrg 'false' here.
575*4c3eb207Smrg
576*4c3eb207Smrg This can happen, for example, for an 'enter data' directive, which
577*4c3eb207Smrg sets up profiling, then calls into 'acc_copyin', which should not
578*4c3eb207Smrg again set up profiling, should not overwrite the existing
579*4c3eb207Smrg information. */
580*4c3eb207Smrg return false;
581*4c3eb207Smrg }
582*4c3eb207Smrg
583*4c3eb207Smrg thr->prof_info = prof_info;
584*4c3eb207Smrg thr->api_info = api_info;
585*4c3eb207Smrg
586*4c3eb207Smrg /* Fill in some defaults. */
587*4c3eb207Smrg
588*4c3eb207Smrg prof_info->event_type = -1; /* Must be set later. */
589*4c3eb207Smrg prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590*4c3eb207Smrg prof_info->version = _ACC_PROF_INFO_VERSION;
591*4c3eb207Smrg if (thr->dev)
592*4c3eb207Smrg {
593*4c3eb207Smrg prof_info->device_type = acc_device_type (thr->dev->type);
594*4c3eb207Smrg prof_info->device_number = thr->dev->target_id;
595*4c3eb207Smrg }
596*4c3eb207Smrg else
597*4c3eb207Smrg {
598*4c3eb207Smrg prof_info->device_type = -1;
599*4c3eb207Smrg prof_info->device_number = -1;
600*4c3eb207Smrg }
601*4c3eb207Smrg prof_info->thread_id = -1;
602*4c3eb207Smrg prof_info->async = acc_async_sync;
603*4c3eb207Smrg prof_info->async_queue = prof_info->async;
604*4c3eb207Smrg prof_info->src_file = NULL;
605*4c3eb207Smrg prof_info->func_name = NULL;
606*4c3eb207Smrg prof_info->line_no = -1;
607*4c3eb207Smrg prof_info->end_line_no = -1;
608*4c3eb207Smrg prof_info->func_line_no = -1;
609*4c3eb207Smrg prof_info->func_end_line_no = -1;
610*4c3eb207Smrg
611*4c3eb207Smrg api_info->device_api = acc_device_api_none;
612*4c3eb207Smrg api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613*4c3eb207Smrg api_info->device_type = prof_info->device_type;
614*4c3eb207Smrg api_info->vendor = -1;
615*4c3eb207Smrg api_info->device_handle = NULL;
616*4c3eb207Smrg api_info->context_handle = NULL;
617*4c3eb207Smrg api_info->async_handle = NULL;
618*4c3eb207Smrg
619*4c3eb207Smrg return true;
620*4c3eb207Smrg }
621*4c3eb207Smrg
622*4c3eb207Smrg /* Dispatch events.
623*4c3eb207Smrg
624*4c3eb207Smrg This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625*4c3eb207Smrg 'GOACC_PROFILING_SETUP_P' returned a true result. */
626*4c3eb207Smrg
627*4c3eb207Smrg void
goacc_profiling_dispatch(acc_prof_info * prof_info,acc_event_info * event_info,acc_api_info * apt_info)628*4c3eb207Smrg goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629*4c3eb207Smrg acc_api_info *apt_info)
630*4c3eb207Smrg {
631*4c3eb207Smrg acc_event_t event_type = event_info->event_type;
632*4c3eb207Smrg gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633*4c3eb207Smrg assert (event_type > acc_ev_none
634*4c3eb207Smrg && event_type < acc_ev_last);
635*4c3eb207Smrg
636*4c3eb207Smrg gomp_mutex_lock (&goacc_prof_lock);
637*4c3eb207Smrg
638*4c3eb207Smrg if (!goacc_prof_callbacks_enabled[event_type])
639*4c3eb207Smrg {
640*4c3eb207Smrg gomp_debug (0, " disabled for this event type\n");
641*4c3eb207Smrg
642*4c3eb207Smrg goto out_unlock;
643*4c3eb207Smrg }
644*4c3eb207Smrg
645*4c3eb207Smrg for (struct goacc_prof_callback_entry *e
646*4c3eb207Smrg = goacc_prof_callback_entries[event_type];
647*4c3eb207Smrg e != NULL;
648*4c3eb207Smrg e = e->next)
649*4c3eb207Smrg {
650*4c3eb207Smrg if (!e->enabled)
651*4c3eb207Smrg {
652*4c3eb207Smrg gomp_debug (0, " disabled for callback %p\n", e->cb);
653*4c3eb207Smrg continue;
654*4c3eb207Smrg }
655*4c3eb207Smrg
656*4c3eb207Smrg gomp_debug (0, " calling callback %p\n", e->cb);
657*4c3eb207Smrg e->cb (prof_info, event_info, apt_info);
658*4c3eb207Smrg }
659*4c3eb207Smrg
660*4c3eb207Smrg out_unlock:
661*4c3eb207Smrg gomp_mutex_unlock (&goacc_prof_lock);
662*4c3eb207Smrg }
663