xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/oacc-profiling.c (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1*4c3eb207Smrg /* OpenACC Profiling Interface
2*4c3eb207Smrg 
3*4c3eb207Smrg    Copyright (C) 2019-2020 Free Software Foundation, Inc.
4*4c3eb207Smrg 
5*4c3eb207Smrg    Contributed by Mentor, a Siemens Business.
6*4c3eb207Smrg 
7*4c3eb207Smrg    This file is part of the GNU Offloading and Multi Processing Library
8*4c3eb207Smrg    (libgomp).
9*4c3eb207Smrg 
10*4c3eb207Smrg    Libgomp is free software; you can redistribute it and/or modify it
11*4c3eb207Smrg    under the terms of the GNU General Public License as published by
12*4c3eb207Smrg    the Free Software Foundation; either version 3, or (at your option)
13*4c3eb207Smrg    any later version.
14*4c3eb207Smrg 
15*4c3eb207Smrg    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16*4c3eb207Smrg    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17*4c3eb207Smrg    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18*4c3eb207Smrg    more details.
19*4c3eb207Smrg 
20*4c3eb207Smrg    Under Section 7 of GPL version 3, you are granted additional
21*4c3eb207Smrg    permissions described in the GCC Runtime Library Exception, version
22*4c3eb207Smrg    3.1, as published by the Free Software Foundation.
23*4c3eb207Smrg 
24*4c3eb207Smrg    You should have received a copy of the GNU General Public License and
25*4c3eb207Smrg    a copy of the GCC Runtime Library Exception along with this program;
26*4c3eb207Smrg    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27*4c3eb207Smrg    <http://www.gnu.org/licenses/>.  */
28*4c3eb207Smrg 
29*4c3eb207Smrg #define _GNU_SOURCE
30*4c3eb207Smrg #include "libgomp.h"
31*4c3eb207Smrg #include "oacc-int.h"
32*4c3eb207Smrg #include "secure_getenv.h"
33*4c3eb207Smrg #include "acc_prof.h"
34*4c3eb207Smrg #include <assert.h>
35*4c3eb207Smrg #ifdef HAVE_STRING_H
36*4c3eb207Smrg # include <string.h>
37*4c3eb207Smrg #endif
38*4c3eb207Smrg #ifdef PLUGIN_SUPPORT
39*4c3eb207Smrg # include <dlfcn.h>
40*4c3eb207Smrg #endif
41*4c3eb207Smrg 
42*4c3eb207Smrg #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43*4c3eb207Smrg 
44*4c3eb207Smrg /* Statically assert that the layout of the common fields in the
45*4c3eb207Smrg    'acc_event_info' variants matches.  */
46*4c3eb207Smrg /* 'event_type' */
47*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, event_type)
48*4c3eb207Smrg 	       == offsetof (acc_event_info, data_event.event_type));
49*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50*4c3eb207Smrg 	       == offsetof (acc_event_info, launch_event.event_type));
51*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52*4c3eb207Smrg 	       == offsetof (acc_event_info, other_event.event_type));
53*4c3eb207Smrg /* 'valid_bytes' */
54*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55*4c3eb207Smrg 	       == offsetof (acc_event_info, launch_event.valid_bytes));
56*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57*4c3eb207Smrg 	       == offsetof (acc_event_info, other_event.valid_bytes));
58*4c3eb207Smrg /* 'parent_construct' */
59*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60*4c3eb207Smrg 	       == offsetof (acc_event_info, launch_event.parent_construct));
61*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62*4c3eb207Smrg 	       == offsetof (acc_event_info, other_event.parent_construct));
63*4c3eb207Smrg /* 'implicit' */
64*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65*4c3eb207Smrg 	       == offsetof (acc_event_info, launch_event.implicit));
66*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67*4c3eb207Smrg 	       == offsetof (acc_event_info, other_event.implicit));
68*4c3eb207Smrg /* 'tool_info' */
69*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70*4c3eb207Smrg 	       == offsetof (acc_event_info, launch_event.tool_info));
71*4c3eb207Smrg STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72*4c3eb207Smrg 	       == offsetof (acc_event_info, other_event.tool_info));
73*4c3eb207Smrg 
74*4c3eb207Smrg struct goacc_prof_callback_entry
75*4c3eb207Smrg {
76*4c3eb207Smrg   acc_prof_callback cb;
77*4c3eb207Smrg   int ref;
78*4c3eb207Smrg   bool enabled;
79*4c3eb207Smrg   struct goacc_prof_callback_entry *next;
80*4c3eb207Smrg };
81*4c3eb207Smrg 
82*4c3eb207Smrg /* Use a separate flag to minimize run-time performance impact for the (very
83*4c3eb207Smrg    common) case that profiling is not enabled.
84*4c3eb207Smrg 
85*4c3eb207Smrg    Once enabled, we're not going to disable this anymore, anywhere.  We
86*4c3eb207Smrg    probably could, by adding appropriate logic to 'acc_prof_register',
87*4c3eb207Smrg    'acc_prof_unregister'.  */
88*4c3eb207Smrg bool goacc_prof_enabled = false;
89*4c3eb207Smrg 
90*4c3eb207Smrg /* Global state for registered callbacks.
91*4c3eb207Smrg    'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
92*4c3eb207Smrg static bool goacc_prof_callbacks_enabled[acc_ev_last];
93*4c3eb207Smrg static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
94*4c3eb207Smrg /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95*4c3eb207Smrg    'goacc_prof_callback_entries'.  */
96*4c3eb207Smrg static gomp_mutex_t goacc_prof_lock;
97*4c3eb207Smrg 
98*4c3eb207Smrg void
goacc_profiling_initialize(void)99*4c3eb207Smrg goacc_profiling_initialize (void)
100*4c3eb207Smrg {
101*4c3eb207Smrg   gomp_mutex_init (&goacc_prof_lock);
102*4c3eb207Smrg 
103*4c3eb207Smrg   /* Initially, all callbacks for all events are enabled.  */
104*4c3eb207Smrg   for (int i = 0; i < acc_ev_last; ++i)
105*4c3eb207Smrg     goacc_prof_callbacks_enabled[i] = true;
106*4c3eb207Smrg 
107*4c3eb207Smrg 
108*4c3eb207Smrg #ifdef PLUGIN_SUPPORT
109*4c3eb207Smrg   char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110*4c3eb207Smrg   while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111*4c3eb207Smrg     {
112*4c3eb207Smrg       char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113*4c3eb207Smrg       char *acc_proflib;
114*4c3eb207Smrg       if (acc_proflibs_sep == acc_proflibs)
115*4c3eb207Smrg 	{
116*4c3eb207Smrg 	  /* Stray ';' separator: make sure we don't 'dlopen' the main
117*4c3eb207Smrg 	     program.  */
118*4c3eb207Smrg 	  acc_proflib = NULL;
119*4c3eb207Smrg 	}
120*4c3eb207Smrg       else
121*4c3eb207Smrg 	{
122*4c3eb207Smrg 	  if (acc_proflibs_sep != NULL)
123*4c3eb207Smrg 	    {
124*4c3eb207Smrg 	      /* Single out the first library.  */
125*4c3eb207Smrg 	      acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126*4c3eb207Smrg 	      memcpy (acc_proflib, acc_proflibs,
127*4c3eb207Smrg 		      acc_proflibs_sep - acc_proflibs);
128*4c3eb207Smrg 	      acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129*4c3eb207Smrg 	    }
130*4c3eb207Smrg 	  else
131*4c3eb207Smrg 	    {
132*4c3eb207Smrg 	      /* No ';' separator, so only one library.  */
133*4c3eb207Smrg 	      acc_proflib = acc_proflibs;
134*4c3eb207Smrg 	    }
135*4c3eb207Smrg 
136*4c3eb207Smrg 	  gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137*4c3eb207Smrg 	  void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138*4c3eb207Smrg 	  if (dl_handle != NULL)
139*4c3eb207Smrg 	    {
140*4c3eb207Smrg 	      typeof (&acc_register_library) a_r_l
141*4c3eb207Smrg 		= dlsym (dl_handle, "acc_register_library");
142*4c3eb207Smrg 	      if (a_r_l == NULL)
143*4c3eb207Smrg 		goto dl_fail;
144*4c3eb207Smrg 	      gomp_debug (0, "  %s: calling %s:acc_register_library\n",
145*4c3eb207Smrg 			  __FUNCTION__, acc_proflib);
146*4c3eb207Smrg 	      a_r_l (acc_prof_register, acc_prof_unregister,
147*4c3eb207Smrg 		     acc_prof_lookup);
148*4c3eb207Smrg 	    }
149*4c3eb207Smrg 	  else
150*4c3eb207Smrg 	    {
151*4c3eb207Smrg 	    dl_fail:
152*4c3eb207Smrg 	      gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153*4c3eb207Smrg 			  acc_proflib, dlerror ());
154*4c3eb207Smrg 	      if (dl_handle != NULL)
155*4c3eb207Smrg 		{
156*4c3eb207Smrg 		  int err = dlclose (dl_handle);
157*4c3eb207Smrg 		  dl_handle = NULL;
158*4c3eb207Smrg 		  if (err != 0)
159*4c3eb207Smrg 		    goto dl_fail;
160*4c3eb207Smrg 		}
161*4c3eb207Smrg 	    }
162*4c3eb207Smrg 	}
163*4c3eb207Smrg 
164*4c3eb207Smrg       if (acc_proflib != acc_proflibs)
165*4c3eb207Smrg 	{
166*4c3eb207Smrg 	  free (acc_proflib);
167*4c3eb207Smrg 
168*4c3eb207Smrg 	  acc_proflibs = acc_proflibs_sep + 1;
169*4c3eb207Smrg 	}
170*4c3eb207Smrg       else
171*4c3eb207Smrg 	acc_proflibs = NULL;
172*4c3eb207Smrg     }
173*4c3eb207Smrg #endif /* PLUGIN_SUPPORT */
174*4c3eb207Smrg }
175*4c3eb207Smrg 
176*4c3eb207Smrg void
acc_prof_register(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)177*4c3eb207Smrg acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178*4c3eb207Smrg {
179*4c3eb207Smrg   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180*4c3eb207Smrg 	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181*4c3eb207Smrg 
182*4c3eb207Smrg 
183*4c3eb207Smrg   /* For any events to be dispatched, the user first has to register a
184*4c3eb207Smrg      callback, which makes this here a good place for enabling the whole
185*4c3eb207Smrg      machinery.  */
186*4c3eb207Smrg   if (!GOACC_PROF_ENABLED)
187*4c3eb207Smrg     __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188*4c3eb207Smrg 
189*4c3eb207Smrg 
190*4c3eb207Smrg   enum
191*4c3eb207Smrg   {
192*4c3eb207Smrg     EVENT_KIND_BOGUS,
193*4c3eb207Smrg     EVENT_KIND_NORMAL,
194*4c3eb207Smrg     /* As end events invoke callbacks in the reverse order, we register these
195*4c3eb207Smrg        in the reverse order here.  */
196*4c3eb207Smrg     EVENT_KIND_END,
197*4c3eb207Smrg   } event_kind = EVENT_KIND_BOGUS;
198*4c3eb207Smrg   switch (ev)
199*4c3eb207Smrg     {
200*4c3eb207Smrg     case acc_ev_none:
201*4c3eb207Smrg     case acc_ev_device_init_start:
202*4c3eb207Smrg     case acc_ev_device_shutdown_start:
203*4c3eb207Smrg     case acc_ev_runtime_shutdown:
204*4c3eb207Smrg     case acc_ev_create:
205*4c3eb207Smrg     case acc_ev_delete:
206*4c3eb207Smrg     case acc_ev_alloc:
207*4c3eb207Smrg     case acc_ev_free:
208*4c3eb207Smrg     case acc_ev_enter_data_start:
209*4c3eb207Smrg     case acc_ev_exit_data_start:
210*4c3eb207Smrg     case acc_ev_update_start:
211*4c3eb207Smrg     case acc_ev_compute_construct_start:
212*4c3eb207Smrg     case acc_ev_enqueue_launch_start:
213*4c3eb207Smrg     case acc_ev_enqueue_upload_start:
214*4c3eb207Smrg     case acc_ev_enqueue_download_start:
215*4c3eb207Smrg     case acc_ev_wait_start:
216*4c3eb207Smrg       event_kind = EVENT_KIND_NORMAL;
217*4c3eb207Smrg       break;
218*4c3eb207Smrg     case acc_ev_device_init_end:
219*4c3eb207Smrg     case acc_ev_device_shutdown_end:
220*4c3eb207Smrg     case acc_ev_enter_data_end:
221*4c3eb207Smrg     case acc_ev_exit_data_end:
222*4c3eb207Smrg     case acc_ev_update_end:
223*4c3eb207Smrg     case acc_ev_compute_construct_end:
224*4c3eb207Smrg     case acc_ev_enqueue_launch_end:
225*4c3eb207Smrg     case acc_ev_enqueue_upload_end:
226*4c3eb207Smrg     case acc_ev_enqueue_download_end:
227*4c3eb207Smrg     case acc_ev_wait_end:
228*4c3eb207Smrg       event_kind = EVENT_KIND_END;
229*4c3eb207Smrg       break;
230*4c3eb207Smrg     case acc_ev_last:
231*4c3eb207Smrg       break;
232*4c3eb207Smrg     }
233*4c3eb207Smrg   if (event_kind == EVENT_KIND_BOGUS)
234*4c3eb207Smrg     {
235*4c3eb207Smrg       /* Silently ignore.  */
236*4c3eb207Smrg       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
237*4c3eb207Smrg       return;
238*4c3eb207Smrg     }
239*4c3eb207Smrg 
240*4c3eb207Smrg   bool bogus = true;
241*4c3eb207Smrg   switch (reg)
242*4c3eb207Smrg     {
243*4c3eb207Smrg     case acc_reg:
244*4c3eb207Smrg     case acc_toggle:
245*4c3eb207Smrg     case acc_toggle_per_thread:
246*4c3eb207Smrg       bogus = false;
247*4c3eb207Smrg       break;
248*4c3eb207Smrg     }
249*4c3eb207Smrg   if (bogus)
250*4c3eb207Smrg     {
251*4c3eb207Smrg       /* Silently ignore.  */
252*4c3eb207Smrg       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
253*4c3eb207Smrg       return;
254*4c3eb207Smrg     }
255*4c3eb207Smrg 
256*4c3eb207Smrg   /* Special cases.  */
257*4c3eb207Smrg   if (reg == acc_toggle)
258*4c3eb207Smrg     {
259*4c3eb207Smrg       if (cb == NULL)
260*4c3eb207Smrg 	{
261*4c3eb207Smrg 	  gomp_debug (0, "  globally enabling callbacks\n");
262*4c3eb207Smrg 	  gomp_mutex_lock (&goacc_prof_lock);
263*4c3eb207Smrg 	  /* For 'acc_ev_none', this acts as a global toggle.  */
264*4c3eb207Smrg 	  goacc_prof_callbacks_enabled[ev] = true;
265*4c3eb207Smrg 	  gomp_mutex_unlock (&goacc_prof_lock);
266*4c3eb207Smrg 	  return;
267*4c3eb207Smrg 	}
268*4c3eb207Smrg       else if (ev == acc_ev_none && cb != NULL)
269*4c3eb207Smrg 	{
270*4c3eb207Smrg 	  gomp_debug (0, "  ignoring request\n");
271*4c3eb207Smrg 	  return;
272*4c3eb207Smrg 	}
273*4c3eb207Smrg     }
274*4c3eb207Smrg   else if (reg == acc_toggle_per_thread)
275*4c3eb207Smrg     {
276*4c3eb207Smrg       if (ev == acc_ev_none && cb == NULL)
277*4c3eb207Smrg 	{
278*4c3eb207Smrg 	  gomp_debug (0, "  thread: enabling callbacks\n");
279*4c3eb207Smrg 	  goacc_lazy_initialize ();
280*4c3eb207Smrg 	  struct goacc_thread *thr = goacc_thread ();
281*4c3eb207Smrg 	  thr->prof_callbacks_enabled = true;
282*4c3eb207Smrg 	  return;
283*4c3eb207Smrg 	}
284*4c3eb207Smrg       /* Silently ignore.  */
285*4c3eb207Smrg       gomp_debug (0, "  ignoring bogus request\n");
286*4c3eb207Smrg       return;
287*4c3eb207Smrg     }
288*4c3eb207Smrg 
289*4c3eb207Smrg   gomp_mutex_lock (&goacc_prof_lock);
290*4c3eb207Smrg 
291*4c3eb207Smrg   struct goacc_prof_callback_entry *it, *it_p;
292*4c3eb207Smrg   it = goacc_prof_callback_entries[ev];
293*4c3eb207Smrg   it_p = NULL;
294*4c3eb207Smrg   while (it)
295*4c3eb207Smrg     {
296*4c3eb207Smrg       if (it->cb == cb)
297*4c3eb207Smrg 	break;
298*4c3eb207Smrg       it_p = it;
299*4c3eb207Smrg       it = it->next;
300*4c3eb207Smrg     }
301*4c3eb207Smrg 
302*4c3eb207Smrg   switch (reg)
303*4c3eb207Smrg     {
304*4c3eb207Smrg     case acc_reg:
305*4c3eb207Smrg       /* If we already have this callback registered, just increment its
306*4c3eb207Smrg 	 reference count.  */
307*4c3eb207Smrg       if (it != NULL)
308*4c3eb207Smrg 	{
309*4c3eb207Smrg 	  it->ref++;
310*4c3eb207Smrg 	  gomp_debug (0, "  already registered;"
311*4c3eb207Smrg 		      " incrementing reference count to: %d\n", it->ref);
312*4c3eb207Smrg 	}
313*4c3eb207Smrg       else
314*4c3eb207Smrg 	{
315*4c3eb207Smrg 	  struct goacc_prof_callback_entry *e
316*4c3eb207Smrg 	    = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317*4c3eb207Smrg 	  e->cb = cb;
318*4c3eb207Smrg 	  e->ref = 1;
319*4c3eb207Smrg 	  e->enabled = true;
320*4c3eb207Smrg 	  bool prepend = (event_kind == EVENT_KIND_END);
321*4c3eb207Smrg 	  /* If we don't have any callback registered yet, also use the
322*4c3eb207Smrg 	     'prepend' code path.  */
323*4c3eb207Smrg 	  if (it_p == NULL)
324*4c3eb207Smrg 	    prepend = true;
325*4c3eb207Smrg 	  if (prepend)
326*4c3eb207Smrg 	    {
327*4c3eb207Smrg 	      gomp_debug (0, "  prepending\n");
328*4c3eb207Smrg 	      e->next = goacc_prof_callback_entries[ev];
329*4c3eb207Smrg 	      goacc_prof_callback_entries[ev] = e;
330*4c3eb207Smrg 	    }
331*4c3eb207Smrg 	  else
332*4c3eb207Smrg 	    {
333*4c3eb207Smrg 	      gomp_debug (0, "  appending\n");
334*4c3eb207Smrg 	      e->next = NULL;
335*4c3eb207Smrg 	      it_p->next = e;
336*4c3eb207Smrg 	    }
337*4c3eb207Smrg 	}
338*4c3eb207Smrg       break;
339*4c3eb207Smrg 
340*4c3eb207Smrg     case acc_toggle:
341*4c3eb207Smrg       if (it == NULL)
342*4c3eb207Smrg 	{
343*4c3eb207Smrg 	  gomp_debug (0, "  ignoring request: is not registered\n");
344*4c3eb207Smrg 	  break;
345*4c3eb207Smrg 	}
346*4c3eb207Smrg       else
347*4c3eb207Smrg 	{
348*4c3eb207Smrg 	  gomp_debug (0, "  enabling\n");
349*4c3eb207Smrg 	  it->enabled = true;
350*4c3eb207Smrg 	}
351*4c3eb207Smrg       break;
352*4c3eb207Smrg 
353*4c3eb207Smrg     case acc_toggle_per_thread:
354*4c3eb207Smrg       __builtin_unreachable ();
355*4c3eb207Smrg     }
356*4c3eb207Smrg 
357*4c3eb207Smrg   gomp_mutex_unlock (&goacc_prof_lock);
358*4c3eb207Smrg }
359*4c3eb207Smrg 
360*4c3eb207Smrg void
acc_prof_unregister(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)361*4c3eb207Smrg acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362*4c3eb207Smrg {
363*4c3eb207Smrg   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364*4c3eb207Smrg 	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365*4c3eb207Smrg 
366*4c3eb207Smrg   /* If profiling is not enabled, there cannot be anything to unregister.  */
367*4c3eb207Smrg   if (!GOACC_PROF_ENABLED)
368*4c3eb207Smrg     return;
369*4c3eb207Smrg 
370*4c3eb207Smrg   if (ev < acc_ev_none
371*4c3eb207Smrg       || ev >= acc_ev_last)
372*4c3eb207Smrg     {
373*4c3eb207Smrg       /* Silently ignore.  */
374*4c3eb207Smrg       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
375*4c3eb207Smrg       return;
376*4c3eb207Smrg     }
377*4c3eb207Smrg 
378*4c3eb207Smrg   bool bogus = true;
379*4c3eb207Smrg   switch (reg)
380*4c3eb207Smrg     {
381*4c3eb207Smrg     case acc_reg:
382*4c3eb207Smrg     case acc_toggle:
383*4c3eb207Smrg     case acc_toggle_per_thread:
384*4c3eb207Smrg       bogus = false;
385*4c3eb207Smrg       break;
386*4c3eb207Smrg     }
387*4c3eb207Smrg   if (bogus)
388*4c3eb207Smrg     {
389*4c3eb207Smrg       /* Silently ignore.  */
390*4c3eb207Smrg       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
391*4c3eb207Smrg       return;
392*4c3eb207Smrg     }
393*4c3eb207Smrg 
394*4c3eb207Smrg   /* Special cases.  */
395*4c3eb207Smrg   if (reg == acc_toggle)
396*4c3eb207Smrg     {
397*4c3eb207Smrg       if (cb == NULL)
398*4c3eb207Smrg 	{
399*4c3eb207Smrg 	  gomp_debug (0, "  globally disabling callbacks\n");
400*4c3eb207Smrg 	  gomp_mutex_lock (&goacc_prof_lock);
401*4c3eb207Smrg 	  /* For 'acc_ev_none', this acts as a global toggle.  */
402*4c3eb207Smrg 	  goacc_prof_callbacks_enabled[ev] = false;
403*4c3eb207Smrg 	  gomp_mutex_unlock (&goacc_prof_lock);
404*4c3eb207Smrg 	  return;
405*4c3eb207Smrg 	}
406*4c3eb207Smrg       else if (ev == acc_ev_none && cb != NULL)
407*4c3eb207Smrg 	{
408*4c3eb207Smrg 	  gomp_debug (0, "  ignoring request\n");
409*4c3eb207Smrg 	  return;
410*4c3eb207Smrg 	}
411*4c3eb207Smrg     }
412*4c3eb207Smrg   else if (reg == acc_toggle_per_thread)
413*4c3eb207Smrg     {
414*4c3eb207Smrg       if (ev == acc_ev_none && cb == NULL)
415*4c3eb207Smrg 	{
416*4c3eb207Smrg 	  gomp_debug (0, "  thread: disabling callbacks\n");
417*4c3eb207Smrg 	  goacc_lazy_initialize ();
418*4c3eb207Smrg 	  struct goacc_thread *thr = goacc_thread ();
419*4c3eb207Smrg 	  thr->prof_callbacks_enabled = false;
420*4c3eb207Smrg 	  return;
421*4c3eb207Smrg 	}
422*4c3eb207Smrg       /* Silently ignore.  */
423*4c3eb207Smrg       gomp_debug (0, "  ignoring bogus request\n");
424*4c3eb207Smrg       return;
425*4c3eb207Smrg     }
426*4c3eb207Smrg 
427*4c3eb207Smrg   gomp_mutex_lock (&goacc_prof_lock);
428*4c3eb207Smrg 
429*4c3eb207Smrg   struct goacc_prof_callback_entry *it, *it_p;
430*4c3eb207Smrg   it = goacc_prof_callback_entries[ev];
431*4c3eb207Smrg   it_p = NULL;
432*4c3eb207Smrg   while (it)
433*4c3eb207Smrg     {
434*4c3eb207Smrg       if (it->cb == cb)
435*4c3eb207Smrg 	break;
436*4c3eb207Smrg       it_p = it;
437*4c3eb207Smrg       it = it->next;
438*4c3eb207Smrg     }
439*4c3eb207Smrg 
440*4c3eb207Smrg   switch (reg)
441*4c3eb207Smrg     {
442*4c3eb207Smrg     case acc_reg:
443*4c3eb207Smrg       if (it == NULL)
444*4c3eb207Smrg 	{
445*4c3eb207Smrg 	  /* Silently ignore.  */
446*4c3eb207Smrg 	  gomp_debug (0, "  ignoring bogus request: is not registered\n");
447*4c3eb207Smrg 	  break;
448*4c3eb207Smrg 	}
449*4c3eb207Smrg       it->ref--;
450*4c3eb207Smrg       gomp_debug (0, "  decrementing reference count to: %d\n", it->ref);
451*4c3eb207Smrg       if (it->ref == 0)
452*4c3eb207Smrg 	{
453*4c3eb207Smrg 	  if (it_p == NULL)
454*4c3eb207Smrg 	    goacc_prof_callback_entries[ev] = it->next;
455*4c3eb207Smrg 	  else
456*4c3eb207Smrg 	    it_p->next = it->next;
457*4c3eb207Smrg 	  free (it);
458*4c3eb207Smrg 	}
459*4c3eb207Smrg       break;
460*4c3eb207Smrg 
461*4c3eb207Smrg     case acc_toggle:
462*4c3eb207Smrg       if (it == NULL)
463*4c3eb207Smrg 	{
464*4c3eb207Smrg 	  gomp_debug (0, "  ignoring request: is not registered\n");
465*4c3eb207Smrg 	  break;
466*4c3eb207Smrg 	}
467*4c3eb207Smrg       else
468*4c3eb207Smrg 	{
469*4c3eb207Smrg 	  gomp_debug (0, "  disabling\n");
470*4c3eb207Smrg 	  it->enabled = false;
471*4c3eb207Smrg 	}
472*4c3eb207Smrg       break;
473*4c3eb207Smrg 
474*4c3eb207Smrg     case acc_toggle_per_thread:
475*4c3eb207Smrg       __builtin_unreachable ();
476*4c3eb207Smrg     }
477*4c3eb207Smrg 
478*4c3eb207Smrg   gomp_mutex_unlock (&goacc_prof_lock);
479*4c3eb207Smrg }
480*4c3eb207Smrg 
481*4c3eb207Smrg acc_query_fn
acc_prof_lookup(const char * name)482*4c3eb207Smrg acc_prof_lookup (const char *name)
483*4c3eb207Smrg {
484*4c3eb207Smrg   gomp_debug (0, "%s (%s)\n",
485*4c3eb207Smrg 	      __FUNCTION__, name ?: "NULL");
486*4c3eb207Smrg 
487*4c3eb207Smrg   return NULL;
488*4c3eb207Smrg }
489*4c3eb207Smrg 
490*4c3eb207Smrg void
acc_register_library(acc_prof_reg reg,acc_prof_reg unreg,acc_prof_lookup_func lookup)491*4c3eb207Smrg acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
492*4c3eb207Smrg 		      acc_prof_lookup_func lookup)
493*4c3eb207Smrg {
494*4c3eb207Smrg   gomp_fatal ("TODO");
495*4c3eb207Smrg }
496*4c3eb207Smrg 
497*4c3eb207Smrg /* Prepare to dispatch events?  */
498*4c3eb207Smrg 
499*4c3eb207Smrg bool
_goacc_profiling_dispatch_p(bool check_not_nested_p)500*4c3eb207Smrg _goacc_profiling_dispatch_p (bool check_not_nested_p)
501*4c3eb207Smrg {
502*4c3eb207Smrg   gomp_debug (0, "%s\n", __FUNCTION__);
503*4c3eb207Smrg 
504*4c3eb207Smrg   bool ret;
505*4c3eb207Smrg 
506*4c3eb207Smrg   struct goacc_thread *thr = goacc_thread ();
507*4c3eb207Smrg   if (__builtin_expect (thr == NULL, false))
508*4c3eb207Smrg     {
509*4c3eb207Smrg       /* If we don't have any per-thread state yet, that means that per-thread
510*4c3eb207Smrg 	 callback dispatch has not been explicitly disabled (which only a call
511*4c3eb207Smrg 	 to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512*4c3eb207Smrg 	 that would have allocated per-thread state via
513*4c3eb207Smrg 	 'goacc_lazy_initialize'); initially, all callbacks for all events are
514*4c3eb207Smrg 	 enabled.  */
515*4c3eb207Smrg       gomp_debug (0, "  %s: don't have any per-thread state yet\n", __FUNCTION__);
516*4c3eb207Smrg     }
517*4c3eb207Smrg   else
518*4c3eb207Smrg     {
519*4c3eb207Smrg       if (check_not_nested_p)
520*4c3eb207Smrg 	{
521*4c3eb207Smrg 	  /* No nesting.  */
522*4c3eb207Smrg 	  assert (thr->prof_info == NULL);
523*4c3eb207Smrg 	  assert (thr->api_info == NULL);
524*4c3eb207Smrg 	}
525*4c3eb207Smrg 
526*4c3eb207Smrg       if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527*4c3eb207Smrg 	{
528*4c3eb207Smrg 	  gomp_debug (0, "  %s: disabled for this thread\n", __FUNCTION__);
529*4c3eb207Smrg 	  ret = false;
530*4c3eb207Smrg 	  goto out;
531*4c3eb207Smrg 	}
532*4c3eb207Smrg     }
533*4c3eb207Smrg 
534*4c3eb207Smrg   gomp_mutex_lock (&goacc_prof_lock);
535*4c3eb207Smrg 
536*4c3eb207Smrg   /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
537*4c3eb207Smrg   if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538*4c3eb207Smrg     {
539*4c3eb207Smrg       gomp_debug (0, "  %s: disabled globally\n", __FUNCTION__);
540*4c3eb207Smrg       ret = false;
541*4c3eb207Smrg       goto out_unlock;
542*4c3eb207Smrg     }
543*4c3eb207Smrg   else
544*4c3eb207Smrg     ret = true;
545*4c3eb207Smrg 
546*4c3eb207Smrg  out_unlock:
547*4c3eb207Smrg   gomp_mutex_unlock (&goacc_prof_lock);
548*4c3eb207Smrg 
549*4c3eb207Smrg  out:
550*4c3eb207Smrg   return ret;
551*4c3eb207Smrg }
552*4c3eb207Smrg 
553*4c3eb207Smrg /* Set up to dispatch events?  */
554*4c3eb207Smrg 
555*4c3eb207Smrg bool
_goacc_profiling_setup_p(struct goacc_thread * thr,acc_prof_info * prof_info,acc_api_info * api_info)556*4c3eb207Smrg _goacc_profiling_setup_p (struct goacc_thread *thr,
557*4c3eb207Smrg 			  acc_prof_info *prof_info, acc_api_info *api_info)
558*4c3eb207Smrg {
559*4c3eb207Smrg   gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560*4c3eb207Smrg 
561*4c3eb207Smrg   /* If we don't have any per-thread state yet, we can't register 'prof_info'
562*4c3eb207Smrg      and 'api_info'.  */
563*4c3eb207Smrg   if (__builtin_expect (thr == NULL, false))
564*4c3eb207Smrg     {
565*4c3eb207Smrg       gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566*4c3eb207Smrg 		  " the current call, construct, or directive\n");
567*4c3eb207Smrg       return false;
568*4c3eb207Smrg     }
569*4c3eb207Smrg 
570*4c3eb207Smrg   if (thr->prof_info != NULL)
571*4c3eb207Smrg     {
572*4c3eb207Smrg       /* Profiling has already been set up for an outer construct.  In this
573*4c3eb207Smrg 	 case, we continue to use the existing information, and thus return
574*4c3eb207Smrg 	 'false' here.
575*4c3eb207Smrg 
576*4c3eb207Smrg 	 This can happen, for example, for an 'enter data' directive, which
577*4c3eb207Smrg 	 sets up profiling, then calls into 'acc_copyin', which should not
578*4c3eb207Smrg 	 again set up profiling, should not overwrite the existing
579*4c3eb207Smrg 	 information.  */
580*4c3eb207Smrg       return false;
581*4c3eb207Smrg     }
582*4c3eb207Smrg 
583*4c3eb207Smrg   thr->prof_info = prof_info;
584*4c3eb207Smrg   thr->api_info = api_info;
585*4c3eb207Smrg 
586*4c3eb207Smrg   /* Fill in some defaults.  */
587*4c3eb207Smrg 
588*4c3eb207Smrg   prof_info->event_type = -1; /* Must be set later.  */
589*4c3eb207Smrg   prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590*4c3eb207Smrg   prof_info->version = _ACC_PROF_INFO_VERSION;
591*4c3eb207Smrg   if (thr->dev)
592*4c3eb207Smrg     {
593*4c3eb207Smrg       prof_info->device_type = acc_device_type (thr->dev->type);
594*4c3eb207Smrg       prof_info->device_number = thr->dev->target_id;
595*4c3eb207Smrg     }
596*4c3eb207Smrg   else
597*4c3eb207Smrg     {
598*4c3eb207Smrg       prof_info->device_type = -1;
599*4c3eb207Smrg       prof_info->device_number = -1;
600*4c3eb207Smrg     }
601*4c3eb207Smrg   prof_info->thread_id = -1;
602*4c3eb207Smrg   prof_info->async = acc_async_sync;
603*4c3eb207Smrg   prof_info->async_queue = prof_info->async;
604*4c3eb207Smrg   prof_info->src_file = NULL;
605*4c3eb207Smrg   prof_info->func_name = NULL;
606*4c3eb207Smrg   prof_info->line_no = -1;
607*4c3eb207Smrg   prof_info->end_line_no = -1;
608*4c3eb207Smrg   prof_info->func_line_no = -1;
609*4c3eb207Smrg   prof_info->func_end_line_no = -1;
610*4c3eb207Smrg 
611*4c3eb207Smrg   api_info->device_api = acc_device_api_none;
612*4c3eb207Smrg   api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613*4c3eb207Smrg   api_info->device_type = prof_info->device_type;
614*4c3eb207Smrg   api_info->vendor = -1;
615*4c3eb207Smrg   api_info->device_handle = NULL;
616*4c3eb207Smrg   api_info->context_handle = NULL;
617*4c3eb207Smrg   api_info->async_handle = NULL;
618*4c3eb207Smrg 
619*4c3eb207Smrg   return true;
620*4c3eb207Smrg }
621*4c3eb207Smrg 
622*4c3eb207Smrg /* Dispatch events.
623*4c3eb207Smrg 
624*4c3eb207Smrg    This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625*4c3eb207Smrg    'GOACC_PROFILING_SETUP_P' returned a true result.  */
626*4c3eb207Smrg 
627*4c3eb207Smrg void
goacc_profiling_dispatch(acc_prof_info * prof_info,acc_event_info * event_info,acc_api_info * apt_info)628*4c3eb207Smrg goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629*4c3eb207Smrg 			  acc_api_info *apt_info)
630*4c3eb207Smrg {
631*4c3eb207Smrg   acc_event_t event_type = event_info->event_type;
632*4c3eb207Smrg   gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633*4c3eb207Smrg   assert (event_type > acc_ev_none
634*4c3eb207Smrg 	  && event_type < acc_ev_last);
635*4c3eb207Smrg 
636*4c3eb207Smrg   gomp_mutex_lock (&goacc_prof_lock);
637*4c3eb207Smrg 
638*4c3eb207Smrg   if (!goacc_prof_callbacks_enabled[event_type])
639*4c3eb207Smrg     {
640*4c3eb207Smrg       gomp_debug (0, "  disabled for this event type\n");
641*4c3eb207Smrg 
642*4c3eb207Smrg       goto out_unlock;
643*4c3eb207Smrg     }
644*4c3eb207Smrg 
645*4c3eb207Smrg   for (struct goacc_prof_callback_entry *e
646*4c3eb207Smrg 	 = goacc_prof_callback_entries[event_type];
647*4c3eb207Smrg        e != NULL;
648*4c3eb207Smrg        e = e->next)
649*4c3eb207Smrg     {
650*4c3eb207Smrg       if (!e->enabled)
651*4c3eb207Smrg 	{
652*4c3eb207Smrg 	  gomp_debug (0, "  disabled for callback %p\n", e->cb);
653*4c3eb207Smrg 	  continue;
654*4c3eb207Smrg 	}
655*4c3eb207Smrg 
656*4c3eb207Smrg       gomp_debug (0, "  calling callback %p\n", e->cb);
657*4c3eb207Smrg       e->cb (prof_info, event_info, apt_info);
658*4c3eb207Smrg     }
659*4c3eb207Smrg 
660*4c3eb207Smrg  out_unlock:
661*4c3eb207Smrg   gomp_mutex_unlock (&goacc_prof_lock);
662*4c3eb207Smrg }
663