xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/oacc-profiling.c (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1 /* OpenACC Profiling Interface
2 
3    Copyright (C) 2019-2020 Free Software Foundation, Inc.
4 
5    Contributed by Mentor, a Siemens Business.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #define _GNU_SOURCE
30 #include "libgomp.h"
31 #include "oacc-int.h"
32 #include "secure_getenv.h"
33 #include "acc_prof.h"
34 #include <assert.h>
35 #ifdef HAVE_STRING_H
36 # include <string.h>
37 #endif
38 #ifdef PLUGIN_SUPPORT
39 # include <dlfcn.h>
40 #endif
41 
42 #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43 
44 /* Statically assert that the layout of the common fields in the
45    'acc_event_info' variants matches.  */
46 /* 'event_type' */
47 STATIC_ASSERT (offsetof (acc_event_info, event_type)
48 	       == offsetof (acc_event_info, data_event.event_type));
49 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50 	       == offsetof (acc_event_info, launch_event.event_type));
51 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52 	       == offsetof (acc_event_info, other_event.event_type));
53 /* 'valid_bytes' */
54 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55 	       == offsetof (acc_event_info, launch_event.valid_bytes));
56 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57 	       == offsetof (acc_event_info, other_event.valid_bytes));
58 /* 'parent_construct' */
59 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60 	       == offsetof (acc_event_info, launch_event.parent_construct));
61 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62 	       == offsetof (acc_event_info, other_event.parent_construct));
63 /* 'implicit' */
64 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65 	       == offsetof (acc_event_info, launch_event.implicit));
66 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67 	       == offsetof (acc_event_info, other_event.implicit));
68 /* 'tool_info' */
69 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70 	       == offsetof (acc_event_info, launch_event.tool_info));
71 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72 	       == offsetof (acc_event_info, other_event.tool_info));
73 
74 struct goacc_prof_callback_entry
75 {
76   acc_prof_callback cb;
77   int ref;
78   bool enabled;
79   struct goacc_prof_callback_entry *next;
80 };
81 
/* Use a separate flag to minimize run-time performance impact for the (very
   common) case that profiling is not enabled.

   The flag is set by 'acc_prof_register'.  Once enabled, we're not going to
   disable this anymore, anywhere.  We probably could, by adding appropriate
   logic to 'acc_prof_register', 'acc_prof_unregister'.  */
bool goacc_prof_enabled = false;
89 
90 /* Global state for registered callbacks.
91    'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
92 static bool goacc_prof_callbacks_enabled[acc_ev_last];
93 static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
94 /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95    'goacc_prof_callback_entries'.  */
96 static gomp_mutex_t goacc_prof_lock;
97 
98 void
goacc_profiling_initialize(void)99 goacc_profiling_initialize (void)
100 {
101   gomp_mutex_init (&goacc_prof_lock);
102 
103   /* Initially, all callbacks for all events are enabled.  */
104   for (int i = 0; i < acc_ev_last; ++i)
105     goacc_prof_callbacks_enabled[i] = true;
106 
107 
108 #ifdef PLUGIN_SUPPORT
109   char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110   while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111     {
112       char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113       char *acc_proflib;
114       if (acc_proflibs_sep == acc_proflibs)
115 	{
116 	  /* Stray ';' separator: make sure we don't 'dlopen' the main
117 	     program.  */
118 	  acc_proflib = NULL;
119 	}
120       else
121 	{
122 	  if (acc_proflibs_sep != NULL)
123 	    {
124 	      /* Single out the first library.  */
125 	      acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126 	      memcpy (acc_proflib, acc_proflibs,
127 		      acc_proflibs_sep - acc_proflibs);
128 	      acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129 	    }
130 	  else
131 	    {
132 	      /* No ';' separator, so only one library.  */
133 	      acc_proflib = acc_proflibs;
134 	    }
135 
136 	  gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137 	  void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138 	  if (dl_handle != NULL)
139 	    {
140 	      typeof (&acc_register_library) a_r_l
141 		= dlsym (dl_handle, "acc_register_library");
142 	      if (a_r_l == NULL)
143 		goto dl_fail;
144 	      gomp_debug (0, "  %s: calling %s:acc_register_library\n",
145 			  __FUNCTION__, acc_proflib);
146 	      a_r_l (acc_prof_register, acc_prof_unregister,
147 		     acc_prof_lookup);
148 	    }
149 	  else
150 	    {
151 	    dl_fail:
152 	      gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153 			  acc_proflib, dlerror ());
154 	      if (dl_handle != NULL)
155 		{
156 		  int err = dlclose (dl_handle);
157 		  dl_handle = NULL;
158 		  if (err != 0)
159 		    goto dl_fail;
160 		}
161 	    }
162 	}
163 
164       if (acc_proflib != acc_proflibs)
165 	{
166 	  free (acc_proflib);
167 
168 	  acc_proflibs = acc_proflibs_sep + 1;
169 	}
170       else
171 	acc_proflibs = NULL;
172     }
173 #endif /* PLUGIN_SUPPORT */
174 }
175 
176 void
acc_prof_register(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)177 acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178 {
179   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180 	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181 
182 
183   /* For any events to be dispatched, the user first has to register a
184      callback, which makes this here a good place for enabling the whole
185      machinery.  */
186   if (!GOACC_PROF_ENABLED)
187     __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188 
189 
190   enum
191   {
192     EVENT_KIND_BOGUS,
193     EVENT_KIND_NORMAL,
194     /* As end events invoke callbacks in the reverse order, we register these
195        in the reverse order here.  */
196     EVENT_KIND_END,
197   } event_kind = EVENT_KIND_BOGUS;
198   switch (ev)
199     {
200     case acc_ev_none:
201     case acc_ev_device_init_start:
202     case acc_ev_device_shutdown_start:
203     case acc_ev_runtime_shutdown:
204     case acc_ev_create:
205     case acc_ev_delete:
206     case acc_ev_alloc:
207     case acc_ev_free:
208     case acc_ev_enter_data_start:
209     case acc_ev_exit_data_start:
210     case acc_ev_update_start:
211     case acc_ev_compute_construct_start:
212     case acc_ev_enqueue_launch_start:
213     case acc_ev_enqueue_upload_start:
214     case acc_ev_enqueue_download_start:
215     case acc_ev_wait_start:
216       event_kind = EVENT_KIND_NORMAL;
217       break;
218     case acc_ev_device_init_end:
219     case acc_ev_device_shutdown_end:
220     case acc_ev_enter_data_end:
221     case acc_ev_exit_data_end:
222     case acc_ev_update_end:
223     case acc_ev_compute_construct_end:
224     case acc_ev_enqueue_launch_end:
225     case acc_ev_enqueue_upload_end:
226     case acc_ev_enqueue_download_end:
227     case acc_ev_wait_end:
228       event_kind = EVENT_KIND_END;
229       break;
230     case acc_ev_last:
231       break;
232     }
233   if (event_kind == EVENT_KIND_BOGUS)
234     {
235       /* Silently ignore.  */
236       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
237       return;
238     }
239 
240   bool bogus = true;
241   switch (reg)
242     {
243     case acc_reg:
244     case acc_toggle:
245     case acc_toggle_per_thread:
246       bogus = false;
247       break;
248     }
249   if (bogus)
250     {
251       /* Silently ignore.  */
252       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
253       return;
254     }
255 
256   /* Special cases.  */
257   if (reg == acc_toggle)
258     {
259       if (cb == NULL)
260 	{
261 	  gomp_debug (0, "  globally enabling callbacks\n");
262 	  gomp_mutex_lock (&goacc_prof_lock);
263 	  /* For 'acc_ev_none', this acts as a global toggle.  */
264 	  goacc_prof_callbacks_enabled[ev] = true;
265 	  gomp_mutex_unlock (&goacc_prof_lock);
266 	  return;
267 	}
268       else if (ev == acc_ev_none && cb != NULL)
269 	{
270 	  gomp_debug (0, "  ignoring request\n");
271 	  return;
272 	}
273     }
274   else if (reg == acc_toggle_per_thread)
275     {
276       if (ev == acc_ev_none && cb == NULL)
277 	{
278 	  gomp_debug (0, "  thread: enabling callbacks\n");
279 	  goacc_lazy_initialize ();
280 	  struct goacc_thread *thr = goacc_thread ();
281 	  thr->prof_callbacks_enabled = true;
282 	  return;
283 	}
284       /* Silently ignore.  */
285       gomp_debug (0, "  ignoring bogus request\n");
286       return;
287     }
288 
289   gomp_mutex_lock (&goacc_prof_lock);
290 
291   struct goacc_prof_callback_entry *it, *it_p;
292   it = goacc_prof_callback_entries[ev];
293   it_p = NULL;
294   while (it)
295     {
296       if (it->cb == cb)
297 	break;
298       it_p = it;
299       it = it->next;
300     }
301 
302   switch (reg)
303     {
304     case acc_reg:
305       /* If we already have this callback registered, just increment its
306 	 reference count.  */
307       if (it != NULL)
308 	{
309 	  it->ref++;
310 	  gomp_debug (0, "  already registered;"
311 		      " incrementing reference count to: %d\n", it->ref);
312 	}
313       else
314 	{
315 	  struct goacc_prof_callback_entry *e
316 	    = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317 	  e->cb = cb;
318 	  e->ref = 1;
319 	  e->enabled = true;
320 	  bool prepend = (event_kind == EVENT_KIND_END);
321 	  /* If we don't have any callback registered yet, also use the
322 	     'prepend' code path.  */
323 	  if (it_p == NULL)
324 	    prepend = true;
325 	  if (prepend)
326 	    {
327 	      gomp_debug (0, "  prepending\n");
328 	      e->next = goacc_prof_callback_entries[ev];
329 	      goacc_prof_callback_entries[ev] = e;
330 	    }
331 	  else
332 	    {
333 	      gomp_debug (0, "  appending\n");
334 	      e->next = NULL;
335 	      it_p->next = e;
336 	    }
337 	}
338       break;
339 
340     case acc_toggle:
341       if (it == NULL)
342 	{
343 	  gomp_debug (0, "  ignoring request: is not registered\n");
344 	  break;
345 	}
346       else
347 	{
348 	  gomp_debug (0, "  enabling\n");
349 	  it->enabled = true;
350 	}
351       break;
352 
353     case acc_toggle_per_thread:
354       __builtin_unreachable ();
355     }
356 
357   gomp_mutex_unlock (&goacc_prof_lock);
358 }
359 
360 void
acc_prof_unregister(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)361 acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362 {
363   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364 	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365 
366   /* If profiling is not enabled, there cannot be anything to unregister.  */
367   if (!GOACC_PROF_ENABLED)
368     return;
369 
370   if (ev < acc_ev_none
371       || ev >= acc_ev_last)
372     {
373       /* Silently ignore.  */
374       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
375       return;
376     }
377 
378   bool bogus = true;
379   switch (reg)
380     {
381     case acc_reg:
382     case acc_toggle:
383     case acc_toggle_per_thread:
384       bogus = false;
385       break;
386     }
387   if (bogus)
388     {
389       /* Silently ignore.  */
390       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
391       return;
392     }
393 
394   /* Special cases.  */
395   if (reg == acc_toggle)
396     {
397       if (cb == NULL)
398 	{
399 	  gomp_debug (0, "  globally disabling callbacks\n");
400 	  gomp_mutex_lock (&goacc_prof_lock);
401 	  /* For 'acc_ev_none', this acts as a global toggle.  */
402 	  goacc_prof_callbacks_enabled[ev] = false;
403 	  gomp_mutex_unlock (&goacc_prof_lock);
404 	  return;
405 	}
406       else if (ev == acc_ev_none && cb != NULL)
407 	{
408 	  gomp_debug (0, "  ignoring request\n");
409 	  return;
410 	}
411     }
412   else if (reg == acc_toggle_per_thread)
413     {
414       if (ev == acc_ev_none && cb == NULL)
415 	{
416 	  gomp_debug (0, "  thread: disabling callbacks\n");
417 	  goacc_lazy_initialize ();
418 	  struct goacc_thread *thr = goacc_thread ();
419 	  thr->prof_callbacks_enabled = false;
420 	  return;
421 	}
422       /* Silently ignore.  */
423       gomp_debug (0, "  ignoring bogus request\n");
424       return;
425     }
426 
427   gomp_mutex_lock (&goacc_prof_lock);
428 
429   struct goacc_prof_callback_entry *it, *it_p;
430   it = goacc_prof_callback_entries[ev];
431   it_p = NULL;
432   while (it)
433     {
434       if (it->cb == cb)
435 	break;
436       it_p = it;
437       it = it->next;
438     }
439 
440   switch (reg)
441     {
442     case acc_reg:
443       if (it == NULL)
444 	{
445 	  /* Silently ignore.  */
446 	  gomp_debug (0, "  ignoring bogus request: is not registered\n");
447 	  break;
448 	}
449       it->ref--;
450       gomp_debug (0, "  decrementing reference count to: %d\n", it->ref);
451       if (it->ref == 0)
452 	{
453 	  if (it_p == NULL)
454 	    goacc_prof_callback_entries[ev] = it->next;
455 	  else
456 	    it_p->next = it->next;
457 	  free (it);
458 	}
459       break;
460 
461     case acc_toggle:
462       if (it == NULL)
463 	{
464 	  gomp_debug (0, "  ignoring request: is not registered\n");
465 	  break;
466 	}
467       else
468 	{
469 	  gomp_debug (0, "  disabling\n");
470 	  it->enabled = false;
471 	}
472       break;
473 
474     case acc_toggle_per_thread:
475       __builtin_unreachable ();
476     }
477 
478   gomp_mutex_unlock (&goacc_prof_lock);
479 }
480 
481 acc_query_fn
acc_prof_lookup(const char * name)482 acc_prof_lookup (const char *name)
483 {
484   gomp_debug (0, "%s (%s)\n",
485 	      __FUNCTION__, name ?: "NULL");
486 
487   return NULL;
488 }
489 
490 void
acc_register_library(acc_prof_reg reg,acc_prof_reg unreg,acc_prof_lookup_func lookup)491 acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
492 		      acc_prof_lookup_func lookup)
493 {
494   gomp_fatal ("TODO");
495 }
496 
497 /* Prepare to dispatch events?  */
498 
499 bool
_goacc_profiling_dispatch_p(bool check_not_nested_p)500 _goacc_profiling_dispatch_p (bool check_not_nested_p)
501 {
502   gomp_debug (0, "%s\n", __FUNCTION__);
503 
504   bool ret;
505 
506   struct goacc_thread *thr = goacc_thread ();
507   if (__builtin_expect (thr == NULL, false))
508     {
509       /* If we don't have any per-thread state yet, that means that per-thread
510 	 callback dispatch has not been explicitly disabled (which only a call
511 	 to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512 	 that would have allocated per-thread state via
513 	 'goacc_lazy_initialize'); initially, all callbacks for all events are
514 	 enabled.  */
515       gomp_debug (0, "  %s: don't have any per-thread state yet\n", __FUNCTION__);
516     }
517   else
518     {
519       if (check_not_nested_p)
520 	{
521 	  /* No nesting.  */
522 	  assert (thr->prof_info == NULL);
523 	  assert (thr->api_info == NULL);
524 	}
525 
526       if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527 	{
528 	  gomp_debug (0, "  %s: disabled for this thread\n", __FUNCTION__);
529 	  ret = false;
530 	  goto out;
531 	}
532     }
533 
534   gomp_mutex_lock (&goacc_prof_lock);
535 
536   /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
537   if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538     {
539       gomp_debug (0, "  %s: disabled globally\n", __FUNCTION__);
540       ret = false;
541       goto out_unlock;
542     }
543   else
544     ret = true;
545 
546  out_unlock:
547   gomp_mutex_unlock (&goacc_prof_lock);
548 
549  out:
550   return ret;
551 }
552 
553 /* Set up to dispatch events?  */
554 
555 bool
_goacc_profiling_setup_p(struct goacc_thread * thr,acc_prof_info * prof_info,acc_api_info * api_info)556 _goacc_profiling_setup_p (struct goacc_thread *thr,
557 			  acc_prof_info *prof_info, acc_api_info *api_info)
558 {
559   gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560 
561   /* If we don't have any per-thread state yet, we can't register 'prof_info'
562      and 'api_info'.  */
563   if (__builtin_expect (thr == NULL, false))
564     {
565       gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566 		  " the current call, construct, or directive\n");
567       return false;
568     }
569 
570   if (thr->prof_info != NULL)
571     {
572       /* Profiling has already been set up for an outer construct.  In this
573 	 case, we continue to use the existing information, and thus return
574 	 'false' here.
575 
576 	 This can happen, for example, for an 'enter data' directive, which
577 	 sets up profiling, then calls into 'acc_copyin', which should not
578 	 again set up profiling, should not overwrite the existing
579 	 information.  */
580       return false;
581     }
582 
583   thr->prof_info = prof_info;
584   thr->api_info = api_info;
585 
586   /* Fill in some defaults.  */
587 
588   prof_info->event_type = -1; /* Must be set later.  */
589   prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590   prof_info->version = _ACC_PROF_INFO_VERSION;
591   if (thr->dev)
592     {
593       prof_info->device_type = acc_device_type (thr->dev->type);
594       prof_info->device_number = thr->dev->target_id;
595     }
596   else
597     {
598       prof_info->device_type = -1;
599       prof_info->device_number = -1;
600     }
601   prof_info->thread_id = -1;
602   prof_info->async = acc_async_sync;
603   prof_info->async_queue = prof_info->async;
604   prof_info->src_file = NULL;
605   prof_info->func_name = NULL;
606   prof_info->line_no = -1;
607   prof_info->end_line_no = -1;
608   prof_info->func_line_no = -1;
609   prof_info->func_end_line_no = -1;
610 
611   api_info->device_api = acc_device_api_none;
612   api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613   api_info->device_type = prof_info->device_type;
614   api_info->vendor = -1;
615   api_info->device_handle = NULL;
616   api_info->context_handle = NULL;
617   api_info->async_handle = NULL;
618 
619   return true;
620 }
621 
622 /* Dispatch events.
623 
624    This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625    'GOACC_PROFILING_SETUP_P' returned a true result.  */
626 
627 void
goacc_profiling_dispatch(acc_prof_info * prof_info,acc_event_info * event_info,acc_api_info * apt_info)628 goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629 			  acc_api_info *apt_info)
630 {
631   acc_event_t event_type = event_info->event_type;
632   gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633   assert (event_type > acc_ev_none
634 	  && event_type < acc_ev_last);
635 
636   gomp_mutex_lock (&goacc_prof_lock);
637 
638   if (!goacc_prof_callbacks_enabled[event_type])
639     {
640       gomp_debug (0, "  disabled for this event type\n");
641 
642       goto out_unlock;
643     }
644 
645   for (struct goacc_prof_callback_entry *e
646 	 = goacc_prof_callback_entries[event_type];
647        e != NULL;
648        e = e->next)
649     {
650       if (!e->enabled)
651 	{
652 	  gomp_debug (0, "  disabled for callback %p\n", e->cb);
653 	  continue;
654 	}
655 
656       gomp_debug (0, "  calling callback %p\n", e->cb);
657       e->cb (prof_info, event_info, apt_info);
658     }
659 
660  out_unlock:
661   gomp_mutex_unlock (&goacc_prof_lock);
662 }
663