xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/oacc-init.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2020 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "libgomp.h"
30 #include "oacc-int.h"
31 #include "openacc.h"
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <stdbool.h>
36 #include <string.h>
37 
38 /* This lock is used to protect access to cached_base_dev, dispatchers and
39    the (abstract) initialisation state of attached offloading devices.  */
40 
41 static gomp_mutex_t acc_device_lock;
42 
43 static gomp_mutex_t acc_init_state_lock;
44 static enum { uninitialized, initializing, initialized } acc_init_state
45   = uninitialized;
46 static pthread_t acc_init_thread;
47 
48 /* A cached version of the dispatcher for the global "current" accelerator type,
49    e.g. used as the default when creating new host threads.  This is the
50    device-type equivalent of goacc_device_num (which specifies which device to
51    use out of potentially several of the same type).  If there are several
52    devices of a given type, this points at the first one.  */
53 
54 static struct gomp_device_descr *cached_base_dev = NULL;
55 
56 #if defined HAVE_TLS || defined USE_EMUTLS
57 __thread struct goacc_thread *goacc_tls_data;
58 #else
59 pthread_key_t goacc_tls_key;
60 #endif
61 static pthread_key_t goacc_cleanup_key;
62 
63 static struct goacc_thread *goacc_threads;
64 static gomp_mutex_t goacc_thread_lock;
65 
66 /* An array of dispatchers for device types, indexed by the type.  This array
67    only references "base" devices, and other instances of the same type are
68    found by simply indexing from each such device (which are stored linearly,
69    grouped by device in target.c:devices).  */
70 static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
71 
72 attribute_hidden void
goacc_register(struct gomp_device_descr * disp)73 goacc_register (struct gomp_device_descr *disp)
74 {
75   /* Only register the 0th device here.  */
76   if (disp->target_id != 0)
77     return;
78 
79   gomp_mutex_lock (&acc_device_lock);
80 
81   assert (acc_device_type (disp->type) != acc_device_none
82 	  && acc_device_type (disp->type) != acc_device_default
83 	  && acc_device_type (disp->type) != acc_device_not_host);
84   assert (!dispatchers[disp->type]);
85   dispatchers[disp->type] = disp;
86 
87   gomp_mutex_unlock (&acc_device_lock);
88 }
89 
90 static bool
known_device_type_p(acc_device_t d)91 known_device_type_p (acc_device_t d)
92 {
93   return d >= 0 && d < _ACC_device_hwm;
94 }
95 
96 static void
unknown_device_type_error(acc_device_t invalid_type)97 unknown_device_type_error (acc_device_t invalid_type)
98 {
99   gomp_fatal ("unknown device type %u", invalid_type);
100 }
101 
/* OpenACC names some things a little differently: translate the name "gcn"
   to "radeon" and "nvptx" to "nvidia".  Any other NAME is returned
   unchanged (the very same pointer).  */

static const char *
get_openacc_name (const char *name)
{
  static const struct
  {
    const char *from;
    const char *to;
  } renames[] =
    {
      { "gcn", "radeon" },
      { "nvptx", "nvidia" },
    };

  for (size_t i = 0; i < sizeof renames / sizeof renames[0]; i++)
    if (strcmp (name, renames[i].from) == 0)
      return renames[i].to;

  return name;
}
114 
115 static const char *
name_of_acc_device_t(enum acc_device_t type)116 name_of_acc_device_t (enum acc_device_t type)
117 {
118   switch (type)
119     {
120     case acc_device_none: return "none";
121     case acc_device_default: return "default";
122     case acc_device_host: return "host";
123     case acc_device_not_host: return "not_host";
124     case acc_device_nvidia: return "nvidia";
125     case acc_device_radeon: return "radeon";
126     default: unknown_device_type_error (type);
127     }
128   __builtin_unreachable ();
129 }
130 
131 /* ACC_DEVICE_LOCK must be held before calling this function.  If FAIL_IS_ERROR
132    is true, this function raises an error if there are no devices of type D,
133    otherwise it returns NULL in that case.  */
134 
135 static struct gomp_device_descr *
resolve_device(acc_device_t d,bool fail_is_error)136 resolve_device (acc_device_t d, bool fail_is_error)
137 {
138   acc_device_t d_arg = d;
139 
140   switch (d)
141     {
142     case acc_device_default:
143       {
144 	if (goacc_device_type)
145 	  {
146 	    /* Lookup the named device.  */
147 	    while (known_device_type_p (++d))
148 	      if (dispatchers[d]
149 		  && !strcasecmp (goacc_device_type,
150 				  get_openacc_name (dispatchers[d]->name))
151 		  && dispatchers[d]->get_num_devices_func () > 0)
152 		goto found;
153 
154 	    if (fail_is_error)
155 	      {
156 		gomp_mutex_unlock (&acc_device_lock);
157 		gomp_fatal ("device type %s not supported", goacc_device_type);
158 	      }
159 	    else
160 	      return NULL;
161 	  }
162 
163 	/* No default device specified, so start scanning for any non-host
164 	   device that is available.  */
165 	d = acc_device_not_host;
166       }
167       /* FALLTHROUGH */
168 
169     case acc_device_not_host:
170       /* Find the first available device after acc_device_not_host.  */
171       while (known_device_type_p (++d))
172 	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
173 	  goto found;
174       if (d_arg == acc_device_default)
175 	{
176 	  d = acc_device_host;
177 	  goto found;
178 	}
179       if (fail_is_error)
180         {
181 	  gomp_mutex_unlock (&acc_device_lock);
182 	  gomp_fatal ("no device found");
183 	}
184       else
185         return NULL;
186       break;
187 
188     case acc_device_host:
189       break;
190 
191     default:
192       if (!known_device_type_p (d))
193 	{
194 	  if (fail_is_error)
195 	    goto unsupported_device;
196 	  else
197 	    return NULL;
198 	}
199       break;
200     }
201  found:
202 
203   assert (d != acc_device_none
204 	  && d != acc_device_default
205 	  && d != acc_device_not_host);
206 
207   if (dispatchers[d] == NULL && fail_is_error)
208     {
209     unsupported_device:
210       gomp_mutex_unlock (&acc_device_lock);
211       gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
212     }
213 
214   return dispatchers[d];
215 }
216 
217 /* Emit a suitable error if no device of a particular type is available, or
218    the given device number is out-of-range.  */
219 static void
acc_dev_num_out_of_range(acc_device_t d,int ord,int ndevs)220 acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
221 {
222   if (ndevs == 0)
223     gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
224   else
225     gomp_fatal ("device %u out of range", ord);
226 }
227 
228 /* This is called when plugins have been initialized, and serves to call
229    (indirectly) the target's device_init hook.  Calling multiple times without
230    an intervening acc_shutdown_1 call is an error.  ACC_DEVICE_LOCK must be
231    held before calling this function.  */
232 
233 static struct gomp_device_descr *
acc_init_1(acc_device_t d,acc_construct_t parent_construct,int implicit)234 acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
235 {
236   gomp_mutex_lock (&acc_init_state_lock);
237   acc_init_state = initializing;
238   acc_init_thread = pthread_self ();
239   gomp_mutex_unlock (&acc_init_state_lock);
240 
241   bool check_not_nested_p;
242   if (implicit)
243     {
244       /* In the implicit case, there should (TODO: must?) already be something
245 	 have been set up for an outer construct.  */
246       check_not_nested_p = false;
247     }
248   else
249     {
250       check_not_nested_p = true;
251       /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')?
252 	 The problem is, that we don't have 'thr' yet?  (So,
253 	 'check_not_nested_p = true' also is pointless actually.)  */
254     }
255   bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p);
256 
257   acc_prof_info prof_info;
258   if (profiling_p)
259     {
260       prof_info.event_type = acc_ev_device_init_start;
261       prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
262       prof_info.version = _ACC_PROF_INFO_VERSION;
263       prof_info.device_type = d;
264       prof_info.device_number = goacc_device_num;
265       prof_info.thread_id = -1;
266       prof_info.async = acc_async_sync;
267       prof_info.async_queue = prof_info.async;
268       prof_info.src_file = NULL;
269       prof_info.func_name = NULL;
270       prof_info.line_no = -1;
271       prof_info.end_line_no = -1;
272       prof_info.func_line_no = -1;
273       prof_info.func_end_line_no = -1;
274     }
275   acc_event_info device_init_event_info;
276   if (profiling_p)
277     {
278       device_init_event_info.other_event.event_type = prof_info.event_type;
279       device_init_event_info.other_event.valid_bytes
280 	= _ACC_OTHER_EVENT_INFO_VALID_BYTES;
281       device_init_event_info.other_event.parent_construct = parent_construct;
282       device_init_event_info.other_event.implicit = implicit;
283       device_init_event_info.other_event.tool_info = NULL;
284     }
285   acc_api_info api_info;
286   if (profiling_p)
287     {
288       api_info.device_api = acc_device_api_none;
289       api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
290       api_info.device_type = prof_info.device_type;
291       api_info.vendor = -1;
292       api_info.device_handle = NULL;
293       api_info.context_handle = NULL;
294       api_info.async_handle = NULL;
295     }
296 
297   if (profiling_p)
298     goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info);
299 
300   struct gomp_device_descr *base_dev, *acc_dev;
301   int ndevs;
302 
303   base_dev = resolve_device (d, true);
304 
305   ndevs = base_dev->get_num_devices_func ();
306 
307   if (ndevs <= 0 || goacc_device_num >= ndevs)
308     acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
309 
310   acc_dev = &base_dev[goacc_device_num];
311 
312   gomp_mutex_lock (&acc_dev->lock);
313   if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
314     {
315       gomp_mutex_unlock (&acc_dev->lock);
316       gomp_fatal ("device already active");
317     }
318 
319   gomp_init_device (acc_dev);
320   gomp_mutex_unlock (&acc_dev->lock);
321 
322   if (profiling_p)
323     {
324       prof_info.event_type = acc_ev_device_init_end;
325       device_init_event_info.other_event.event_type = prof_info.event_type;
326       goacc_profiling_dispatch (&prof_info, &device_init_event_info,
327 				&api_info);
328     }
329 
330   /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a
331      nested 'acc_get_device_type' called from a profiling callback still sees
332      'initializing', so that we don't deadlock when it then again tries to lock
333      'goacc_prof_lock'.  See also the discussion in 'acc_get_device_type'.  */
334   gomp_mutex_lock (&acc_init_state_lock);
335   acc_init_state = initialized;
336   gomp_mutex_unlock (&acc_init_state_lock);
337 
338   return base_dev;
339 }
340 
341 /* ACC_DEVICE_LOCK must be held before calling this function.  */
342 
343 static void
acc_shutdown_1(acc_device_t d)344 acc_shutdown_1 (acc_device_t d)
345 {
346   struct gomp_device_descr *base_dev;
347   struct goacc_thread *walk;
348   int ndevs, i;
349   bool devices_active = false;
350 
351   /* Get the base device for this device type.  */
352   base_dev = resolve_device (d, true);
353 
354   ndevs = base_dev->get_num_devices_func ();
355 
356   /* Unload all the devices of this type that have been opened.  */
357   for (i = 0; i < ndevs; i++)
358     {
359       struct gomp_device_descr *acc_dev = &base_dev[i];
360 
361       gomp_mutex_lock (&acc_dev->lock);
362       gomp_unload_device (acc_dev);
363       gomp_mutex_unlock (&acc_dev->lock);
364     }
365 
366   gomp_mutex_lock (&goacc_thread_lock);
367 
368   /* Free target-specific TLS data and close all devices.  */
369   for (walk = goacc_threads; walk != NULL; walk = walk->next)
370     {
371       if (walk->target_tls)
372 	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
373 
374       walk->target_tls = NULL;
375 
376       /* This would mean the user is shutting down OpenACC in the middle of an
377          "acc data" pragma.  Likely not intentional.  */
378       if (walk->mapped_data)
379 	{
380 	  gomp_mutex_unlock (&goacc_thread_lock);
381 	  gomp_fatal ("shutdown in 'acc data' region");
382 	}
383 
384       /* Similarly, if this happens then user code has done something weird.  */
385       if (walk->saved_bound_dev)
386 	{
387 	  gomp_mutex_unlock (&goacc_thread_lock);
388 	  gomp_fatal ("shutdown during host fallback");
389 	}
390 
391       if (walk->dev)
392 	{
393 	  gomp_mutex_lock (&walk->dev->lock);
394 
395 	  while (walk->dev->mem_map.root)
396 	    {
397 	      splay_tree_key k = &walk->dev->mem_map.root->key;
398 	      if (k->aux)
399 		k->aux->link_key = NULL;
400 	      gomp_remove_var (walk->dev, k);
401 	    }
402 
403 	  gomp_mutex_unlock (&walk->dev->lock);
404 
405 	  walk->dev = NULL;
406 	  walk->base_dev = NULL;
407 	}
408     }
409 
410   gomp_mutex_unlock (&goacc_thread_lock);
411 
412   /* Close all the devices of this type that have been opened.  */
413   bool ret = true;
414   for (i = 0; i < ndevs; i++)
415     {
416       struct gomp_device_descr *acc_dev = &base_dev[i];
417       gomp_mutex_lock (&acc_dev->lock);
418       if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
419         {
420 	  devices_active = true;
421 	  ret &= gomp_fini_device (acc_dev);
422 	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
423 	}
424       gomp_mutex_unlock (&acc_dev->lock);
425     }
426 
427   if (!ret)
428     gomp_fatal ("device finalization failed");
429 
430   if (!devices_active)
431     gomp_fatal ("no device initialized");
432 }
433 
434 static struct goacc_thread *
goacc_new_thread(void)435 goacc_new_thread (void)
436 {
437   struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
438 
439 #if defined HAVE_TLS || defined USE_EMUTLS
440   goacc_tls_data = thr;
441 #else
442   pthread_setspecific (goacc_tls_key, thr);
443 #endif
444 
445   pthread_setspecific (goacc_cleanup_key, thr);
446 
447   gomp_mutex_lock (&goacc_thread_lock);
448   thr->next = goacc_threads;
449   goacc_threads = thr;
450   gomp_mutex_unlock (&goacc_thread_lock);
451 
452   return thr;
453 }
454 
455 static void
goacc_destroy_thread(void * data)456 goacc_destroy_thread (void *data)
457 {
458   struct goacc_thread *thr = data, *walk, *prev;
459 
460   gomp_mutex_lock (&goacc_thread_lock);
461 
462   if (thr)
463     {
464       struct gomp_device_descr *acc_dev = thr->dev;
465 
466       if (acc_dev && thr->target_tls)
467 	{
468 	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
469 	  thr->target_tls = NULL;
470 	}
471 
472       assert (!thr->mapped_data);
473 
474       /* Remove from thread list.  */
475       for (prev = NULL, walk = goacc_threads; walk;
476 	   prev = walk, walk = walk->next)
477 	if (walk == thr)
478 	  {
479 	    if (prev == NULL)
480 	      goacc_threads = walk->next;
481 	    else
482 	      prev->next = walk->next;
483 
484 	    free (thr);
485 
486 	    break;
487 	  }
488 
489       assert (walk);
490     }
491 
492   gomp_mutex_unlock (&goacc_thread_lock);
493 }
494 
495 /* Use the ORD'th device instance for the current host thread (or -1 for the
496    current global default).  The device (and the runtime) must be initialised
497    before calling this function.  */
498 
499 void
goacc_attach_host_thread_to_device(int ord)500 goacc_attach_host_thread_to_device (int ord)
501 {
502   struct goacc_thread *thr = goacc_thread ();
503   struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
504   int num_devices;
505 
506   if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
507     return;
508 
509   if (ord < 0)
510     ord = goacc_device_num;
511 
512   /* Decide which type of device to use.  If the current thread has a device
513      type already (e.g. set by acc_set_device_type), use that, else use the
514      global default.  */
515   if (thr && thr->base_dev)
516     base_dev = thr->base_dev;
517   else
518     {
519       assert (cached_base_dev);
520       base_dev = cached_base_dev;
521     }
522 
523   num_devices = base_dev->get_num_devices_func ();
524   if (num_devices <= 0 || ord >= num_devices)
525     acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
526 			      num_devices);
527 
528   if (!thr)
529     thr = goacc_new_thread ();
530 
531   thr->base_dev = base_dev;
532   thr->dev = acc_dev = &base_dev[ord];
533   thr->saved_bound_dev = NULL;
534   thr->mapped_data = NULL;
535   thr->prof_info = NULL;
536   thr->api_info = NULL;
537   /* Initially, all callbacks for all events are enabled.  */
538   thr->prof_callbacks_enabled = true;
539 
540   thr->target_tls
541     = acc_dev->openacc.create_thread_data_func (ord);
542 }
543 
544 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
545    init/shutdown is per-process or per-thread.  We choose per-process.  */
546 
547 void
acc_init(acc_device_t d)548 acc_init (acc_device_t d)
549 {
550   if (!known_device_type_p (d))
551     unknown_device_type_error (d);
552 
553   gomp_init_targets_once ();
554 
555   gomp_mutex_lock (&acc_device_lock);
556   cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0);
557   gomp_mutex_unlock (&acc_device_lock);
558 
559   goacc_attach_host_thread_to_device (-1);
560 }
561 
ialias(acc_init)562 ialias (acc_init)
563 
564 void
565 acc_shutdown (acc_device_t d)
566 {
567   if (!known_device_type_p (d))
568     unknown_device_type_error (d);
569 
570   gomp_init_targets_once ();
571 
572   gomp_mutex_lock (&acc_device_lock);
573 
574   acc_shutdown_1 (d);
575 
576   gomp_mutex_unlock (&acc_device_lock);
577 }
578 
ialias(acc_shutdown)579 ialias (acc_shutdown)
580 
581 int
582 acc_get_num_devices (acc_device_t d)
583 {
584   if (!known_device_type_p (d))
585     unknown_device_type_error (d);
586 
587   int n = 0;
588   struct gomp_device_descr *acc_dev;
589 
590   if (d == acc_device_none)
591     return 0;
592 
593   gomp_init_targets_once ();
594 
595   gomp_mutex_lock (&acc_device_lock);
596   acc_dev = resolve_device (d, false);
597   gomp_mutex_unlock (&acc_device_lock);
598 
599   if (!acc_dev)
600     return 0;
601 
602   n = acc_dev->get_num_devices_func ();
603   if (n < 0)
604     n = 0;
605 
606   return n;
607 }
608 
ialias(acc_get_num_devices)609 ialias (acc_get_num_devices)
610 
611 /* Set the device type for the current thread only (using the current global
612    default device number), initialising that device if necessary.  Also set the
613    default device type for new threads to D.  */
614 
615 void
616 acc_set_device_type (acc_device_t d)
617 {
618   if (!known_device_type_p (d))
619     unknown_device_type_error (d);
620 
621   struct gomp_device_descr *base_dev, *acc_dev;
622   struct goacc_thread *thr = goacc_thread ();
623 
624   acc_prof_info prof_info;
625   acc_api_info api_info;
626   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
627   if (profiling_p)
628     prof_info.device_type = d;
629 
630   gomp_init_targets_once ();
631 
632   gomp_mutex_lock (&acc_device_lock);
633 
634   cached_base_dev = base_dev = resolve_device (d, true);
635   acc_dev = &base_dev[goacc_device_num];
636 
637   gomp_mutex_lock (&acc_dev->lock);
638   if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
639     gomp_init_device (acc_dev);
640   gomp_mutex_unlock (&acc_dev->lock);
641 
642   gomp_mutex_unlock (&acc_device_lock);
643 
644   /* We're changing device type: invalidate the current thread's dev and
645      base_dev pointers.  */
646   if (thr && thr->base_dev != base_dev)
647     {
648       thr->base_dev = thr->dev = NULL;
649       if (thr->mapped_data)
650         gomp_fatal ("acc_set_device_type in 'acc data' region");
651     }
652 
653   goacc_attach_host_thread_to_device (-1);
654 
655   if (profiling_p)
656     {
657       thr->prof_info = NULL;
658       thr->api_info = NULL;
659     }
660 }
661 
ialias(acc_set_device_type)662 ialias (acc_set_device_type)
663 
664 static bool
665 self_initializing_p (void)
666 {
667   bool res;
668   gomp_mutex_lock (&acc_init_state_lock);
669   res = (acc_init_state == initializing
670 	 && pthread_equal (acc_init_thread, pthread_self ()));
671   gomp_mutex_unlock (&acc_init_state_lock);
672   return res;
673 }
674 
675 acc_device_t
acc_get_device_type(void)676 acc_get_device_type (void)
677 {
678   acc_device_t res = acc_device_none;
679   struct gomp_device_descr *dev;
680   struct goacc_thread *thr = goacc_thread ();
681 
682   if (thr && thr->base_dev)
683     res = acc_device_type (thr->base_dev->type);
684   else if (self_initializing_p ())
685     /* The Cuda libaccinj64.so version 9.0+ calls acc_get_device_type during the
686        acc_ev_device_init_start event callback, which is dispatched during
687        acc_init_1.  Trying to lock acc_device_lock during such a call (as we do
688        in the else clause below), will result in deadlock, since the lock has
689        already been taken by the acc_init_1 caller.  We work around this problem
690        by using the acc_get_device_type property "If the device type has not yet
691        been selected, the value acc_device_none may be returned".  */
692     ;
693   else
694     {
695       acc_prof_info prof_info;
696       acc_api_info api_info;
697       bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
698 
699       gomp_init_targets_once ();
700 
701       gomp_mutex_lock (&acc_device_lock);
702       dev = resolve_device (acc_device_default, true);
703       gomp_mutex_unlock (&acc_device_lock);
704       res = acc_device_type (dev->type);
705 
706       if (profiling_p)
707 	{
708 	  thr->prof_info = NULL;
709 	  thr->api_info = NULL;
710 	}
711     }
712 
713   assert (res != acc_device_default
714 	  && res != acc_device_not_host
715 	  && res != acc_device_current);
716 
717   return res;
718 }
719 
ialias(acc_get_device_type)720 ialias (acc_get_device_type)
721 
722 int
723 acc_get_device_num (acc_device_t d)
724 {
725   if (!known_device_type_p (d))
726     unknown_device_type_error (d);
727 
728   const struct gomp_device_descr *dev;
729   struct goacc_thread *thr = goacc_thread ();
730 
731   acc_prof_info prof_info;
732   acc_api_info api_info;
733   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
734   if (profiling_p)
735     prof_info.device_type = d;
736 
737   gomp_init_targets_once ();
738 
739   gomp_mutex_lock (&acc_device_lock);
740   dev = resolve_device (d, true);
741   gomp_mutex_unlock (&acc_device_lock);
742 
743   if (profiling_p)
744     {
745       thr->prof_info = NULL;
746       thr->api_info = NULL;
747     }
748 
749   if (thr && thr->base_dev == dev && thr->dev)
750     return thr->dev->target_id;
751 
752   return goacc_device_num;
753 }
754 
ialias(acc_get_device_num)755 ialias (acc_get_device_num)
756 
757 void
758 acc_set_device_num (int ord, acc_device_t d)
759 {
760   if (!known_device_type_p (d))
761     unknown_device_type_error (d);
762 
763   struct gomp_device_descr *base_dev, *acc_dev;
764   int num_devices;
765 
766   gomp_init_targets_once ();
767 
768   if (ord < 0)
769     ord = goacc_device_num;
770 
771   if ((int) d == 0)
772     /* Set whatever device is being used by the current host thread to use
773        device instance ORD.  It's unclear if this is supposed to affect other
774        host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
775     goacc_attach_host_thread_to_device (ord);
776   else
777     {
778       gomp_mutex_lock (&acc_device_lock);
779 
780       cached_base_dev = base_dev = resolve_device (d, true);
781 
782       num_devices = base_dev->get_num_devices_func ();
783 
784       if (num_devices <= 0 || ord >= num_devices)
785         acc_dev_num_out_of_range (d, ord, num_devices);
786 
787       acc_dev = &base_dev[ord];
788 
789       gomp_mutex_lock (&acc_dev->lock);
790       if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
791         gomp_init_device (acc_dev);
792       gomp_mutex_unlock (&acc_dev->lock);
793 
794       gomp_mutex_unlock (&acc_device_lock);
795 
796       goacc_attach_host_thread_to_device (ord);
797     }
798 
799   goacc_device_num = ord;
800 }
801 
ialias(acc_set_device_num)802 ialias (acc_set_device_num)
803 
804 static union goacc_property_value
805 get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
806 {
807   goacc_lazy_initialize ();
808   struct goacc_thread *thr = goacc_thread ();
809 
810   if (d == acc_device_current && thr && thr->dev)
811     return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
812 
813   gomp_mutex_lock (&acc_device_lock);
814 
815   struct gomp_device_descr *dev = resolve_device (d, true);
816 
817   int num_devices = dev->get_num_devices_func ();
818 
819   if (num_devices <= 0 || ord >= num_devices)
820     acc_dev_num_out_of_range (d, ord, num_devices);
821 
822   dev += ord;
823 
824   gomp_mutex_lock (&dev->lock);
825   if (dev->state == GOMP_DEVICE_UNINITIALIZED)
826     gomp_init_device (dev);
827   gomp_mutex_unlock (&dev->lock);
828 
829   gomp_mutex_unlock (&acc_device_lock);
830 
831   assert (dev);
832 
833   return dev->openacc.get_property_func (dev->target_id, prop);
834 }
835 
836 size_t
acc_get_property(int ord,acc_device_t d,acc_device_property_t prop)837 acc_get_property (int ord, acc_device_t d, acc_device_property_t prop)
838 {
839   if (!known_device_type_p (d))
840     unknown_device_type_error(d);
841 
842   if (prop & GOACC_PROPERTY_STRING_MASK)
843     return 0;
844   else
845     return get_property_any (ord, d, prop).val;
846 }
847 
ialias(acc_get_property)848 ialias (acc_get_property)
849 
850 const char *
851 acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop)
852 {
853   if (!known_device_type_p (d))
854     unknown_device_type_error(d);
855 
856   if (prop & GOACC_PROPERTY_STRING_MASK)
857     return get_property_any (ord, d, prop).ptr;
858   else
859     return NULL;
860 }
861 
ialias(acc_get_property_string)862 ialias (acc_get_property_string)
863 
864 /* For -O and higher, the compiler always attempts to expand acc_on_device, but
865    if the user disables the builtin, or calls it via a pointer, we'll need this
866    version.
867 
868    Compile this with optimization, so that the compiler expands
869    this, rather than generating infinitely recursive code.
870 
871    The function just forwards its argument to __builtin_acc_on_device.  It does
872    not verify that the argument is a valid acc_device_t enumeration value.  */
873 
874 int __attribute__ ((__optimize__ ("O2")))
875 acc_on_device (acc_device_t dev)
876 {
877   return __builtin_acc_on_device (dev);
878 }
879 
ialias(acc_on_device)880 ialias (acc_on_device)
881 
882 attribute_hidden void
883 goacc_runtime_initialize (void)
884 {
885   gomp_mutex_init (&acc_device_lock);
886 
887 #if !(defined HAVE_TLS || defined USE_EMUTLS)
888   pthread_key_create (&goacc_tls_key, NULL);
889 #endif
890 
891   pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
892 
893   cached_base_dev = NULL;
894 
895   goacc_threads = NULL;
896   gomp_mutex_init (&goacc_thread_lock);
897 
898   /* Initialize and register the 'host' device type.  */
899   goacc_host_init ();
900 }
901 
902 static void __attribute__((destructor))
goacc_runtime_deinitialize(void)903 goacc_runtime_deinitialize (void)
904 {
905 #if !(defined HAVE_TLS || defined USE_EMUTLS)
906   pthread_key_delete (goacc_tls_key);
907 #endif
908   pthread_key_delete (goacc_cleanup_key);
909 }
910 
911 /* Compiler helper functions */
912 
913 attribute_hidden void
goacc_save_and_set_bind(acc_device_t d)914 goacc_save_and_set_bind (acc_device_t d)
915 {
916   struct goacc_thread *thr = goacc_thread ();
917 
918   assert (!thr->saved_bound_dev);
919 
920   thr->saved_bound_dev = thr->dev;
921   thr->dev = dispatchers[d];
922 }
923 
924 attribute_hidden void
goacc_restore_bind(void)925 goacc_restore_bind (void)
926 {
927   struct goacc_thread *thr = goacc_thread ();
928 
929   thr->dev = thr->saved_bound_dev;
930   thr->saved_bound_dev = NULL;
931 }
932 
933 /* This is called from any OpenACC support function that may need to implicitly
934    initialize the libgomp runtime, either globally or from a new host thread.
935    On exit "goacc_thread" will return a valid & populated thread block.  */
936 
937 attribute_hidden void
goacc_lazy_initialize(void)938 goacc_lazy_initialize (void)
939 {
940   struct goacc_thread *thr = goacc_thread ();
941 
942   if (thr && thr->dev)
943     return;
944 
945   gomp_init_targets_once ();
946 
947   gomp_mutex_lock (&acc_device_lock);
948   if (!cached_base_dev)
949     cached_base_dev = acc_init_1 (acc_device_default,
950 				  acc_construct_parallel, 1);
951   gomp_mutex_unlock (&acc_device_lock);
952 
953   goacc_attach_host_thread_to_device (-1);
954 }
955