xref: /dflybsd-src/contrib/gcc-8.0/libgomp/oacc-init.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* OpenACC Runtime initialization routines
2*38fd1498Szrj 
3*38fd1498Szrj    Copyright (C) 2013-2018 Free Software Foundation, Inc.
4*38fd1498Szrj 
5*38fd1498Szrj    Contributed by Mentor Embedded.
6*38fd1498Szrj 
7*38fd1498Szrj    This file is part of the GNU Offloading and Multi Processing Library
8*38fd1498Szrj    (libgomp).
9*38fd1498Szrj 
10*38fd1498Szrj    Libgomp is free software; you can redistribute it and/or modify it
11*38fd1498Szrj    under the terms of the GNU General Public License as published by
12*38fd1498Szrj    the Free Software Foundation; either version 3, or (at your option)
13*38fd1498Szrj    any later version.
14*38fd1498Szrj 
15*38fd1498Szrj    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16*38fd1498Szrj    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17*38fd1498Szrj    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18*38fd1498Szrj    more details.
19*38fd1498Szrj 
20*38fd1498Szrj    Under Section 7 of GPL version 3, you are granted additional
21*38fd1498Szrj    permissions described in the GCC Runtime Library Exception, version
22*38fd1498Szrj    3.1, as published by the Free Software Foundation.
23*38fd1498Szrj 
24*38fd1498Szrj    You should have received a copy of the GNU General Public License and
25*38fd1498Szrj    a copy of the GCC Runtime Library Exception along with this program;
26*38fd1498Szrj    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27*38fd1498Szrj    <http://www.gnu.org/licenses/>.  */
28*38fd1498Szrj 
29*38fd1498Szrj #include "libgomp.h"
30*38fd1498Szrj #include "oacc-int.h"
31*38fd1498Szrj #include "openacc.h"
32*38fd1498Szrj #include <assert.h>
33*38fd1498Szrj #include <stdlib.h>
34*38fd1498Szrj #include <strings.h>
35*38fd1498Szrj #include <stdbool.h>
36*38fd1498Szrj #include <string.h>
37*38fd1498Szrj 
38*38fd1498Szrj /* This lock is used to protect access to cached_base_dev, dispatchers and
39*38fd1498Szrj    the (abstract) initialisation state of attached offloading devices.  */
40*38fd1498Szrj 
41*38fd1498Szrj static gomp_mutex_t acc_device_lock;
42*38fd1498Szrj 
43*38fd1498Szrj /* A cached version of the dispatcher for the global "current" accelerator type,
44*38fd1498Szrj    e.g. used as the default when creating new host threads.  This is the
45*38fd1498Szrj    device-type equivalent of goacc_device_num (which specifies which device to
46*38fd1498Szrj    use out of potentially several of the same type).  If there are several
47*38fd1498Szrj    devices of a given type, this points at the first one.  */
48*38fd1498Szrj 
49*38fd1498Szrj static struct gomp_device_descr *cached_base_dev = NULL;
50*38fd1498Szrj 
51*38fd1498Szrj #if defined HAVE_TLS || defined USE_EMUTLS
52*38fd1498Szrj __thread struct goacc_thread *goacc_tls_data;
53*38fd1498Szrj #else
54*38fd1498Szrj pthread_key_t goacc_tls_key;
55*38fd1498Szrj #endif
56*38fd1498Szrj static pthread_key_t goacc_cleanup_key;
57*38fd1498Szrj 
58*38fd1498Szrj static struct goacc_thread *goacc_threads;
59*38fd1498Szrj static gomp_mutex_t goacc_thread_lock;
60*38fd1498Szrj 
61*38fd1498Szrj /* An array of dispatchers for device types, indexed by the type.  This array
62*38fd1498Szrj    only references "base" devices, and other instances of the same type are
63*38fd1498Szrj    found by simply indexing from each such device (which are stored linearly,
64*38fd1498Szrj    grouped by device in target.c:devices).  */
65*38fd1498Szrj static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
66*38fd1498Szrj 
67*38fd1498Szrj attribute_hidden void
goacc_register(struct gomp_device_descr * disp)68*38fd1498Szrj goacc_register (struct gomp_device_descr *disp)
69*38fd1498Szrj {
70*38fd1498Szrj   /* Only register the 0th device here.  */
71*38fd1498Szrj   if (disp->target_id != 0)
72*38fd1498Szrj     return;
73*38fd1498Szrj 
74*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
75*38fd1498Szrj 
76*38fd1498Szrj   assert (acc_device_type (disp->type) != acc_device_none
77*38fd1498Szrj 	  && acc_device_type (disp->type) != acc_device_default
78*38fd1498Szrj 	  && acc_device_type (disp->type) != acc_device_not_host);
79*38fd1498Szrj   assert (!dispatchers[disp->type]);
80*38fd1498Szrj   dispatchers[disp->type] = disp;
81*38fd1498Szrj 
82*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
83*38fd1498Szrj }
84*38fd1498Szrj 
/* Translate a device NAME into the spelling OpenACC uses for it: "nvptx"
   becomes "nvidia"; every other name is returned unchanged (the very same
   pointer that was passed in).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
95*38fd1498Szrj 
96*38fd1498Szrj static const char *
name_of_acc_device_t(enum acc_device_t type)97*38fd1498Szrj name_of_acc_device_t (enum acc_device_t type)
98*38fd1498Szrj {
99*38fd1498Szrj   switch (type)
100*38fd1498Szrj     {
101*38fd1498Szrj     case acc_device_none: return "none";
102*38fd1498Szrj     case acc_device_default: return "default";
103*38fd1498Szrj     case acc_device_host: return "host";
104*38fd1498Szrj     case acc_device_not_host: return "not_host";
105*38fd1498Szrj     case acc_device_nvidia: return "nvidia";
106*38fd1498Szrj     default: gomp_fatal ("unknown device type %u", (unsigned) type);
107*38fd1498Szrj     }
108*38fd1498Szrj }
109*38fd1498Szrj 
110*38fd1498Szrj /* ACC_DEVICE_LOCK must be held before calling this function.  If FAIL_IS_ERROR
111*38fd1498Szrj    is true, this function raises an error if there are no devices of type D,
112*38fd1498Szrj    otherwise it returns NULL in that case.  */
113*38fd1498Szrj 
114*38fd1498Szrj static struct gomp_device_descr *
resolve_device(acc_device_t d,bool fail_is_error)115*38fd1498Szrj resolve_device (acc_device_t d, bool fail_is_error)
116*38fd1498Szrj {
117*38fd1498Szrj   acc_device_t d_arg = d;
118*38fd1498Szrj 
119*38fd1498Szrj   switch (d)
120*38fd1498Szrj     {
121*38fd1498Szrj     case acc_device_default:
122*38fd1498Szrj       {
123*38fd1498Szrj 	if (goacc_device_type)
124*38fd1498Szrj 	  {
125*38fd1498Szrj 	    /* Lookup the named device.  */
126*38fd1498Szrj 	    while (++d != _ACC_device_hwm)
127*38fd1498Szrj 	      if (dispatchers[d]
128*38fd1498Szrj 		  && !strcasecmp (goacc_device_type,
129*38fd1498Szrj 				  get_openacc_name (dispatchers[d]->name))
130*38fd1498Szrj 		  && dispatchers[d]->get_num_devices_func () > 0)
131*38fd1498Szrj 		goto found;
132*38fd1498Szrj 
133*38fd1498Szrj 	    if (fail_is_error)
134*38fd1498Szrj 	      {
135*38fd1498Szrj 		gomp_mutex_unlock (&acc_device_lock);
136*38fd1498Szrj 		gomp_fatal ("device type %s not supported", goacc_device_type);
137*38fd1498Szrj 	      }
138*38fd1498Szrj 	    else
139*38fd1498Szrj 	      return NULL;
140*38fd1498Szrj 	  }
141*38fd1498Szrj 
142*38fd1498Szrj 	/* No default device specified, so start scanning for any non-host
143*38fd1498Szrj 	   device that is available.  */
144*38fd1498Szrj 	d = acc_device_not_host;
145*38fd1498Szrj       }
146*38fd1498Szrj       /* FALLTHROUGH */
147*38fd1498Szrj 
148*38fd1498Szrj     case acc_device_not_host:
149*38fd1498Szrj       /* Find the first available device after acc_device_not_host.  */
150*38fd1498Szrj       while (++d != _ACC_device_hwm)
151*38fd1498Szrj 	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
152*38fd1498Szrj 	  goto found;
153*38fd1498Szrj       if (d_arg == acc_device_default)
154*38fd1498Szrj 	{
155*38fd1498Szrj 	  d = acc_device_host;
156*38fd1498Szrj 	  goto found;
157*38fd1498Szrj 	}
158*38fd1498Szrj       if (fail_is_error)
159*38fd1498Szrj         {
160*38fd1498Szrj 	  gomp_mutex_unlock (&acc_device_lock);
161*38fd1498Szrj 	  gomp_fatal ("no device found");
162*38fd1498Szrj 	}
163*38fd1498Szrj       else
164*38fd1498Szrj         return NULL;
165*38fd1498Szrj       break;
166*38fd1498Szrj 
167*38fd1498Szrj     case acc_device_host:
168*38fd1498Szrj       break;
169*38fd1498Szrj 
170*38fd1498Szrj     default:
171*38fd1498Szrj       if (d > _ACC_device_hwm)
172*38fd1498Szrj 	{
173*38fd1498Szrj 	  if (fail_is_error)
174*38fd1498Szrj 	    goto unsupported_device;
175*38fd1498Szrj 	  else
176*38fd1498Szrj 	    return NULL;
177*38fd1498Szrj 	}
178*38fd1498Szrj       break;
179*38fd1498Szrj     }
180*38fd1498Szrj  found:
181*38fd1498Szrj 
182*38fd1498Szrj   assert (d != acc_device_none
183*38fd1498Szrj 	  && d != acc_device_default
184*38fd1498Szrj 	  && d != acc_device_not_host);
185*38fd1498Szrj 
186*38fd1498Szrj   if (dispatchers[d] == NULL && fail_is_error)
187*38fd1498Szrj     {
188*38fd1498Szrj     unsupported_device:
189*38fd1498Szrj       gomp_mutex_unlock (&acc_device_lock);
190*38fd1498Szrj       gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
191*38fd1498Szrj     }
192*38fd1498Szrj 
193*38fd1498Szrj   return dispatchers[d];
194*38fd1498Szrj }
195*38fd1498Szrj 
196*38fd1498Szrj /* Emit a suitable error if no device of a particular type is available, or
197*38fd1498Szrj    the given device number is out-of-range.  */
198*38fd1498Szrj static void
acc_dev_num_out_of_range(acc_device_t d,int ord,int ndevs)199*38fd1498Szrj acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
200*38fd1498Szrj {
201*38fd1498Szrj   if (ndevs == 0)
202*38fd1498Szrj     gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
203*38fd1498Szrj   else
204*38fd1498Szrj     gomp_fatal ("device %u out of range", ord);
205*38fd1498Szrj }
206*38fd1498Szrj 
207*38fd1498Szrj /* This is called when plugins have been initialized, and serves to call
208*38fd1498Szrj    (indirectly) the target's device_init hook.  Calling multiple times without
209*38fd1498Szrj    an intervening acc_shutdown_1 call is an error.  ACC_DEVICE_LOCK must be
210*38fd1498Szrj    held before calling this function.  */
211*38fd1498Szrj 
212*38fd1498Szrj static struct gomp_device_descr *
acc_init_1(acc_device_t d)213*38fd1498Szrj acc_init_1 (acc_device_t d)
214*38fd1498Szrj {
215*38fd1498Szrj   struct gomp_device_descr *base_dev, *acc_dev;
216*38fd1498Szrj   int ndevs;
217*38fd1498Szrj 
218*38fd1498Szrj   base_dev = resolve_device (d, true);
219*38fd1498Szrj 
220*38fd1498Szrj   ndevs = base_dev->get_num_devices_func ();
221*38fd1498Szrj 
222*38fd1498Szrj   if (ndevs <= 0 || goacc_device_num >= ndevs)
223*38fd1498Szrj     acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
224*38fd1498Szrj 
225*38fd1498Szrj   acc_dev = &base_dev[goacc_device_num];
226*38fd1498Szrj 
227*38fd1498Szrj   gomp_mutex_lock (&acc_dev->lock);
228*38fd1498Szrj   if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
229*38fd1498Szrj     {
230*38fd1498Szrj       gomp_mutex_unlock (&acc_dev->lock);
231*38fd1498Szrj       gomp_fatal ("device already active");
232*38fd1498Szrj     }
233*38fd1498Szrj 
234*38fd1498Szrj   gomp_init_device (acc_dev);
235*38fd1498Szrj   gomp_mutex_unlock (&acc_dev->lock);
236*38fd1498Szrj 
237*38fd1498Szrj   return base_dev;
238*38fd1498Szrj }
239*38fd1498Szrj 
240*38fd1498Szrj /* ACC_DEVICE_LOCK must be held before calling this function.  */
241*38fd1498Szrj 
242*38fd1498Szrj static void
acc_shutdown_1(acc_device_t d)243*38fd1498Szrj acc_shutdown_1 (acc_device_t d)
244*38fd1498Szrj {
245*38fd1498Szrj   struct gomp_device_descr *base_dev;
246*38fd1498Szrj   struct goacc_thread *walk;
247*38fd1498Szrj   int ndevs, i;
248*38fd1498Szrj   bool devices_active = false;
249*38fd1498Szrj 
250*38fd1498Szrj   /* Get the base device for this device type.  */
251*38fd1498Szrj   base_dev = resolve_device (d, true);
252*38fd1498Szrj 
253*38fd1498Szrj   ndevs = base_dev->get_num_devices_func ();
254*38fd1498Szrj 
255*38fd1498Szrj   /* Unload all the devices of this type that have been opened.  */
256*38fd1498Szrj   for (i = 0; i < ndevs; i++)
257*38fd1498Szrj     {
258*38fd1498Szrj       struct gomp_device_descr *acc_dev = &base_dev[i];
259*38fd1498Szrj 
260*38fd1498Szrj       gomp_mutex_lock (&acc_dev->lock);
261*38fd1498Szrj       gomp_unload_device (acc_dev);
262*38fd1498Szrj       gomp_mutex_unlock (&acc_dev->lock);
263*38fd1498Szrj     }
264*38fd1498Szrj 
265*38fd1498Szrj   gomp_mutex_lock (&goacc_thread_lock);
266*38fd1498Szrj 
267*38fd1498Szrj   /* Free target-specific TLS data and close all devices.  */
268*38fd1498Szrj   for (walk = goacc_threads; walk != NULL; walk = walk->next)
269*38fd1498Szrj     {
270*38fd1498Szrj       if (walk->target_tls)
271*38fd1498Szrj 	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
272*38fd1498Szrj 
273*38fd1498Szrj       walk->target_tls = NULL;
274*38fd1498Szrj 
275*38fd1498Szrj       /* This would mean the user is shutting down OpenACC in the middle of an
276*38fd1498Szrj          "acc data" pragma.  Likely not intentional.  */
277*38fd1498Szrj       if (walk->mapped_data)
278*38fd1498Szrj 	{
279*38fd1498Szrj 	  gomp_mutex_unlock (&goacc_thread_lock);
280*38fd1498Szrj 	  gomp_fatal ("shutdown in 'acc data' region");
281*38fd1498Szrj 	}
282*38fd1498Szrj 
283*38fd1498Szrj       /* Similarly, if this happens then user code has done something weird.  */
284*38fd1498Szrj       if (walk->saved_bound_dev)
285*38fd1498Szrj 	{
286*38fd1498Szrj 	  gomp_mutex_unlock (&goacc_thread_lock);
287*38fd1498Szrj 	  gomp_fatal ("shutdown during host fallback");
288*38fd1498Szrj 	}
289*38fd1498Szrj 
290*38fd1498Szrj       if (walk->dev)
291*38fd1498Szrj 	{
292*38fd1498Szrj 	  gomp_mutex_lock (&walk->dev->lock);
293*38fd1498Szrj 	  gomp_free_memmap (&walk->dev->mem_map);
294*38fd1498Szrj 	  gomp_mutex_unlock (&walk->dev->lock);
295*38fd1498Szrj 
296*38fd1498Szrj 	  walk->dev = NULL;
297*38fd1498Szrj 	  walk->base_dev = NULL;
298*38fd1498Szrj 	}
299*38fd1498Szrj     }
300*38fd1498Szrj 
301*38fd1498Szrj   gomp_mutex_unlock (&goacc_thread_lock);
302*38fd1498Szrj 
303*38fd1498Szrj   /* Close all the devices of this type that have been opened.  */
304*38fd1498Szrj   bool ret = true;
305*38fd1498Szrj   for (i = 0; i < ndevs; i++)
306*38fd1498Szrj     {
307*38fd1498Szrj       struct gomp_device_descr *acc_dev = &base_dev[i];
308*38fd1498Szrj       gomp_mutex_lock (&acc_dev->lock);
309*38fd1498Szrj       if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
310*38fd1498Szrj         {
311*38fd1498Szrj 	  devices_active = true;
312*38fd1498Szrj 	  ret &= acc_dev->fini_device_func (acc_dev->target_id);
313*38fd1498Szrj 	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
314*38fd1498Szrj 	}
315*38fd1498Szrj       gomp_mutex_unlock (&acc_dev->lock);
316*38fd1498Szrj     }
317*38fd1498Szrj 
318*38fd1498Szrj   if (!ret)
319*38fd1498Szrj     gomp_fatal ("device finalization failed");
320*38fd1498Szrj 
321*38fd1498Szrj   if (!devices_active)
322*38fd1498Szrj     gomp_fatal ("no device initialized");
323*38fd1498Szrj }
324*38fd1498Szrj 
325*38fd1498Szrj static struct goacc_thread *
goacc_new_thread(void)326*38fd1498Szrj goacc_new_thread (void)
327*38fd1498Szrj {
328*38fd1498Szrj   struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
329*38fd1498Szrj 
330*38fd1498Szrj #if defined HAVE_TLS || defined USE_EMUTLS
331*38fd1498Szrj   goacc_tls_data = thr;
332*38fd1498Szrj #else
333*38fd1498Szrj   pthread_setspecific (goacc_tls_key, thr);
334*38fd1498Szrj #endif
335*38fd1498Szrj 
336*38fd1498Szrj   pthread_setspecific (goacc_cleanup_key, thr);
337*38fd1498Szrj 
338*38fd1498Szrj   gomp_mutex_lock (&goacc_thread_lock);
339*38fd1498Szrj   thr->next = goacc_threads;
340*38fd1498Szrj   goacc_threads = thr;
341*38fd1498Szrj   gomp_mutex_unlock (&goacc_thread_lock);
342*38fd1498Szrj 
343*38fd1498Szrj   return thr;
344*38fd1498Szrj }
345*38fd1498Szrj 
346*38fd1498Szrj static void
goacc_destroy_thread(void * data)347*38fd1498Szrj goacc_destroy_thread (void *data)
348*38fd1498Szrj {
349*38fd1498Szrj   struct goacc_thread *thr = data, *walk, *prev;
350*38fd1498Szrj 
351*38fd1498Szrj   gomp_mutex_lock (&goacc_thread_lock);
352*38fd1498Szrj 
353*38fd1498Szrj   if (thr)
354*38fd1498Szrj     {
355*38fd1498Szrj       struct gomp_device_descr *acc_dev = thr->dev;
356*38fd1498Szrj 
357*38fd1498Szrj       if (acc_dev && thr->target_tls)
358*38fd1498Szrj 	{
359*38fd1498Szrj 	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
360*38fd1498Szrj 	  thr->target_tls = NULL;
361*38fd1498Szrj 	}
362*38fd1498Szrj 
363*38fd1498Szrj       assert (!thr->mapped_data);
364*38fd1498Szrj 
365*38fd1498Szrj       /* Remove from thread list.  */
366*38fd1498Szrj       for (prev = NULL, walk = goacc_threads; walk;
367*38fd1498Szrj 	   prev = walk, walk = walk->next)
368*38fd1498Szrj 	if (walk == thr)
369*38fd1498Szrj 	  {
370*38fd1498Szrj 	    if (prev == NULL)
371*38fd1498Szrj 	      goacc_threads = walk->next;
372*38fd1498Szrj 	    else
373*38fd1498Szrj 	      prev->next = walk->next;
374*38fd1498Szrj 
375*38fd1498Szrj 	    free (thr);
376*38fd1498Szrj 
377*38fd1498Szrj 	    break;
378*38fd1498Szrj 	  }
379*38fd1498Szrj 
380*38fd1498Szrj       assert (walk);
381*38fd1498Szrj     }
382*38fd1498Szrj 
383*38fd1498Szrj   gomp_mutex_unlock (&goacc_thread_lock);
384*38fd1498Szrj }
385*38fd1498Szrj 
386*38fd1498Szrj /* Use the ORD'th device instance for the current host thread (or -1 for the
387*38fd1498Szrj    current global default).  The device (and the runtime) must be initialised
388*38fd1498Szrj    before calling this function.  */
389*38fd1498Szrj 
390*38fd1498Szrj void
goacc_attach_host_thread_to_device(int ord)391*38fd1498Szrj goacc_attach_host_thread_to_device (int ord)
392*38fd1498Szrj {
393*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
394*38fd1498Szrj   struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
395*38fd1498Szrj   int num_devices;
396*38fd1498Szrj 
397*38fd1498Szrj   if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
398*38fd1498Szrj     return;
399*38fd1498Szrj 
400*38fd1498Szrj   if (ord < 0)
401*38fd1498Szrj     ord = goacc_device_num;
402*38fd1498Szrj 
403*38fd1498Szrj   /* Decide which type of device to use.  If the current thread has a device
404*38fd1498Szrj      type already (e.g. set by acc_set_device_type), use that, else use the
405*38fd1498Szrj      global default.  */
406*38fd1498Szrj   if (thr && thr->base_dev)
407*38fd1498Szrj     base_dev = thr->base_dev;
408*38fd1498Szrj   else
409*38fd1498Szrj     {
410*38fd1498Szrj       assert (cached_base_dev);
411*38fd1498Szrj       base_dev = cached_base_dev;
412*38fd1498Szrj     }
413*38fd1498Szrj 
414*38fd1498Szrj   num_devices = base_dev->get_num_devices_func ();
415*38fd1498Szrj   if (num_devices <= 0 || ord >= num_devices)
416*38fd1498Szrj     acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
417*38fd1498Szrj 			      num_devices);
418*38fd1498Szrj 
419*38fd1498Szrj   if (!thr)
420*38fd1498Szrj     thr = goacc_new_thread ();
421*38fd1498Szrj 
422*38fd1498Szrj   thr->base_dev = base_dev;
423*38fd1498Szrj   thr->dev = acc_dev = &base_dev[ord];
424*38fd1498Szrj   thr->saved_bound_dev = NULL;
425*38fd1498Szrj   thr->mapped_data = NULL;
426*38fd1498Szrj 
427*38fd1498Szrj   thr->target_tls
428*38fd1498Szrj     = acc_dev->openacc.create_thread_data_func (ord);
429*38fd1498Szrj 
430*38fd1498Szrj   acc_dev->openacc.async_set_async_func (acc_async_sync);
431*38fd1498Szrj }
432*38fd1498Szrj 
433*38fd1498Szrj /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
434*38fd1498Szrj    init/shutdown is per-process or per-thread.  We choose per-process.  */
435*38fd1498Szrj 
436*38fd1498Szrj void
acc_init(acc_device_t d)437*38fd1498Szrj acc_init (acc_device_t d)
438*38fd1498Szrj {
439*38fd1498Szrj   gomp_init_targets_once ();
440*38fd1498Szrj 
441*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
442*38fd1498Szrj 
443*38fd1498Szrj   cached_base_dev = acc_init_1 (d);
444*38fd1498Szrj 
445*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
446*38fd1498Szrj 
447*38fd1498Szrj   goacc_attach_host_thread_to_device (-1);
448*38fd1498Szrj }
449*38fd1498Szrj 
ialias(acc_init)450*38fd1498Szrj ialias (acc_init)
451*38fd1498Szrj 
452*38fd1498Szrj void
453*38fd1498Szrj acc_shutdown (acc_device_t d)
454*38fd1498Szrj {
455*38fd1498Szrj   gomp_init_targets_once ();
456*38fd1498Szrj 
457*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
458*38fd1498Szrj 
459*38fd1498Szrj   acc_shutdown_1 (d);
460*38fd1498Szrj 
461*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
462*38fd1498Szrj }
463*38fd1498Szrj 
ialias(acc_shutdown)464*38fd1498Szrj ialias (acc_shutdown)
465*38fd1498Szrj 
466*38fd1498Szrj int
467*38fd1498Szrj acc_get_num_devices (acc_device_t d)
468*38fd1498Szrj {
469*38fd1498Szrj   int n = 0;
470*38fd1498Szrj   struct gomp_device_descr *acc_dev;
471*38fd1498Szrj 
472*38fd1498Szrj   if (d == acc_device_none)
473*38fd1498Szrj     return 0;
474*38fd1498Szrj 
475*38fd1498Szrj   gomp_init_targets_once ();
476*38fd1498Szrj 
477*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
478*38fd1498Szrj   acc_dev = resolve_device (d, false);
479*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
480*38fd1498Szrj 
481*38fd1498Szrj   if (!acc_dev)
482*38fd1498Szrj     return 0;
483*38fd1498Szrj 
484*38fd1498Szrj   n = acc_dev->get_num_devices_func ();
485*38fd1498Szrj   if (n < 0)
486*38fd1498Szrj     n = 0;
487*38fd1498Szrj 
488*38fd1498Szrj   return n;
489*38fd1498Szrj }
490*38fd1498Szrj 
ialias(acc_get_num_devices)491*38fd1498Szrj ialias (acc_get_num_devices)
492*38fd1498Szrj 
493*38fd1498Szrj /* Set the device type for the current thread only (using the current global
494*38fd1498Szrj    default device number), initialising that device if necessary.  Also set the
495*38fd1498Szrj    default device type for new threads to D.  */
496*38fd1498Szrj 
497*38fd1498Szrj void
498*38fd1498Szrj acc_set_device_type (acc_device_t d)
499*38fd1498Szrj {
500*38fd1498Szrj   struct gomp_device_descr *base_dev, *acc_dev;
501*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
502*38fd1498Szrj 
503*38fd1498Szrj   gomp_init_targets_once ();
504*38fd1498Szrj 
505*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
506*38fd1498Szrj 
507*38fd1498Szrj   cached_base_dev = base_dev = resolve_device (d, true);
508*38fd1498Szrj   acc_dev = &base_dev[goacc_device_num];
509*38fd1498Szrj 
510*38fd1498Szrj   gomp_mutex_lock (&acc_dev->lock);
511*38fd1498Szrj   if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
512*38fd1498Szrj     gomp_init_device (acc_dev);
513*38fd1498Szrj   gomp_mutex_unlock (&acc_dev->lock);
514*38fd1498Szrj 
515*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
516*38fd1498Szrj 
517*38fd1498Szrj   /* We're changing device type: invalidate the current thread's dev and
518*38fd1498Szrj      base_dev pointers.  */
519*38fd1498Szrj   if (thr && thr->base_dev != base_dev)
520*38fd1498Szrj     {
521*38fd1498Szrj       thr->base_dev = thr->dev = NULL;
522*38fd1498Szrj       if (thr->mapped_data)
523*38fd1498Szrj         gomp_fatal ("acc_set_device_type in 'acc data' region");
524*38fd1498Szrj     }
525*38fd1498Szrj 
526*38fd1498Szrj   goacc_attach_host_thread_to_device (-1);
527*38fd1498Szrj }
528*38fd1498Szrj 
ialias(acc_set_device_type)529*38fd1498Szrj ialias (acc_set_device_type)
530*38fd1498Szrj 
531*38fd1498Szrj acc_device_t
532*38fd1498Szrj acc_get_device_type (void)
533*38fd1498Szrj {
534*38fd1498Szrj   acc_device_t res = acc_device_none;
535*38fd1498Szrj   struct gomp_device_descr *dev;
536*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
537*38fd1498Szrj 
538*38fd1498Szrj   if (thr && thr->base_dev)
539*38fd1498Szrj     res = acc_device_type (thr->base_dev->type);
540*38fd1498Szrj   else
541*38fd1498Szrj     {
542*38fd1498Szrj       gomp_init_targets_once ();
543*38fd1498Szrj 
544*38fd1498Szrj       gomp_mutex_lock (&acc_device_lock);
545*38fd1498Szrj       dev = resolve_device (acc_device_default, true);
546*38fd1498Szrj       gomp_mutex_unlock (&acc_device_lock);
547*38fd1498Szrj       res = acc_device_type (dev->type);
548*38fd1498Szrj     }
549*38fd1498Szrj 
550*38fd1498Szrj   assert (res != acc_device_default
551*38fd1498Szrj 	  && res != acc_device_not_host);
552*38fd1498Szrj 
553*38fd1498Szrj   return res;
554*38fd1498Szrj }
555*38fd1498Szrj 
ialias(acc_get_device_type)556*38fd1498Szrj ialias (acc_get_device_type)
557*38fd1498Szrj 
558*38fd1498Szrj int
559*38fd1498Szrj acc_get_device_num (acc_device_t d)
560*38fd1498Szrj {
561*38fd1498Szrj   const struct gomp_device_descr *dev;
562*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
563*38fd1498Szrj 
564*38fd1498Szrj   if (d >= _ACC_device_hwm)
565*38fd1498Szrj     gomp_fatal ("unknown device type %u", (unsigned) d);
566*38fd1498Szrj 
567*38fd1498Szrj   gomp_init_targets_once ();
568*38fd1498Szrj 
569*38fd1498Szrj   gomp_mutex_lock (&acc_device_lock);
570*38fd1498Szrj   dev = resolve_device (d, true);
571*38fd1498Szrj   gomp_mutex_unlock (&acc_device_lock);
572*38fd1498Szrj 
573*38fd1498Szrj   if (thr && thr->base_dev == dev && thr->dev)
574*38fd1498Szrj     return thr->dev->target_id;
575*38fd1498Szrj 
576*38fd1498Szrj   return goacc_device_num;
577*38fd1498Szrj }
578*38fd1498Szrj 
ialias(acc_get_device_num)579*38fd1498Szrj ialias (acc_get_device_num)
580*38fd1498Szrj 
581*38fd1498Szrj void
582*38fd1498Szrj acc_set_device_num (int ord, acc_device_t d)
583*38fd1498Szrj {
584*38fd1498Szrj   struct gomp_device_descr *base_dev, *acc_dev;
585*38fd1498Szrj   int num_devices;
586*38fd1498Szrj 
587*38fd1498Szrj   gomp_init_targets_once ();
588*38fd1498Szrj 
589*38fd1498Szrj   if (ord < 0)
590*38fd1498Szrj     ord = goacc_device_num;
591*38fd1498Szrj 
592*38fd1498Szrj   if ((int) d == 0)
593*38fd1498Szrj     /* Set whatever device is being used by the current host thread to use
594*38fd1498Szrj        device instance ORD.  It's unclear if this is supposed to affect other
595*38fd1498Szrj        host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
596*38fd1498Szrj     goacc_attach_host_thread_to_device (ord);
597*38fd1498Szrj   else
598*38fd1498Szrj     {
599*38fd1498Szrj       gomp_mutex_lock (&acc_device_lock);
600*38fd1498Szrj 
601*38fd1498Szrj       cached_base_dev = base_dev = resolve_device (d, true);
602*38fd1498Szrj 
603*38fd1498Szrj       num_devices = base_dev->get_num_devices_func ();
604*38fd1498Szrj 
605*38fd1498Szrj       if (num_devices <= 0 || ord >= num_devices)
606*38fd1498Szrj         acc_dev_num_out_of_range (d, ord, num_devices);
607*38fd1498Szrj 
608*38fd1498Szrj       acc_dev = &base_dev[ord];
609*38fd1498Szrj 
610*38fd1498Szrj       gomp_mutex_lock (&acc_dev->lock);
611*38fd1498Szrj       if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
612*38fd1498Szrj         gomp_init_device (acc_dev);
613*38fd1498Szrj       gomp_mutex_unlock (&acc_dev->lock);
614*38fd1498Szrj 
615*38fd1498Szrj       gomp_mutex_unlock (&acc_device_lock);
616*38fd1498Szrj 
617*38fd1498Szrj       goacc_attach_host_thread_to_device (ord);
618*38fd1498Szrj     }
619*38fd1498Szrj 
620*38fd1498Szrj   goacc_device_num = ord;
621*38fd1498Szrj }
622*38fd1498Szrj 
ialias(acc_set_device_num)623*38fd1498Szrj ialias (acc_set_device_num)
624*38fd1498Szrj 
625*38fd1498Szrj /* For -O and higher, the compiler always attempts to expand acc_on_device, but
626*38fd1498Szrj    if the user disables the builtin, or calls it via a pointer, we'll need this
627*38fd1498Szrj    version.
628*38fd1498Szrj 
629*38fd1498Szrj    Compile this with optimization, so that the compiler expands
630*38fd1498Szrj    this, rather than generating infinitely recursive code.  */
631*38fd1498Szrj 
632*38fd1498Szrj int __attribute__ ((__optimize__ ("O2")))
633*38fd1498Szrj acc_on_device (acc_device_t dev)
634*38fd1498Szrj {
635*38fd1498Szrj   return __builtin_acc_on_device (dev);
636*38fd1498Szrj }
637*38fd1498Szrj 
ialias(acc_on_device)638*38fd1498Szrj ialias (acc_on_device)
639*38fd1498Szrj 
640*38fd1498Szrj attribute_hidden void
641*38fd1498Szrj goacc_runtime_initialize (void)
642*38fd1498Szrj {
643*38fd1498Szrj   gomp_mutex_init (&acc_device_lock);
644*38fd1498Szrj 
645*38fd1498Szrj #if !(defined HAVE_TLS || defined USE_EMUTLS)
646*38fd1498Szrj   pthread_key_create (&goacc_tls_key, NULL);
647*38fd1498Szrj #endif
648*38fd1498Szrj 
649*38fd1498Szrj   pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
650*38fd1498Szrj 
651*38fd1498Szrj   cached_base_dev = NULL;
652*38fd1498Szrj 
653*38fd1498Szrj   goacc_threads = NULL;
654*38fd1498Szrj   gomp_mutex_init (&goacc_thread_lock);
655*38fd1498Szrj 
656*38fd1498Szrj   /* Initialize and register the 'host' device type.  */
657*38fd1498Szrj   goacc_host_init ();
658*38fd1498Szrj }
659*38fd1498Szrj 
660*38fd1498Szrj /* Compiler helper functions */
661*38fd1498Szrj 
662*38fd1498Szrj attribute_hidden void
goacc_save_and_set_bind(acc_device_t d)663*38fd1498Szrj goacc_save_and_set_bind (acc_device_t d)
664*38fd1498Szrj {
665*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
666*38fd1498Szrj 
667*38fd1498Szrj   assert (!thr->saved_bound_dev);
668*38fd1498Szrj 
669*38fd1498Szrj   thr->saved_bound_dev = thr->dev;
670*38fd1498Szrj   thr->dev = dispatchers[d];
671*38fd1498Szrj }
672*38fd1498Szrj 
673*38fd1498Szrj attribute_hidden void
goacc_restore_bind(void)674*38fd1498Szrj goacc_restore_bind (void)
675*38fd1498Szrj {
676*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
677*38fd1498Szrj 
678*38fd1498Szrj   thr->dev = thr->saved_bound_dev;
679*38fd1498Szrj   thr->saved_bound_dev = NULL;
680*38fd1498Szrj }
681*38fd1498Szrj 
682*38fd1498Szrj /* This is called from any OpenACC support function that may need to implicitly
683*38fd1498Szrj    initialize the libgomp runtime, either globally or from a new host thread.
684*38fd1498Szrj    On exit "goacc_thread" will return a valid & populated thread block.  */
685*38fd1498Szrj 
686*38fd1498Szrj attribute_hidden void
goacc_lazy_initialize(void)687*38fd1498Szrj goacc_lazy_initialize (void)
688*38fd1498Szrj {
689*38fd1498Szrj   struct goacc_thread *thr = goacc_thread ();
690*38fd1498Szrj 
691*38fd1498Szrj   if (thr && thr->dev)
692*38fd1498Szrj     return;
693*38fd1498Szrj 
694*38fd1498Szrj   if (!cached_base_dev)
695*38fd1498Szrj     acc_init (acc_device_default);
696*38fd1498Szrj   else
697*38fd1498Szrj     goacc_attach_host_thread_to_device (-1);
698*38fd1498Szrj }
699