1*38fd1498Szrj /* OpenACC Runtime initialization routines
2*38fd1498Szrj
3*38fd1498Szrj Copyright (C) 2013-2018 Free Software Foundation, Inc.
4*38fd1498Szrj
5*38fd1498Szrj Contributed by Mentor Embedded.
6*38fd1498Szrj
7*38fd1498Szrj This file is part of the GNU Offloading and Multi Processing Library
8*38fd1498Szrj (libgomp).
9*38fd1498Szrj
10*38fd1498Szrj Libgomp is free software; you can redistribute it and/or modify it
11*38fd1498Szrj under the terms of the GNU General Public License as published by
12*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
13*38fd1498Szrj any later version.
14*38fd1498Szrj
15*38fd1498Szrj Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17*38fd1498Szrj FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18*38fd1498Szrj more details.
19*38fd1498Szrj
20*38fd1498Szrj Under Section 7 of GPL version 3, you are granted additional
21*38fd1498Szrj permissions described in the GCC Runtime Library Exception, version
22*38fd1498Szrj 3.1, as published by the Free Software Foundation.
23*38fd1498Szrj
24*38fd1498Szrj You should have received a copy of the GNU General Public License and
25*38fd1498Szrj a copy of the GCC Runtime Library Exception along with this program;
26*38fd1498Szrj see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27*38fd1498Szrj <http://www.gnu.org/licenses/>. */
28*38fd1498Szrj
29*38fd1498Szrj #include "libgomp.h"
30*38fd1498Szrj #include "oacc-int.h"
31*38fd1498Szrj #include "openacc.h"
32*38fd1498Szrj #include <assert.h>
33*38fd1498Szrj #include <stdlib.h>
34*38fd1498Szrj #include <strings.h>
35*38fd1498Szrj #include <stdbool.h>
36*38fd1498Szrj #include <string.h>
37*38fd1498Szrj
38*38fd1498Szrj /* This lock is used to protect access to cached_base_dev, dispatchers and
39*38fd1498Szrj the (abstract) initialisation state of attached offloading devices. */
40*38fd1498Szrj
41*38fd1498Szrj static gomp_mutex_t acc_device_lock;
42*38fd1498Szrj
43*38fd1498Szrj /* A cached version of the dispatcher for the global "current" accelerator type,
44*38fd1498Szrj e.g. used as the default when creating new host threads. This is the
45*38fd1498Szrj device-type equivalent of goacc_device_num (which specifies which device to
46*38fd1498Szrj use out of potentially several of the same type). If there are several
47*38fd1498Szrj devices of a given type, this points at the first one. */
48*38fd1498Szrj
49*38fd1498Szrj static struct gomp_device_descr *cached_base_dev = NULL;
50*38fd1498Szrj
51*38fd1498Szrj #if defined HAVE_TLS || defined USE_EMUTLS
52*38fd1498Szrj __thread struct goacc_thread *goacc_tls_data;
53*38fd1498Szrj #else
54*38fd1498Szrj pthread_key_t goacc_tls_key;
55*38fd1498Szrj #endif
56*38fd1498Szrj static pthread_key_t goacc_cleanup_key;
57*38fd1498Szrj
58*38fd1498Szrj static struct goacc_thread *goacc_threads;
59*38fd1498Szrj static gomp_mutex_t goacc_thread_lock;
60*38fd1498Szrj
61*38fd1498Szrj /* An array of dispatchers for device types, indexed by the type. This array
62*38fd1498Szrj only references "base" devices, and other instances of the same type are
63*38fd1498Szrj found by simply indexing from each such device (which are stored linearly,
64*38fd1498Szrj grouped by device in target.c:devices). */
65*38fd1498Szrj static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
66*38fd1498Szrj
67*38fd1498Szrj attribute_hidden void
goacc_register(struct gomp_device_descr * disp)68*38fd1498Szrj goacc_register (struct gomp_device_descr *disp)
69*38fd1498Szrj {
70*38fd1498Szrj /* Only register the 0th device here. */
71*38fd1498Szrj if (disp->target_id != 0)
72*38fd1498Szrj return;
73*38fd1498Szrj
74*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
75*38fd1498Szrj
76*38fd1498Szrj assert (acc_device_type (disp->type) != acc_device_none
77*38fd1498Szrj && acc_device_type (disp->type) != acc_device_default
78*38fd1498Szrj && acc_device_type (disp->type) != acc_device_not_host);
79*38fd1498Szrj assert (!dispatchers[disp->type]);
80*38fd1498Szrj dispatchers[disp->type] = disp;
81*38fd1498Szrj
82*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
83*38fd1498Szrj }
84*38fd1498Szrj
/* Translate a plugin name into the name OpenACC uses for the same device:
   "nvptx" becomes "nvidia"; any other NAME is returned unchanged (the very
   same pointer).  */

static const char *
get_openacc_name (const char *name)
{
  const bool is_nvptx = strcmp (name, "nvptx") == 0;

  return is_nvptx ? "nvidia" : name;
}
95*38fd1498Szrj
96*38fd1498Szrj static const char *
name_of_acc_device_t(enum acc_device_t type)97*38fd1498Szrj name_of_acc_device_t (enum acc_device_t type)
98*38fd1498Szrj {
99*38fd1498Szrj switch (type)
100*38fd1498Szrj {
101*38fd1498Szrj case acc_device_none: return "none";
102*38fd1498Szrj case acc_device_default: return "default";
103*38fd1498Szrj case acc_device_host: return "host";
104*38fd1498Szrj case acc_device_not_host: return "not_host";
105*38fd1498Szrj case acc_device_nvidia: return "nvidia";
106*38fd1498Szrj default: gomp_fatal ("unknown device type %u", (unsigned) type);
107*38fd1498Szrj }
108*38fd1498Szrj }
109*38fd1498Szrj
110*38fd1498Szrj /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR
111*38fd1498Szrj is true, this function raises an error if there are no devices of type D,
112*38fd1498Szrj otherwise it returns NULL in that case. */
113*38fd1498Szrj
114*38fd1498Szrj static struct gomp_device_descr *
resolve_device(acc_device_t d,bool fail_is_error)115*38fd1498Szrj resolve_device (acc_device_t d, bool fail_is_error)
116*38fd1498Szrj {
117*38fd1498Szrj acc_device_t d_arg = d;
118*38fd1498Szrj
119*38fd1498Szrj switch (d)
120*38fd1498Szrj {
121*38fd1498Szrj case acc_device_default:
122*38fd1498Szrj {
123*38fd1498Szrj if (goacc_device_type)
124*38fd1498Szrj {
125*38fd1498Szrj /* Lookup the named device. */
126*38fd1498Szrj while (++d != _ACC_device_hwm)
127*38fd1498Szrj if (dispatchers[d]
128*38fd1498Szrj && !strcasecmp (goacc_device_type,
129*38fd1498Szrj get_openacc_name (dispatchers[d]->name))
130*38fd1498Szrj && dispatchers[d]->get_num_devices_func () > 0)
131*38fd1498Szrj goto found;
132*38fd1498Szrj
133*38fd1498Szrj if (fail_is_error)
134*38fd1498Szrj {
135*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
136*38fd1498Szrj gomp_fatal ("device type %s not supported", goacc_device_type);
137*38fd1498Szrj }
138*38fd1498Szrj else
139*38fd1498Szrj return NULL;
140*38fd1498Szrj }
141*38fd1498Szrj
142*38fd1498Szrj /* No default device specified, so start scanning for any non-host
143*38fd1498Szrj device that is available. */
144*38fd1498Szrj d = acc_device_not_host;
145*38fd1498Szrj }
146*38fd1498Szrj /* FALLTHROUGH */
147*38fd1498Szrj
148*38fd1498Szrj case acc_device_not_host:
149*38fd1498Szrj /* Find the first available device after acc_device_not_host. */
150*38fd1498Szrj while (++d != _ACC_device_hwm)
151*38fd1498Szrj if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
152*38fd1498Szrj goto found;
153*38fd1498Szrj if (d_arg == acc_device_default)
154*38fd1498Szrj {
155*38fd1498Szrj d = acc_device_host;
156*38fd1498Szrj goto found;
157*38fd1498Szrj }
158*38fd1498Szrj if (fail_is_error)
159*38fd1498Szrj {
160*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
161*38fd1498Szrj gomp_fatal ("no device found");
162*38fd1498Szrj }
163*38fd1498Szrj else
164*38fd1498Szrj return NULL;
165*38fd1498Szrj break;
166*38fd1498Szrj
167*38fd1498Szrj case acc_device_host:
168*38fd1498Szrj break;
169*38fd1498Szrj
170*38fd1498Szrj default:
171*38fd1498Szrj if (d > _ACC_device_hwm)
172*38fd1498Szrj {
173*38fd1498Szrj if (fail_is_error)
174*38fd1498Szrj goto unsupported_device;
175*38fd1498Szrj else
176*38fd1498Szrj return NULL;
177*38fd1498Szrj }
178*38fd1498Szrj break;
179*38fd1498Szrj }
180*38fd1498Szrj found:
181*38fd1498Szrj
182*38fd1498Szrj assert (d != acc_device_none
183*38fd1498Szrj && d != acc_device_default
184*38fd1498Szrj && d != acc_device_not_host);
185*38fd1498Szrj
186*38fd1498Szrj if (dispatchers[d] == NULL && fail_is_error)
187*38fd1498Szrj {
188*38fd1498Szrj unsupported_device:
189*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
190*38fd1498Szrj gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
191*38fd1498Szrj }
192*38fd1498Szrj
193*38fd1498Szrj return dispatchers[d];
194*38fd1498Szrj }
195*38fd1498Szrj
196*38fd1498Szrj /* Emit a suitable error if no device of a particular type is available, or
197*38fd1498Szrj the given device number is out-of-range. */
198*38fd1498Szrj static void
acc_dev_num_out_of_range(acc_device_t d,int ord,int ndevs)199*38fd1498Szrj acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
200*38fd1498Szrj {
201*38fd1498Szrj if (ndevs == 0)
202*38fd1498Szrj gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
203*38fd1498Szrj else
204*38fd1498Szrj gomp_fatal ("device %u out of range", ord);
205*38fd1498Szrj }
206*38fd1498Szrj
207*38fd1498Szrj /* This is called when plugins have been initialized, and serves to call
208*38fd1498Szrj (indirectly) the target's device_init hook. Calling multiple times without
209*38fd1498Szrj an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be
210*38fd1498Szrj held before calling this function. */
211*38fd1498Szrj
212*38fd1498Szrj static struct gomp_device_descr *
acc_init_1(acc_device_t d)213*38fd1498Szrj acc_init_1 (acc_device_t d)
214*38fd1498Szrj {
215*38fd1498Szrj struct gomp_device_descr *base_dev, *acc_dev;
216*38fd1498Szrj int ndevs;
217*38fd1498Szrj
218*38fd1498Szrj base_dev = resolve_device (d, true);
219*38fd1498Szrj
220*38fd1498Szrj ndevs = base_dev->get_num_devices_func ();
221*38fd1498Szrj
222*38fd1498Szrj if (ndevs <= 0 || goacc_device_num >= ndevs)
223*38fd1498Szrj acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
224*38fd1498Szrj
225*38fd1498Szrj acc_dev = &base_dev[goacc_device_num];
226*38fd1498Szrj
227*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
228*38fd1498Szrj if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
229*38fd1498Szrj {
230*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
231*38fd1498Szrj gomp_fatal ("device already active");
232*38fd1498Szrj }
233*38fd1498Szrj
234*38fd1498Szrj gomp_init_device (acc_dev);
235*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
236*38fd1498Szrj
237*38fd1498Szrj return base_dev;
238*38fd1498Szrj }
239*38fd1498Szrj
240*38fd1498Szrj /* ACC_DEVICE_LOCK must be held before calling this function. */
241*38fd1498Szrj
242*38fd1498Szrj static void
acc_shutdown_1(acc_device_t d)243*38fd1498Szrj acc_shutdown_1 (acc_device_t d)
244*38fd1498Szrj {
245*38fd1498Szrj struct gomp_device_descr *base_dev;
246*38fd1498Szrj struct goacc_thread *walk;
247*38fd1498Szrj int ndevs, i;
248*38fd1498Szrj bool devices_active = false;
249*38fd1498Szrj
250*38fd1498Szrj /* Get the base device for this device type. */
251*38fd1498Szrj base_dev = resolve_device (d, true);
252*38fd1498Szrj
253*38fd1498Szrj ndevs = base_dev->get_num_devices_func ();
254*38fd1498Szrj
255*38fd1498Szrj /* Unload all the devices of this type that have been opened. */
256*38fd1498Szrj for (i = 0; i < ndevs; i++)
257*38fd1498Szrj {
258*38fd1498Szrj struct gomp_device_descr *acc_dev = &base_dev[i];
259*38fd1498Szrj
260*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
261*38fd1498Szrj gomp_unload_device (acc_dev);
262*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
263*38fd1498Szrj }
264*38fd1498Szrj
265*38fd1498Szrj gomp_mutex_lock (&goacc_thread_lock);
266*38fd1498Szrj
267*38fd1498Szrj /* Free target-specific TLS data and close all devices. */
268*38fd1498Szrj for (walk = goacc_threads; walk != NULL; walk = walk->next)
269*38fd1498Szrj {
270*38fd1498Szrj if (walk->target_tls)
271*38fd1498Szrj base_dev->openacc.destroy_thread_data_func (walk->target_tls);
272*38fd1498Szrj
273*38fd1498Szrj walk->target_tls = NULL;
274*38fd1498Szrj
275*38fd1498Szrj /* This would mean the user is shutting down OpenACC in the middle of an
276*38fd1498Szrj "acc data" pragma. Likely not intentional. */
277*38fd1498Szrj if (walk->mapped_data)
278*38fd1498Szrj {
279*38fd1498Szrj gomp_mutex_unlock (&goacc_thread_lock);
280*38fd1498Szrj gomp_fatal ("shutdown in 'acc data' region");
281*38fd1498Szrj }
282*38fd1498Szrj
283*38fd1498Szrj /* Similarly, if this happens then user code has done something weird. */
284*38fd1498Szrj if (walk->saved_bound_dev)
285*38fd1498Szrj {
286*38fd1498Szrj gomp_mutex_unlock (&goacc_thread_lock);
287*38fd1498Szrj gomp_fatal ("shutdown during host fallback");
288*38fd1498Szrj }
289*38fd1498Szrj
290*38fd1498Szrj if (walk->dev)
291*38fd1498Szrj {
292*38fd1498Szrj gomp_mutex_lock (&walk->dev->lock);
293*38fd1498Szrj gomp_free_memmap (&walk->dev->mem_map);
294*38fd1498Szrj gomp_mutex_unlock (&walk->dev->lock);
295*38fd1498Szrj
296*38fd1498Szrj walk->dev = NULL;
297*38fd1498Szrj walk->base_dev = NULL;
298*38fd1498Szrj }
299*38fd1498Szrj }
300*38fd1498Szrj
301*38fd1498Szrj gomp_mutex_unlock (&goacc_thread_lock);
302*38fd1498Szrj
303*38fd1498Szrj /* Close all the devices of this type that have been opened. */
304*38fd1498Szrj bool ret = true;
305*38fd1498Szrj for (i = 0; i < ndevs; i++)
306*38fd1498Szrj {
307*38fd1498Szrj struct gomp_device_descr *acc_dev = &base_dev[i];
308*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
309*38fd1498Szrj if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
310*38fd1498Szrj {
311*38fd1498Szrj devices_active = true;
312*38fd1498Szrj ret &= acc_dev->fini_device_func (acc_dev->target_id);
313*38fd1498Szrj acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
314*38fd1498Szrj }
315*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
316*38fd1498Szrj }
317*38fd1498Szrj
318*38fd1498Szrj if (!ret)
319*38fd1498Szrj gomp_fatal ("device finalization failed");
320*38fd1498Szrj
321*38fd1498Szrj if (!devices_active)
322*38fd1498Szrj gomp_fatal ("no device initialized");
323*38fd1498Szrj }
324*38fd1498Szrj
325*38fd1498Szrj static struct goacc_thread *
goacc_new_thread(void)326*38fd1498Szrj goacc_new_thread (void)
327*38fd1498Szrj {
328*38fd1498Szrj struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
329*38fd1498Szrj
330*38fd1498Szrj #if defined HAVE_TLS || defined USE_EMUTLS
331*38fd1498Szrj goacc_tls_data = thr;
332*38fd1498Szrj #else
333*38fd1498Szrj pthread_setspecific (goacc_tls_key, thr);
334*38fd1498Szrj #endif
335*38fd1498Szrj
336*38fd1498Szrj pthread_setspecific (goacc_cleanup_key, thr);
337*38fd1498Szrj
338*38fd1498Szrj gomp_mutex_lock (&goacc_thread_lock);
339*38fd1498Szrj thr->next = goacc_threads;
340*38fd1498Szrj goacc_threads = thr;
341*38fd1498Szrj gomp_mutex_unlock (&goacc_thread_lock);
342*38fd1498Szrj
343*38fd1498Szrj return thr;
344*38fd1498Szrj }
345*38fd1498Szrj
346*38fd1498Szrj static void
goacc_destroy_thread(void * data)347*38fd1498Szrj goacc_destroy_thread (void *data)
348*38fd1498Szrj {
349*38fd1498Szrj struct goacc_thread *thr = data, *walk, *prev;
350*38fd1498Szrj
351*38fd1498Szrj gomp_mutex_lock (&goacc_thread_lock);
352*38fd1498Szrj
353*38fd1498Szrj if (thr)
354*38fd1498Szrj {
355*38fd1498Szrj struct gomp_device_descr *acc_dev = thr->dev;
356*38fd1498Szrj
357*38fd1498Szrj if (acc_dev && thr->target_tls)
358*38fd1498Szrj {
359*38fd1498Szrj acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
360*38fd1498Szrj thr->target_tls = NULL;
361*38fd1498Szrj }
362*38fd1498Szrj
363*38fd1498Szrj assert (!thr->mapped_data);
364*38fd1498Szrj
365*38fd1498Szrj /* Remove from thread list. */
366*38fd1498Szrj for (prev = NULL, walk = goacc_threads; walk;
367*38fd1498Szrj prev = walk, walk = walk->next)
368*38fd1498Szrj if (walk == thr)
369*38fd1498Szrj {
370*38fd1498Szrj if (prev == NULL)
371*38fd1498Szrj goacc_threads = walk->next;
372*38fd1498Szrj else
373*38fd1498Szrj prev->next = walk->next;
374*38fd1498Szrj
375*38fd1498Szrj free (thr);
376*38fd1498Szrj
377*38fd1498Szrj break;
378*38fd1498Szrj }
379*38fd1498Szrj
380*38fd1498Szrj assert (walk);
381*38fd1498Szrj }
382*38fd1498Szrj
383*38fd1498Szrj gomp_mutex_unlock (&goacc_thread_lock);
384*38fd1498Szrj }
385*38fd1498Szrj
386*38fd1498Szrj /* Use the ORD'th device instance for the current host thread (or -1 for the
387*38fd1498Szrj current global default). The device (and the runtime) must be initialised
388*38fd1498Szrj before calling this function. */
389*38fd1498Szrj
390*38fd1498Szrj void
goacc_attach_host_thread_to_device(int ord)391*38fd1498Szrj goacc_attach_host_thread_to_device (int ord)
392*38fd1498Szrj {
393*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
394*38fd1498Szrj struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
395*38fd1498Szrj int num_devices;
396*38fd1498Szrj
397*38fd1498Szrj if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
398*38fd1498Szrj return;
399*38fd1498Szrj
400*38fd1498Szrj if (ord < 0)
401*38fd1498Szrj ord = goacc_device_num;
402*38fd1498Szrj
403*38fd1498Szrj /* Decide which type of device to use. If the current thread has a device
404*38fd1498Szrj type already (e.g. set by acc_set_device_type), use that, else use the
405*38fd1498Szrj global default. */
406*38fd1498Szrj if (thr && thr->base_dev)
407*38fd1498Szrj base_dev = thr->base_dev;
408*38fd1498Szrj else
409*38fd1498Szrj {
410*38fd1498Szrj assert (cached_base_dev);
411*38fd1498Szrj base_dev = cached_base_dev;
412*38fd1498Szrj }
413*38fd1498Szrj
414*38fd1498Szrj num_devices = base_dev->get_num_devices_func ();
415*38fd1498Szrj if (num_devices <= 0 || ord >= num_devices)
416*38fd1498Szrj acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
417*38fd1498Szrj num_devices);
418*38fd1498Szrj
419*38fd1498Szrj if (!thr)
420*38fd1498Szrj thr = goacc_new_thread ();
421*38fd1498Szrj
422*38fd1498Szrj thr->base_dev = base_dev;
423*38fd1498Szrj thr->dev = acc_dev = &base_dev[ord];
424*38fd1498Szrj thr->saved_bound_dev = NULL;
425*38fd1498Szrj thr->mapped_data = NULL;
426*38fd1498Szrj
427*38fd1498Szrj thr->target_tls
428*38fd1498Szrj = acc_dev->openacc.create_thread_data_func (ord);
429*38fd1498Szrj
430*38fd1498Szrj acc_dev->openacc.async_set_async_func (acc_async_sync);
431*38fd1498Szrj }
432*38fd1498Szrj
433*38fd1498Szrj /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
434*38fd1498Szrj init/shutdown is per-process or per-thread. We choose per-process. */
435*38fd1498Szrj
436*38fd1498Szrj void
acc_init(acc_device_t d)437*38fd1498Szrj acc_init (acc_device_t d)
438*38fd1498Szrj {
439*38fd1498Szrj gomp_init_targets_once ();
440*38fd1498Szrj
441*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
442*38fd1498Szrj
443*38fd1498Szrj cached_base_dev = acc_init_1 (d);
444*38fd1498Szrj
445*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
446*38fd1498Szrj
447*38fd1498Szrj goacc_attach_host_thread_to_device (-1);
448*38fd1498Szrj }
449*38fd1498Szrj
ialias(acc_init)450*38fd1498Szrj ialias (acc_init)
451*38fd1498Szrj
452*38fd1498Szrj void
453*38fd1498Szrj acc_shutdown (acc_device_t d)
454*38fd1498Szrj {
455*38fd1498Szrj gomp_init_targets_once ();
456*38fd1498Szrj
457*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
458*38fd1498Szrj
459*38fd1498Szrj acc_shutdown_1 (d);
460*38fd1498Szrj
461*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
462*38fd1498Szrj }
463*38fd1498Szrj
ialias(acc_shutdown)464*38fd1498Szrj ialias (acc_shutdown)
465*38fd1498Szrj
466*38fd1498Szrj int
467*38fd1498Szrj acc_get_num_devices (acc_device_t d)
468*38fd1498Szrj {
469*38fd1498Szrj int n = 0;
470*38fd1498Szrj struct gomp_device_descr *acc_dev;
471*38fd1498Szrj
472*38fd1498Szrj if (d == acc_device_none)
473*38fd1498Szrj return 0;
474*38fd1498Szrj
475*38fd1498Szrj gomp_init_targets_once ();
476*38fd1498Szrj
477*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
478*38fd1498Szrj acc_dev = resolve_device (d, false);
479*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
480*38fd1498Szrj
481*38fd1498Szrj if (!acc_dev)
482*38fd1498Szrj return 0;
483*38fd1498Szrj
484*38fd1498Szrj n = acc_dev->get_num_devices_func ();
485*38fd1498Szrj if (n < 0)
486*38fd1498Szrj n = 0;
487*38fd1498Szrj
488*38fd1498Szrj return n;
489*38fd1498Szrj }
490*38fd1498Szrj
ialias(acc_get_num_devices)491*38fd1498Szrj ialias (acc_get_num_devices)
492*38fd1498Szrj
493*38fd1498Szrj /* Set the device type for the current thread only (using the current global
494*38fd1498Szrj default device number), initialising that device if necessary. Also set the
495*38fd1498Szrj default device type for new threads to D. */
496*38fd1498Szrj
497*38fd1498Szrj void
498*38fd1498Szrj acc_set_device_type (acc_device_t d)
499*38fd1498Szrj {
500*38fd1498Szrj struct gomp_device_descr *base_dev, *acc_dev;
501*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
502*38fd1498Szrj
503*38fd1498Szrj gomp_init_targets_once ();
504*38fd1498Szrj
505*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
506*38fd1498Szrj
507*38fd1498Szrj cached_base_dev = base_dev = resolve_device (d, true);
508*38fd1498Szrj acc_dev = &base_dev[goacc_device_num];
509*38fd1498Szrj
510*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
511*38fd1498Szrj if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
512*38fd1498Szrj gomp_init_device (acc_dev);
513*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
514*38fd1498Szrj
515*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
516*38fd1498Szrj
517*38fd1498Szrj /* We're changing device type: invalidate the current thread's dev and
518*38fd1498Szrj base_dev pointers. */
519*38fd1498Szrj if (thr && thr->base_dev != base_dev)
520*38fd1498Szrj {
521*38fd1498Szrj thr->base_dev = thr->dev = NULL;
522*38fd1498Szrj if (thr->mapped_data)
523*38fd1498Szrj gomp_fatal ("acc_set_device_type in 'acc data' region");
524*38fd1498Szrj }
525*38fd1498Szrj
526*38fd1498Szrj goacc_attach_host_thread_to_device (-1);
527*38fd1498Szrj }
528*38fd1498Szrj
ialias(acc_set_device_type)529*38fd1498Szrj ialias (acc_set_device_type)
530*38fd1498Szrj
531*38fd1498Szrj acc_device_t
532*38fd1498Szrj acc_get_device_type (void)
533*38fd1498Szrj {
534*38fd1498Szrj acc_device_t res = acc_device_none;
535*38fd1498Szrj struct gomp_device_descr *dev;
536*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
537*38fd1498Szrj
538*38fd1498Szrj if (thr && thr->base_dev)
539*38fd1498Szrj res = acc_device_type (thr->base_dev->type);
540*38fd1498Szrj else
541*38fd1498Szrj {
542*38fd1498Szrj gomp_init_targets_once ();
543*38fd1498Szrj
544*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
545*38fd1498Szrj dev = resolve_device (acc_device_default, true);
546*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
547*38fd1498Szrj res = acc_device_type (dev->type);
548*38fd1498Szrj }
549*38fd1498Szrj
550*38fd1498Szrj assert (res != acc_device_default
551*38fd1498Szrj && res != acc_device_not_host);
552*38fd1498Szrj
553*38fd1498Szrj return res;
554*38fd1498Szrj }
555*38fd1498Szrj
ialias(acc_get_device_type)556*38fd1498Szrj ialias (acc_get_device_type)
557*38fd1498Szrj
558*38fd1498Szrj int
559*38fd1498Szrj acc_get_device_num (acc_device_t d)
560*38fd1498Szrj {
561*38fd1498Szrj const struct gomp_device_descr *dev;
562*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
563*38fd1498Szrj
564*38fd1498Szrj if (d >= _ACC_device_hwm)
565*38fd1498Szrj gomp_fatal ("unknown device type %u", (unsigned) d);
566*38fd1498Szrj
567*38fd1498Szrj gomp_init_targets_once ();
568*38fd1498Szrj
569*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
570*38fd1498Szrj dev = resolve_device (d, true);
571*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
572*38fd1498Szrj
573*38fd1498Szrj if (thr && thr->base_dev == dev && thr->dev)
574*38fd1498Szrj return thr->dev->target_id;
575*38fd1498Szrj
576*38fd1498Szrj return goacc_device_num;
577*38fd1498Szrj }
578*38fd1498Szrj
ialias(acc_get_device_num)579*38fd1498Szrj ialias (acc_get_device_num)
580*38fd1498Szrj
581*38fd1498Szrj void
582*38fd1498Szrj acc_set_device_num (int ord, acc_device_t d)
583*38fd1498Szrj {
584*38fd1498Szrj struct gomp_device_descr *base_dev, *acc_dev;
585*38fd1498Szrj int num_devices;
586*38fd1498Szrj
587*38fd1498Szrj gomp_init_targets_once ();
588*38fd1498Szrj
589*38fd1498Szrj if (ord < 0)
590*38fd1498Szrj ord = goacc_device_num;
591*38fd1498Szrj
592*38fd1498Szrj if ((int) d == 0)
593*38fd1498Szrj /* Set whatever device is being used by the current host thread to use
594*38fd1498Szrj device instance ORD. It's unclear if this is supposed to affect other
595*38fd1498Szrj host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */
596*38fd1498Szrj goacc_attach_host_thread_to_device (ord);
597*38fd1498Szrj else
598*38fd1498Szrj {
599*38fd1498Szrj gomp_mutex_lock (&acc_device_lock);
600*38fd1498Szrj
601*38fd1498Szrj cached_base_dev = base_dev = resolve_device (d, true);
602*38fd1498Szrj
603*38fd1498Szrj num_devices = base_dev->get_num_devices_func ();
604*38fd1498Szrj
605*38fd1498Szrj if (num_devices <= 0 || ord >= num_devices)
606*38fd1498Szrj acc_dev_num_out_of_range (d, ord, num_devices);
607*38fd1498Szrj
608*38fd1498Szrj acc_dev = &base_dev[ord];
609*38fd1498Szrj
610*38fd1498Szrj gomp_mutex_lock (&acc_dev->lock);
611*38fd1498Szrj if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
612*38fd1498Szrj gomp_init_device (acc_dev);
613*38fd1498Szrj gomp_mutex_unlock (&acc_dev->lock);
614*38fd1498Szrj
615*38fd1498Szrj gomp_mutex_unlock (&acc_device_lock);
616*38fd1498Szrj
617*38fd1498Szrj goacc_attach_host_thread_to_device (ord);
618*38fd1498Szrj }
619*38fd1498Szrj
620*38fd1498Szrj goacc_device_num = ord;
621*38fd1498Szrj }
622*38fd1498Szrj
ialias(acc_set_device_num)623*38fd1498Szrj ialias (acc_set_device_num)
624*38fd1498Szrj
625*38fd1498Szrj /* For -O and higher, the compiler always attempts to expand acc_on_device, but
626*38fd1498Szrj if the user disables the builtin, or calls it via a pointer, we'll need this
627*38fd1498Szrj version.
628*38fd1498Szrj
629*38fd1498Szrj Compile this with optimization, so that the compiler expands
630*38fd1498Szrj this, rather than generating infinitely recursive code. */
631*38fd1498Szrj
632*38fd1498Szrj int __attribute__ ((__optimize__ ("O2")))
633*38fd1498Szrj acc_on_device (acc_device_t dev)
634*38fd1498Szrj {
635*38fd1498Szrj return __builtin_acc_on_device (dev);
636*38fd1498Szrj }
637*38fd1498Szrj
ialias(acc_on_device)638*38fd1498Szrj ialias (acc_on_device)
639*38fd1498Szrj
640*38fd1498Szrj attribute_hidden void
641*38fd1498Szrj goacc_runtime_initialize (void)
642*38fd1498Szrj {
643*38fd1498Szrj gomp_mutex_init (&acc_device_lock);
644*38fd1498Szrj
645*38fd1498Szrj #if !(defined HAVE_TLS || defined USE_EMUTLS)
646*38fd1498Szrj pthread_key_create (&goacc_tls_key, NULL);
647*38fd1498Szrj #endif
648*38fd1498Szrj
649*38fd1498Szrj pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
650*38fd1498Szrj
651*38fd1498Szrj cached_base_dev = NULL;
652*38fd1498Szrj
653*38fd1498Szrj goacc_threads = NULL;
654*38fd1498Szrj gomp_mutex_init (&goacc_thread_lock);
655*38fd1498Szrj
656*38fd1498Szrj /* Initialize and register the 'host' device type. */
657*38fd1498Szrj goacc_host_init ();
658*38fd1498Szrj }
659*38fd1498Szrj
660*38fd1498Szrj /* Compiler helper functions */
661*38fd1498Szrj
662*38fd1498Szrj attribute_hidden void
goacc_save_and_set_bind(acc_device_t d)663*38fd1498Szrj goacc_save_and_set_bind (acc_device_t d)
664*38fd1498Szrj {
665*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
666*38fd1498Szrj
667*38fd1498Szrj assert (!thr->saved_bound_dev);
668*38fd1498Szrj
669*38fd1498Szrj thr->saved_bound_dev = thr->dev;
670*38fd1498Szrj thr->dev = dispatchers[d];
671*38fd1498Szrj }
672*38fd1498Szrj
673*38fd1498Szrj attribute_hidden void
goacc_restore_bind(void)674*38fd1498Szrj goacc_restore_bind (void)
675*38fd1498Szrj {
676*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
677*38fd1498Szrj
678*38fd1498Szrj thr->dev = thr->saved_bound_dev;
679*38fd1498Szrj thr->saved_bound_dev = NULL;
680*38fd1498Szrj }
681*38fd1498Szrj
682*38fd1498Szrj /* This is called from any OpenACC support function that may need to implicitly
683*38fd1498Szrj initialize the libgomp runtime, either globally or from a new host thread.
684*38fd1498Szrj On exit "goacc_thread" will return a valid & populated thread block. */
685*38fd1498Szrj
686*38fd1498Szrj attribute_hidden void
goacc_lazy_initialize(void)687*38fd1498Szrj goacc_lazy_initialize (void)
688*38fd1498Szrj {
689*38fd1498Szrj struct goacc_thread *thr = goacc_thread ();
690*38fd1498Szrj
691*38fd1498Szrj if (thr && thr->dev)
692*38fd1498Szrj return;
693*38fd1498Szrj
694*38fd1498Szrj if (!cached_base_dev)
695*38fd1498Szrj acc_init (acc_device_default);
696*38fd1498Szrj else
697*38fd1498Szrj goacc_attach_host_thread_to_device (-1);
698*38fd1498Szrj }
699