xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/oacc-init.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* OpenACC Runtime initialization routines
2 
3    Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include "libgomp.h"
30 #include "oacc-int.h"
31 #include "openacc.h"
32 #include "plugin/plugin-host.h"
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <strings.h>
36 #include <stdbool.h>
37 #include <string.h>
38 
/* Lock taken in this file around updates to the dispatchers table
   (goacc_register) and around the device init/shutdown and device selection
   paths (acc_init, acc_shutdown, acc_set_device_type, acc_set_device_num).  */
static gomp_mutex_t acc_device_lock;

/* A cached version of the dispatcher for the global "current" accelerator type,
   e.g. used as the default when creating new host threads.  This is the
   device-type equivalent of goacc_device_num (which specifies which device to
   use out of potentially several of the same type).  If there are several
   devices of a given type, this points at the first one.  */

static struct gomp_device_descr *cached_base_dev = NULL;

/* Per-thread pointer to the calling thread's goacc_thread block; it is
   stored by goacc_new_thread, either in a __thread variable or, without TLS
   support, under a pthread key.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct goacc_thread *goacc_tls_data;
#else
pthread_key_t goacc_tls_key;
#endif
/* Key created in goacc_runtime_initialize with goacc_destroy_thread as its
   destructor; goacc_new_thread stores the thread block under it.  */
static pthread_key_t goacc_cleanup_key;

/* Head of the list (linked through the "next" field) of all thread blocks
   created by goacc_new_thread.  goacc_thread_lock is held while the list is
   walked or modified in this file.  */
static struct goacc_thread *goacc_threads;
static gomp_mutex_t goacc_thread_lock;

/* An array of dispatchers for device types, indexed by the type.  This array
   only references "base" devices, and other instances of the same type are
   found by simply indexing from each such device (which are stored linearly,
   grouped by device in target.c:devices).  */
static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
64 
65 attribute_hidden void
66 goacc_register (struct gomp_device_descr *disp)
67 {
68   /* Only register the 0th device here.  */
69   if (disp->target_id != 0)
70     return;
71 
72   gomp_mutex_lock (&acc_device_lock);
73 
74   assert (acc_device_type (disp->type) != acc_device_none
75 	  && acc_device_type (disp->type) != acc_device_default
76 	  && acc_device_type (disp->type) != acc_device_not_host);
77   assert (!dispatchers[disp->type]);
78   dispatchers[disp->type] = disp;
79 
80   gomp_mutex_unlock (&acc_device_lock);
81 }
82 
/* Map a plugin name onto the name OpenACC uses for the same device: "nvptx"
   becomes "nvidia", and every other name is returned unchanged.  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
93 
94 static const char *
95 name_of_acc_device_t (enum acc_device_t type)
96 {
97   switch (type)
98     {
99     case acc_device_none: return "none";
100     case acc_device_default: return "default";
101     case acc_device_host: return "host";
102     case acc_device_host_nonshm: return "host_nonshm";
103     case acc_device_not_host: return "not_host";
104     case acc_device_nvidia: return "nvidia";
105     default: gomp_fatal ("unknown device type %u", (unsigned) type);
106     }
107 }
108 
109 static struct gomp_device_descr *
110 resolve_device (acc_device_t d)
111 {
112   acc_device_t d_arg = d;
113 
114   switch (d)
115     {
116     case acc_device_default:
117       {
118 	if (goacc_device_type)
119 	  {
120 	    /* Lookup the named device.  */
121 	    while (++d != _ACC_device_hwm)
122 	      if (dispatchers[d]
123 		  && !strcasecmp (goacc_device_type,
124 				  get_openacc_name (dispatchers[d]->name))
125 		  && dispatchers[d]->get_num_devices_func () > 0)
126 		goto found;
127 
128 	    gomp_fatal ("device type %s not supported", goacc_device_type);
129 	  }
130 
131 	/* No default device specified, so start scanning for any non-host
132 	   device that is available.  */
133 	d = acc_device_not_host;
134       }
135       /* FALLTHROUGH */
136 
137     case acc_device_not_host:
138       /* Find the first available device after acc_device_not_host.  */
139       while (++d != _ACC_device_hwm)
140 	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
141 	  goto found;
142       if (d_arg == acc_device_default)
143 	{
144 	  d = acc_device_host;
145 	  goto found;
146 	}
147       gomp_fatal ("no device found");
148       break;
149 
150     case acc_device_host:
151       break;
152 
153     default:
154       if (d > _ACC_device_hwm)
155 	gomp_fatal ("device %u out of range", (unsigned)d);
156       break;
157     }
158  found:
159 
160   assert (d != acc_device_none
161 	  && d != acc_device_default
162 	  && d != acc_device_not_host);
163 
164   return dispatchers[d];
165 }
166 
167 /* This is called when plugins have been initialized, and serves to call
168    (indirectly) the target's device_init hook.  Calling multiple times without
169    an intervening acc_shutdown_1 call is an error.  */
170 
171 static struct gomp_device_descr *
172 acc_init_1 (acc_device_t d)
173 {
174   struct gomp_device_descr *base_dev, *acc_dev;
175   int ndevs;
176 
177   base_dev = resolve_device (d);
178 
179   ndevs = base_dev->get_num_devices_func ();
180 
181   if (!base_dev || ndevs <= 0 || goacc_device_num >= ndevs)
182     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
183 
184   acc_dev = &base_dev[goacc_device_num];
185 
186   if (acc_dev->is_initialized)
187     gomp_fatal ("device already active");
188 
189   gomp_init_device (acc_dev);
190 
191   return base_dev;
192 }
193 
194 static void
195 acc_shutdown_1 (acc_device_t d)
196 {
197   struct gomp_device_descr *base_dev;
198   struct goacc_thread *walk;
199   int ndevs, i;
200   bool devices_active = false;
201 
202   /* Get the base device for this device type.  */
203   base_dev = resolve_device (d);
204 
205   if (!base_dev)
206     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
207 
208   gomp_mutex_lock (&goacc_thread_lock);
209 
210   /* Free target-specific TLS data and close all devices.  */
211   for (walk = goacc_threads; walk != NULL; walk = walk->next)
212     {
213       if (walk->target_tls)
214 	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
215 
216       walk->target_tls = NULL;
217 
218       /* This would mean the user is shutting down OpenACC in the middle of an
219          "acc data" pragma.  Likely not intentional.  */
220       if (walk->mapped_data)
221 	gomp_fatal ("shutdown in 'acc data' region");
222 
223       /* Similarly, if this happens then user code has done something weird.  */
224       if (walk->saved_bound_dev)
225         gomp_fatal ("shutdown during host fallback");
226 
227       if (walk->dev)
228 	{
229 	  gomp_mutex_lock (&walk->dev->lock);
230 	  gomp_free_memmap (&walk->dev->mem_map);
231 	  gomp_mutex_unlock (&walk->dev->lock);
232 
233 	  walk->dev = NULL;
234 	  walk->base_dev = NULL;
235 	}
236     }
237 
238   gomp_mutex_unlock (&goacc_thread_lock);
239 
240   ndevs = base_dev->get_num_devices_func ();
241 
242   /* Close all the devices of this type that have been opened.  */
243   for (i = 0; i < ndevs; i++)
244     {
245       struct gomp_device_descr *acc_dev = &base_dev[i];
246       if (acc_dev->is_initialized)
247         {
248 	  devices_active = true;
249 	  gomp_fini_device (acc_dev);
250 	}
251     }
252 
253   if (!devices_active)
254     gomp_fatal ("no device initialized");
255 }
256 
257 static struct goacc_thread *
258 goacc_new_thread (void)
259 {
260   struct goacc_thread *thr = gomp_malloc (sizeof (struct gomp_thread));
261 
262 #if defined HAVE_TLS || defined USE_EMUTLS
263   goacc_tls_data = thr;
264 #else
265   pthread_setspecific (goacc_tls_key, thr);
266 #endif
267 
268   pthread_setspecific (goacc_cleanup_key, thr);
269 
270   gomp_mutex_lock (&goacc_thread_lock);
271   thr->next = goacc_threads;
272   goacc_threads = thr;
273   gomp_mutex_unlock (&goacc_thread_lock);
274 
275   return thr;
276 }
277 
278 static void
279 goacc_destroy_thread (void *data)
280 {
281   struct goacc_thread *thr = data, *walk, *prev;
282 
283   gomp_mutex_lock (&goacc_thread_lock);
284 
285   if (thr)
286     {
287       struct gomp_device_descr *acc_dev = thr->dev;
288 
289       if (acc_dev && thr->target_tls)
290 	{
291 	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
292 	  thr->target_tls = NULL;
293 	}
294 
295       assert (!thr->mapped_data);
296 
297       /* Remove from thread list.  */
298       for (prev = NULL, walk = goacc_threads; walk;
299 	   prev = walk, walk = walk->next)
300 	if (walk == thr)
301 	  {
302 	    if (prev == NULL)
303 	      goacc_threads = walk->next;
304 	    else
305 	      prev->next = walk->next;
306 
307 	    free (thr);
308 
309 	    break;
310 	  }
311 
312       assert (walk);
313     }
314 
315   gomp_mutex_unlock (&goacc_thread_lock);
316 }
317 
318 /* Use the ORD'th device instance for the current host thread (or -1 for the
319    current global default).  The device (and the runtime) must be initialised
320    before calling this function.  */
321 
322 void
323 goacc_attach_host_thread_to_device (int ord)
324 {
325   struct goacc_thread *thr = goacc_thread ();
326   struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
327   int num_devices;
328 
329   if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
330     return;
331 
332   if (ord < 0)
333     ord = goacc_device_num;
334 
335   /* Decide which type of device to use.  If the current thread has a device
336      type already (e.g. set by acc_set_device_type), use that, else use the
337      global default.  */
338   if (thr && thr->base_dev)
339     base_dev = thr->base_dev;
340   else
341     {
342       assert (cached_base_dev);
343       base_dev = cached_base_dev;
344     }
345 
346   num_devices = base_dev->get_num_devices_func ();
347   if (num_devices <= 0 || ord >= num_devices)
348     gomp_fatal ("device %u out of range", ord);
349 
350   if (!thr)
351     thr = goacc_new_thread ();
352 
353   thr->base_dev = base_dev;
354   thr->dev = acc_dev = &base_dev[ord];
355   thr->saved_bound_dev = NULL;
356   thr->mapped_data = NULL;
357 
358   thr->target_tls
359     = acc_dev->openacc.create_thread_data_func (ord);
360 
361   acc_dev->openacc.async_set_async_func (acc_async_sync);
362 }
363 
364 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
365    init/shutdown is per-process or per-thread.  We choose per-process.  */
366 
367 void
368 acc_init (acc_device_t d)
369 {
370   if (!cached_base_dev)
371     gomp_init_targets_once ();
372 
373   gomp_mutex_lock (&acc_device_lock);
374 
375   cached_base_dev = acc_init_1 (d);
376 
377   gomp_mutex_unlock (&acc_device_lock);
378 
379   goacc_attach_host_thread_to_device (-1);
380 }
381 
382 ialias (acc_init)
383 
384 void
385 acc_shutdown (acc_device_t d)
386 {
387   gomp_mutex_lock (&acc_device_lock);
388 
389   acc_shutdown_1 (d);
390 
391   gomp_mutex_unlock (&acc_device_lock);
392 }
393 
394 ialias (acc_shutdown)
395 
396 int
397 acc_get_num_devices (acc_device_t d)
398 {
399   int n = 0;
400   struct gomp_device_descr *acc_dev;
401 
402   if (d == acc_device_none)
403     return 0;
404 
405   gomp_init_targets_once ();
406 
407   acc_dev = resolve_device (d);
408   if (!acc_dev)
409     return 0;
410 
411   n = acc_dev->get_num_devices_func ();
412   if (n < 0)
413     n = 0;
414 
415   return n;
416 }
417 
418 ialias (acc_get_num_devices)
419 
420 /* Set the device type for the current thread only (using the current global
421    default device number), initialising that device if necessary.  Also set the
422    default device type for new threads to D.  */
423 
424 void
425 acc_set_device_type (acc_device_t d)
426 {
427   struct gomp_device_descr *base_dev, *acc_dev;
428   struct goacc_thread *thr = goacc_thread ();
429 
430   gomp_mutex_lock (&acc_device_lock);
431 
432   if (!cached_base_dev)
433     gomp_init_targets_once ();
434 
435   cached_base_dev = base_dev = resolve_device (d);
436   acc_dev = &base_dev[goacc_device_num];
437 
438   if (!acc_dev->is_initialized)
439     gomp_init_device (acc_dev);
440 
441   gomp_mutex_unlock (&acc_device_lock);
442 
443   /* We're changing device type: invalidate the current thread's dev and
444      base_dev pointers.  */
445   if (thr && thr->base_dev != base_dev)
446     {
447       thr->base_dev = thr->dev = NULL;
448       if (thr->mapped_data)
449         gomp_fatal ("acc_set_device_type in 'acc data' region");
450     }
451 
452   goacc_attach_host_thread_to_device (-1);
453 }
454 
455 ialias (acc_set_device_type)
456 
457 acc_device_t
458 acc_get_device_type (void)
459 {
460   acc_device_t res = acc_device_none;
461   struct gomp_device_descr *dev;
462   struct goacc_thread *thr = goacc_thread ();
463 
464   if (thr && thr->base_dev)
465     res = acc_device_type (thr->base_dev->type);
466   else
467     {
468       gomp_init_targets_once ();
469 
470       dev = resolve_device (acc_device_default);
471       res = acc_device_type (dev->type);
472     }
473 
474   assert (res != acc_device_default
475 	  && res != acc_device_not_host);
476 
477   return res;
478 }
479 
480 ialias (acc_get_device_type)
481 
482 int
483 acc_get_device_num (acc_device_t d)
484 {
485   const struct gomp_device_descr *dev;
486   struct goacc_thread *thr = goacc_thread ();
487 
488   if (d >= _ACC_device_hwm)
489     gomp_fatal ("device %u out of range", (unsigned)d);
490 
491   if (!cached_base_dev)
492     gomp_init_targets_once ();
493 
494   dev = resolve_device (d);
495   if (!dev)
496     gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
497 
498   if (thr && thr->base_dev == dev && thr->dev)
499     return thr->dev->target_id;
500 
501   return goacc_device_num;
502 }
503 
504 ialias (acc_get_device_num)
505 
506 void
507 acc_set_device_num (int ord, acc_device_t d)
508 {
509   struct gomp_device_descr *base_dev, *acc_dev;
510   int num_devices;
511 
512   if (!cached_base_dev)
513     gomp_init_targets_once ();
514 
515   if (ord < 0)
516     ord = goacc_device_num;
517 
518   if ((int) d == 0)
519     /* Set whatever device is being used by the current host thread to use
520        device instance ORD.  It's unclear if this is supposed to affect other
521        host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
522     goacc_attach_host_thread_to_device (ord);
523   else
524     {
525       gomp_mutex_lock (&acc_device_lock);
526 
527       cached_base_dev = base_dev = resolve_device (d);
528 
529       num_devices = base_dev->get_num_devices_func ();
530 
531       if (ord >= num_devices)
532         gomp_fatal ("device %u out of range", ord);
533 
534       acc_dev = &base_dev[ord];
535 
536       if (!acc_dev->is_initialized)
537         gomp_init_device (acc_dev);
538 
539       gomp_mutex_unlock (&acc_device_lock);
540 
541       goacc_attach_host_thread_to_device (ord);
542     }
543 
544   goacc_device_num = ord;
545 }
546 
547 ialias (acc_set_device_num)
548 
549 int
550 acc_on_device (acc_device_t dev)
551 {
552   struct goacc_thread *thr = goacc_thread ();
553 
554   /* We only want to appear to be the "host_nonshm" plugin from "offloaded"
555      code -- i.e. within a parallel region.  Test a flag set by the
556      openacc_parallel hook of the host_nonshm plugin to determine that.  */
557   if (acc_get_device_type () == acc_device_host_nonshm
558       && thr && thr->target_tls
559       && ((struct nonshm_thread *)thr->target_tls)->nonshm_exec)
560     return dev == acc_device_host_nonshm || dev == acc_device_not_host;
561 
562   /* For OpenACC, libgomp is only built for the host, so this is sufficient.  */
563   return dev == acc_device_host || dev == acc_device_none;
564 }
565 
566 ialias (acc_on_device)
567 
568 attribute_hidden void
569 goacc_runtime_initialize (void)
570 {
571   gomp_mutex_init (&acc_device_lock);
572 
573 #if !(defined HAVE_TLS || defined USE_EMUTLS)
574   pthread_key_create (&goacc_tls_key, NULL);
575 #endif
576 
577   pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
578 
579   cached_base_dev = NULL;
580 
581   goacc_threads = NULL;
582   gomp_mutex_init (&goacc_thread_lock);
583 
584   /* Initialize and register the 'host' device type.  */
585   goacc_host_init ();
586 }
587 
588 /* Compiler helper functions */
589 
590 attribute_hidden void
591 goacc_save_and_set_bind (acc_device_t d)
592 {
593   struct goacc_thread *thr = goacc_thread ();
594 
595   assert (!thr->saved_bound_dev);
596 
597   thr->saved_bound_dev = thr->dev;
598   thr->dev = dispatchers[d];
599 }
600 
601 attribute_hidden void
602 goacc_restore_bind (void)
603 {
604   struct goacc_thread *thr = goacc_thread ();
605 
606   thr->dev = thr->saved_bound_dev;
607   thr->saved_bound_dev = NULL;
608 }
609 
610 /* This is called from any OpenACC support function that may need to implicitly
611    initialize the libgomp runtime, either globally or from a new host thread.
612    On exit "goacc_thread" will return a valid & populated thread block.  */
613 
614 attribute_hidden void
615 goacc_lazy_initialize (void)
616 {
617   struct goacc_thread *thr = goacc_thread ();
618 
619   if (thr && thr->dev)
620     return;
621 
622   if (!cached_base_dev)
623     acc_init (acc_device_default);
624   else
625     goacc_attach_host_thread_to_device (-1);
626 }
627