xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/target.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
/* Copyright (C) 2013-2016 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains support for offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but releases register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

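/* Editorial illustration, not part of libgomp: for a zero-length key at
   address A, gomp_map_lookup above probes [A, A+1) (A inside or at the
   start of a mapping), then [A-1, A) (A just past the end of a mapping),
   and finally [A, A) itself, while gomp_map_0len_lookup performs only the
   first probe.  A hypothetical call looks like

     struct splay_tree_key_s key;
     key.host_start = key.host_end = (uintptr_t) ptr;
     splay_tree_key n = gomp_map_lookup (mem_map, &key);
*/
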
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found an existing mapping OLDN for the new mapping
   NEWN.  Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    devicep->host2dev_func (devicep->target_id,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start);
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}

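/* Editorial note, not part of libgomp: as used in gomp_map_vars below
   (see rshift and typemask there), a 16-bit "short" map kind packs the
   GOMP_MAP_* type in the low byte and log2 of the required alignment in
   the remaining bits, while the old 8-bit encoding uses the low 3 bits
   for the type.  For example, a short kind of 0x0302 decodes as type
   0x02 with a (size_t) 1 << (0x0302 >> 8) == 8-byte alignment.  */
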
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      /* FIXME: see comment about coalescing host/dev transfers below.  */
      devicep->host2dev_func (devicep->target_id,
			      (void *) (tgt->tgt_start + target_offset),
			      (void *) &cur_node.tgt_offset,
			      sizeof (void *));
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  /* FIXME: see comment about coalescing host/dev transfers below.  */
  devicep->host2dev_func (devicep->target_id,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *));
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}

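/* Editorial summary, derived from gomp_map_val above and gomp_map_vars
   below: entries in tgt->list[] with a NULL key use sentinel offsets;
   ~(uintptr_t) 0 means "pass the host address through unchanged",
   ~(uintptr_t) 1 means "pass a NULL pointer", and ~(uintptr_t) 2 means
   "compute the device address relative to the next list entry" (used for
   GOMP_MAP_STRUCT).  Any other offset with a NULL key is relative to
   tgt->tgt_start (the firstprivate block).  */
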
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first]
			  - (uintptr_t) hostaddrs[i];
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		tgt->list[i].key = NULL;
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

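  /* Editorial worked example of the alignment logic above, not part of
     libgomp: with tgt_size == 20 and tgt_align == 16, 35 bytes are
     allocated; if alloc_func returns 0x1004, rounding up yields
     (0x1004 + 15) & ~15 == 0x1010 as a 16-byte aligned tgt_start, and
     tgt_end == 0x1024 still lies within the 35-byte allocation.  */
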
  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		devicep->host2dev_func (devicep->target_id,
					(void *) (tgt->tgt_start + tgt_size),
					(void *) hostaddrs[i], len);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		devicep->host2dev_func (devicep->target_id,
					(void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + cur_node.host_start
						  - n->host_start),
					(void *) &cur_node.tgt_offset,
					sizeof (void *));
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i]);
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j]);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    devicep->host2dev_func (devicep->target_id,
					    (void *) n->tgt_offset,
					    &tgt_addr, sizeof (void *));
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i].key == NULL)
      ;
    else if (tgt->list[i].key->refcount > 1)
      {
	tgt->list[i].key->refcount--;
	tgt->list[i].key->async_refcount++;
      }
    else
      {
	splay_tree_key k = tgt->list[i].key;
	if (tgt->list[i].copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  if (k->async_refcount > 0)
	    k->async_refcount--;
	  else
	    {
	      k->refcount--;
	      do_unmap = true;
	    }
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	devicep->dev2host_func (devicep->target_id,
				(void *) (k->host_start + tgt->list[i].offset),
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + tgt->list[i].offset),
				tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  if (k->link_key)
	    splay_tree_insert (&devicep->mem_map,
			       (splay_tree_node) k->link_key);
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

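/* Editorial note, not part of libgomp: REFCOUNT_INFINITY marks mappings
   that are never unmapped (e.g. the function and variable tables inserted
   by gomp_load_image_to_device below), and REFCOUNT_LINK marks "omp
   declare target link" placeholders that are replaced by the real mapping
   when one is created; both are treated specially by the refcount logic
   above.  */
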
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

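  /* Editorial illustration with hypothetical data, not part of libgomp:
     for an image with two functions and one variable the host table
     decodes as

       host_table = { funcs, funcs + 2, vars, vars + 2 }
       funcs[]    = { &foo, &bar }
       vars[]     = { &x, (void *) sizeof (x) }

     giving num_funcs == 2 and num_vars == 1; the target_table returned by
     the plugin below must then contain 2 + 1 address pairs in the same
     order.  */
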
  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->async_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

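  /* Editorial worked example, not part of libgomp: on an LP64 host,
     link_bit is 0x8000000000000000, so a var-table size entry of
     0x8000000000000010 describes a 16-byte "declare target link"
     variable; size_mask strips the flag bit before the host address
     range is computed below.  */
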
  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->async_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* Unload the mappings described by TARGET_DATA from device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  devicep->unload_image_func (devicep->target_id, version, target_data);

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  splay_tree_remove (&devicep->mem_map, n);
	  if (n->link_key)
	    {
	      if (n->tgt->refcount > 1)
		n->tgt->refcount--;
	      else
		{
		  is_tgt_unmapped = true;
		  gomp_unmap_tgt (n->tgt);
		}
	    }
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to the device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from the device all images registered so far.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}

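/* Editorial usage sketch, mirroring GOMP_target_ext below (no new API):
   the two helpers above are used as a pair; the first pass sizes an
   over-aligned block, the second rebinds the hostaddrs[] entries into it:

     size_t tgt_align = 0, tgt_size = 0;
     calculate_firstprivate_requirements (mapnum, sizes, kinds,
					  &tgt_align, &tgt_size);
     if (tgt_align)
       {
	 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				 tgt_align, tgt_size);
       }
*/
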
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than the last available hw device), use host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with the addresses of the host objects, the
   sizes of the host objects (for pointer kinds, the pointer bias instead,
   with the size assumed to be sizeof (void *)) and the kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

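/* Editorial sketch with hypothetical names, not actual compiler output:
   mapping a single 'tofrom' integer through this entry point lowers
   roughly to

     extern void foo_offload_fn (void *);
     int x;
     void *hostaddrs[1] = { &x };
     size_t sizes[1] = { sizeof (int) };
     unsigned char kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target (GOMP_DEVICE_ICV, foo_offload_fn, NULL,
		  1, hostaddrs, sizes, kinds);

   where foo_offload_fn stands in for the host version of the outlined
   target region.  */
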
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is an array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first element, the whole second element is the actual value.  The
   last element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, 1 if the teams construct is not present, 0 if the
   teams construct does not have a num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, 0 if the teams construct does not have a
   thread_limit clause or the teams construct is not present, and -1 if it
   can't be determined on the host what value GOMP_teams will have on the
   device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, we need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

1600 
1601 void
1602 GOMP_target_data (int device, const void *unused, size_t mapnum,
1603 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
1604 {
1605   struct gomp_device_descr *devicep = resolve_device (device);
1606 
1607   if (devicep == NULL
1608       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1609       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1610     return gomp_target_data_fallback ();
1611 
1612   struct target_mem_desc *tgt
1613     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1614 		     GOMP_MAP_VARS_DATA);
1615   struct gomp_task_icv *icv = gomp_icv (true);
1616   tgt->prev = icv->target_data;
1617   icv->target_data = tgt;
1618 }
1619 
1620 void
1621 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1622 		      size_t *sizes, unsigned short *kinds)
1623 {
1624   struct gomp_device_descr *devicep = resolve_device (device);
1625 
1626   if (devicep == NULL
1627       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1628       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1629     return gomp_target_data_fallback ();
1630 
1631   struct target_mem_desc *tgt
1632     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1633 		     GOMP_MAP_VARS_DATA);
1634   struct gomp_task_icv *icv = gomp_icv (true);
1635   tgt->prev = icv->target_data;
1636   icv->target_data = tgt;
1637 }
1638 
1639 void
1640 GOMP_target_end_data (void)
1641 {
1642   struct gomp_task_icv *icv = gomp_icv (false);
1643   if (icv->target_data)
1644     {
1645       struct target_mem_desc *tgt = icv->target_data;
1646       icv->target_data = tgt->prev;
1647       gomp_unmap_vars (tgt, true);
1648     }
1649 }
1650 
1651 void
1652 GOMP_target_update (int device, const void *unused, size_t mapnum,
1653 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
1654 {
1655   struct gomp_device_descr *devicep = resolve_device (device);
1656 
1657   if (devicep == NULL
1658       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1659       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1660     return;
1661 
1662   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1663 }
1664 
1665 void
1666 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1667 			size_t *sizes, unsigned short *kinds,
1668 			unsigned int flags, void **depend)
1669 {
1670   struct gomp_device_descr *devicep = resolve_device (device);
1671 
1672   /* If there are depend clauses, but nowait is not present,
1673      block the parent task until the dependencies are resolved
1674      and then just continue with the rest of the function as if it
1675      were a merged task.  Until we are able to schedule tasks during
1676      variable mapping or unmapping, ignore nowait if depend clauses
1677      are not present.  */
1678   if (depend != NULL)
1679     {
1680       struct gomp_thread *thr = gomp_thread ();
1681       if (thr->task && thr->task->depend_hash)
1682 	{
1683 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1684 	      && thr->ts.team
1685 	      && !thr->task->final_task)
1686 	    {
1687 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1688 					   mapnum, hostaddrs, sizes, kinds,
1689 					   flags | GOMP_TARGET_FLAG_UPDATE,
1690 					   depend, NULL, GOMP_TARGET_TASK_DATA))
1691 		return;
1692 	    }
1693 	  else
1694 	    {
1695 	      struct gomp_team *team = thr->ts.team;
1696 	      /* If parallel or taskgroup has been cancelled, don't start new
1697 		 tasks.  */
1698 	      if (team
1699 		  && (gomp_team_barrier_cancelled (&team->barrier)
1700 		      || (thr->task->taskgroup
1701 			  && thr->task->taskgroup->cancelled)))
1702 		return;
1703 
1704 	      gomp_task_maybe_wait_for_dependencies (depend);
1705 	    }
1706 	}
1707     }
1708 
1709   if (devicep == NULL
1710       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1711       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1712     return;
1713 
1714   struct gomp_thread *thr = gomp_thread ();
1715   struct gomp_team *team = thr->ts.team;
1716   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1717   if (team
1718       && (gomp_team_barrier_cancelled (&team->barrier)
1719 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1720     return;
1721 
1722   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1723 }
1724 
1725 static void
1726 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1727 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
1728 {
1729   const int typemask = 0xff;
1730   size_t i;
1731   gomp_mutex_lock (&devicep->lock);
1732   if (devicep->state == GOMP_DEVICE_FINALIZED)
1733     {
1734       gomp_mutex_unlock (&devicep->lock);
1735       return;
1736     }
1737 
1738   for (i = 0; i < mapnum; i++)
1739     {
1740       struct splay_tree_key_s cur_node;
1741       unsigned char kind = kinds[i] & typemask;
1742       switch (kind)
1743 	{
1744 	case GOMP_MAP_FROM:
1745 	case GOMP_MAP_ALWAYS_FROM:
1746 	case GOMP_MAP_DELETE:
1747 	case GOMP_MAP_RELEASE:
1748 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1749 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1750 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
1751 	  cur_node.host_end = cur_node.host_start + sizes[i];
1752 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1753 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1754 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1755 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
1756 	  if (!k)
1757 	    continue;
1758 
1759 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
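	  /* Drop one reference; the GOMP_MAP_DELETE variants force the
	     count to zero so the mapping is removed regardless of other
	     references.  REFCOUNT_INFINITY marks mappings that are
	     never unmapped.  */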
1760 	    k->refcount--;
1761 	  if ((kind == GOMP_MAP_DELETE
1762 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1763 	      && k->refcount != REFCOUNT_INFINITY)
1764 	    k->refcount = 0;
1765 
1766 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1767 	      || kind == GOMP_MAP_ALWAYS_FROM)
1768 	    devicep->dev2host_func (devicep->target_id,
1769 				    (void *) cur_node.host_start,
1770 				    (void *) (k->tgt->tgt_start + k->tgt_offset
1771 					      + cur_node.host_start
1772 					      - k->host_start),
1773 				    cur_node.host_end - cur_node.host_start);
1774 	  if (k->refcount == 0)
1775 	    {
1776 	      splay_tree_remove (&devicep->mem_map, k);
1777 	      if (k->link_key)
1778 		splay_tree_insert (&devicep->mem_map,
1779 				   (splay_tree_node) k->link_key);
1780 	      if (k->tgt->refcount > 1)
1781 		k->tgt->refcount--;
1782 	      else
1783 		gomp_unmap_tgt (k->tgt);
1784 	    }
1785 
1786 	  break;
1787 	default:
1788 	  gomp_mutex_unlock (&devicep->lock);
1789 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1790 		      kind);
1791 	}
1792     }
1793 
1794   gomp_mutex_unlock (&devicep->lock);
1795 }
1796 
1797 void
1798 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1799 			     size_t *sizes, unsigned short *kinds,
1800 			     unsigned int flags, void **depend)
1801 {
1802   struct gomp_device_descr *devicep = resolve_device (device);
1803 
1804   /* If there are depend clauses, but nowait is not present,
1805      block the parent task until the dependencies are resolved
1806      and then just continue with the rest of the function as if it
1807      were a merged task.  Until we are able to schedule tasks during
1808      variable mapping or unmapping, ignore nowait if depend clauses
1809      are not present.  */
1810   if (depend != NULL)
1811     {
1812       struct gomp_thread *thr = gomp_thread ();
1813       if (thr->task && thr->task->depend_hash)
1814 	{
1815 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1816 	      && thr->ts.team
1817 	      && !thr->task->final_task)
1818 	    {
1819 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1820 					   mapnum, hostaddrs, sizes, kinds,
1821 					   flags, depend, NULL,
1822 					   GOMP_TARGET_TASK_DATA))
1823 		return;
1824 	    }
1825 	  else
1826 	    {
1827 	      struct gomp_team *team = thr->ts.team;
1828 	      /* If parallel or taskgroup has been cancelled, don't start new
1829 		 tasks.  */
1830 	      if (team
1831 		  && (gomp_team_barrier_cancelled (&team->barrier)
1832 		      || (thr->task->taskgroup
1833 			  && thr->task->taskgroup->cancelled)))
1834 		return;
1835 
1836 	      gomp_task_maybe_wait_for_dependencies (depend);
1837 	    }
1838 	}
1839     }
1840 
1841   if (devicep == NULL
1842       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1843       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1844     return;
1845 
1846   struct gomp_thread *thr = gomp_thread ();
1847   struct gomp_team *team = thr->ts.team;
1848   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1849   if (team
1850       && (gomp_team_barrier_cancelled (&team->barrier)
1851 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1852     return;
1853 
1854   size_t i;
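  /* A GOMP_MAP_STRUCT entry at index i is followed by sizes[i] entries
     describing the individual fields, so the whole group is mapped
     with a single gomp_map_vars call and then skipped over.  */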
1855   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1856     for (i = 0; i < mapnum; i++)
1857       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1858 	{
1859 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
1860 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1861 	  i += sizes[i];
1862 	}
1863       else
1864 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
1865 		       true, GOMP_MAP_VARS_ENTER_DATA);
1866   else
1867     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
1868 }
1869 
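/* Body of a target task.  Returns true if the task has been queued on
   the device for asynchronous execution and will complete later via
   the plugin's async_run hook, false if all work finished
   synchronously.  */
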
1870 bool
1871 gomp_target_task_fn (void *data)
1872 {
1873   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
1874   struct gomp_device_descr *devicep = ttask->devicep;
1875 
1876   if (ttask->fn != NULL)
1877     {
1878       void *fn_addr;
1879       if (devicep == NULL
1880 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1881 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
1882 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1883 	{
1884 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
1885 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
1886 	  return false;
1887 	}
1888 
1889       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1890 	{
1891 	  if (ttask->tgt)
1892 	    gomp_unmap_vars (ttask->tgt, true);
1893 	  return false;
1894 	}
1895 
1896       void *actual_arguments;
1897       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1898 	{
1899 	  ttask->tgt = NULL;
1900 	  actual_arguments = ttask->hostaddrs;
1901 	}
1902       else
1903 	{
1904 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
1905 				      NULL, ttask->sizes, ttask->kinds, true,
1906 				      GOMP_MAP_VARS_TARGET);
1907 	  actual_arguments = (void *) ttask->tgt->tgt_start;
1908 	}
1909       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
1910 
1911       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
1912 			       ttask->args, (void *) ttask);
1913       return true;
1914     }
1915   else if (devicep == NULL
1916 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1917 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1918     return false;
1919 
1920   size_t i;
1921   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
1922     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1923 		 ttask->kinds, true);
1924   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1925     for (i = 0; i < ttask->mapnum; i++)
1926       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1927 	{
1928 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
1929 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
1930 			 GOMP_MAP_VARS_ENTER_DATA);
1931 	  i += ttask->sizes[i];
1932 	}
1933       else
1934 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
1935 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1936   else
1937     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1938 		    ttask->kinds);
1939   return false;
1940 }
1941 
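/* Record the thread_limit clause of a #pragma omp teams construct in
   the ICV, saturating values above INT_MAX; NUM_TEAMS is not used
   here.  */
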
1942 void
1943 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1944 {
1945   if (thread_limit)
1946     {
1947       struct gomp_task_icv *icv = gomp_icv (true);
1948       icv->thread_limit_var
1949 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1950     }
1951   (void) num_teams;
1952 }
1953 
1954 void *
1955 omp_target_alloc (size_t size, int device_num)
1956 {
1957   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1958     return malloc (size);
1959 
1960   if (device_num < 0)
1961     return NULL;
1962 
1963   struct gomp_device_descr *devicep = resolve_device (device_num);
1964   if (devicep == NULL)
1965     return NULL;
1966 
1967   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1968       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1969     return malloc (size);
1970 
1971   gomp_mutex_lock (&devicep->lock);
1972   void *ret = devicep->alloc_func (devicep->target_id, size);
1973   gomp_mutex_unlock (&devicep->lock);
1974   return ret;
1975 }
1976 
1977 void
1978 omp_target_free (void *device_ptr, int device_num)
1979 {
1980   if (device_ptr == NULL)
1981     return;
1982 
1983   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1984     {
1985       free (device_ptr);
1986       return;
1987     }
1988 
1989   if (device_num < 0)
1990     return;
1991 
1992   struct gomp_device_descr *devicep = resolve_device (device_num);
1993   if (devicep == NULL)
1994     return;
1995 
1996   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1997       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1998     {
1999       free (device_ptr);
2000       return;
2001     }
2002 
2003   gomp_mutex_lock (&devicep->lock);
2004   devicep->free_func (devicep->target_id, device_ptr);
2005   gomp_mutex_unlock (&devicep->lock);
2006 }
2007 
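/* Illustrative use of omp_target_alloc/omp_target_free above (device
   number 0 is an assumption):

     void *p = omp_target_alloc (1024, 0);
     if (p != NULL)
       {
         // ... use p via omp_target_memcpy or an is_device_ptr clause ...
         omp_target_free (p, 0);       // must name the same device
       }
*/
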
2008 int
2009 omp_target_is_present (void *ptr, int device_num)
2010 {
2011   if (ptr == NULL)
2012     return 1;
2013 
2014   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2015     return 1;
2016 
2017   if (device_num < 0)
2018     return 0;
2019 
2020   struct gomp_device_descr *devicep = resolve_device (device_num);
2021   if (devicep == NULL)
2022     return 0;
2023 
2024   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2025       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2026     return 1;
2027 
2028   gomp_mutex_lock (&devicep->lock);
2029   struct splay_tree_s *mem_map = &devicep->mem_map;
2030   struct splay_tree_key_s cur_node;
2031 
2032   cur_node.host_start = (uintptr_t) ptr;
2033   cur_node.host_end = cur_node.host_start;
2034   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2035   int ret = n != NULL;
2036   gomp_mutex_unlock (&devicep->lock);
2037   return ret;
2038 }
2039 
2040 int
2041 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2042 		   size_t src_offset, int dst_device_num, int src_device_num)
2043 {
2044   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2045 
2046   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2047     {
2048       if (dst_device_num < 0)
2049 	return EINVAL;
2050 
2051       dst_devicep = resolve_device (dst_device_num);
2052       if (dst_devicep == NULL)
2053 	return EINVAL;
2054 
2055       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2056 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2057 	dst_devicep = NULL;
2058     }
2059   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2060     {
2061       if (src_device_num < 0)
2062 	return EINVAL;
2063 
2064       src_devicep = resolve_device (src_device_num);
2065       if (src_devicep == NULL)
2066 	return EINVAL;
2067 
2068       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2069 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2070 	src_devicep = NULL;
2071     }
2072   if (src_devicep == NULL && dst_devicep == NULL)
2073     {
2074       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2075       return 0;
2076     }
2077   if (src_devicep == NULL)
2078     {
2079       gomp_mutex_lock (&dst_devicep->lock);
2080       dst_devicep->host2dev_func (dst_devicep->target_id,
2081 				  (char *) dst + dst_offset,
2082 				  (char *) src + src_offset, length);
2083       gomp_mutex_unlock (&dst_devicep->lock);
2084       return 0;
2085     }
2086   if (dst_devicep == NULL)
2087     {
2088       gomp_mutex_lock (&src_devicep->lock);
2089       src_devicep->dev2host_func (src_devicep->target_id,
2090 				  (char *) dst + dst_offset,
2091 				  (char *) src + src_offset, length);
2092       gomp_mutex_unlock (&src_devicep->lock);
2093       return 0;
2094     }
2095   if (src_devicep == dst_devicep)
2096     {
2097       gomp_mutex_lock (&src_devicep->lock);
2098       src_devicep->dev2dev_func (src_devicep->target_id,
2099 				 (char *) dst + dst_offset,
2100 				 (char *) src + src_offset, length);
2101       gomp_mutex_unlock (&src_devicep->lock);
2102       return 0;
2103     }
2104   return EINVAL;
2105 }
2106 
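/* Illustrative host-to-device copy with the routine above; DEV, N and
   HOSTBUF are assumptions, and omp_get_initial_device () names the
   host side:

     void *dptr = omp_target_alloc (n, dev);
     int err = omp_target_memcpy (dptr, hostbuf, n,
                                  0, 0,              // dst/src offsets
                                  dev, omp_get_initial_device ());

   A zero return means success; EINVAL reports an invalid device.  */
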
2107 static int
2108 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2109 			       int num_dims, const size_t *volume,
2110 			       const size_t *dst_offsets,
2111 			       const size_t *src_offsets,
2112 			       const size_t *dst_dimensions,
2113 			       const size_t *src_dimensions,
2114 			       struct gomp_device_descr *dst_devicep,
2115 			       struct gomp_device_descr *src_devicep)
2116 {
2117   size_t dst_slice = element_size;
2118   size_t src_slice = element_size;
2119   size_t j, dst_off, src_off, length;
2120   int i, ret;
2121 
2122   if (num_dims == 1)
2123     {
2124       if (__builtin_mul_overflow (element_size, volume[0], &length)
2125 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2126 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2127 	return EINVAL;
2128       if (dst_devicep == NULL && src_devicep == NULL)
2129 	memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2130       else if (src_devicep == NULL)
2131 	dst_devicep->host2dev_func (dst_devicep->target_id,
2132 				    (char *) dst + dst_off,
2133 				    (char *) src + src_off, length);
2134       else if (dst_devicep == NULL)
2135 	src_devicep->dev2host_func (src_devicep->target_id,
2136 				    (char *) dst + dst_off,
2137 				    (char *) src + src_off, length);
2138       else if (src_devicep == dst_devicep)
2139 	src_devicep->dev2dev_func (src_devicep->target_id,
2140 				   (char *) dst + dst_off,
2141 				   (char *) src + src_off, length);
2142       else
2143 	return EINVAL;
2144       return 0;
2145     }
2146 
2147   /* FIXME: it would be nice to have some plugin function to handle
2148      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
2149      be handled in the generic recursion below, and for host-to-host
2150      copies it should be used for any num_dims >= 2.  */
2151 
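  /* Worked example: with num_dims == 2, element_size == 4 and
     dst_dimensions[1] == src_dimensions[1] == 100, each slice below is
     4 * 100 == 400 bytes, so every loop iteration copies one row of
     volume[1] elements and then advances both offsets by a whole row.  */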
2152   for (i = 1; i < num_dims; i++)
2153     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2154 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2155       return EINVAL;
2156   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2157       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2158     return EINVAL;
2159   for (j = 0; j < volume[0]; j++)
2160     {
2161       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2162 					   (char *) src + src_off,
2163 					   element_size, num_dims - 1,
2164 					   volume + 1, dst_offsets + 1,
2165 					   src_offsets + 1, dst_dimensions + 1,
2166 					   src_dimensions + 1, dst_devicep,
2167 					   src_devicep);
2168       if (ret)
2169 	return ret;
2170       dst_off += dst_slice;
2171       src_off += src_slice;
2172     }
2173   return 0;
2174 }
2175 
2176 int
2177 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2178 			int num_dims, const size_t *volume,
2179 			const size_t *dst_offsets,
2180 			const size_t *src_offsets,
2181 			const size_t *dst_dimensions,
2182 			const size_t *src_dimensions,
2183 			int dst_device_num, int src_device_num)
2184 {
2185   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2186 
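  /* Spec-defined query form: when both pointers are NULL, return the
     maximum number of dimensions supported.  */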
2187   if (!dst && !src)
2188     return INT_MAX;
2189 
2190   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2191     {
2192       if (dst_device_num < 0)
2193 	return EINVAL;
2194 
2195       dst_devicep = resolve_device (dst_device_num);
2196       if (dst_devicep == NULL)
2197 	return EINVAL;
2198 
2199       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2200 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2201 	dst_devicep = NULL;
2202     }
2203   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2204     {
2205       if (src_device_num < 0)
2206 	return EINVAL;
2207 
2208       src_devicep = resolve_device (src_device_num);
2209       if (src_devicep == NULL)
2210 	return EINVAL;
2211 
2212       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2213 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2214 	src_devicep = NULL;
2215     }
2216 
2217   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2218     return EINVAL;
2219 
2220   if (src_devicep)
2221     gomp_mutex_lock (&src_devicep->lock);
2222   else if (dst_devicep)
2223     gomp_mutex_lock (&dst_devicep->lock);
2224   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2225 					   volume, dst_offsets, src_offsets,
2226 					   dst_dimensions, src_dimensions,
2227 					   dst_devicep, src_devicep);
2228   if (src_devicep)
2229     gomp_mutex_unlock (&src_devicep->lock);
2230   else if (dst_devicep)
2231     gomp_mutex_unlock (&dst_devicep->lock);
2232   return ret;
2233 }
2234 
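/* Illustrative host-to-host use of omp_target_memcpy_rect: copy a 2x3
   block of ints out of a 4x5 matrix starting at row 1, column 2 (all
   values are assumptions):

     size_t volume[2]  = { 2, 3 };
     size_t dst_off[2] = { 0, 0 }, src_off[2] = { 1, 2 };
     size_t dst_dim[2] = { 2, 3 }, src_dim[2] = { 4, 5 };
     omp_target_memcpy_rect (dst, src, sizeof (int), 2, volume,
                             dst_off, src_off, dst_dim, src_dim,
                             omp_get_initial_device (),
                             omp_get_initial_device ());
*/
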
2235 int
2236 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2237 			  size_t device_offset, int device_num)
2238 {
2239   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2240     return EINVAL;
2241 
2242   if (device_num < 0)
2243     return EINVAL;
2244 
2245   struct gomp_device_descr *devicep = resolve_device (device_num);
2246   if (devicep == NULL)
2247     return EINVAL;
2248 
2249   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2250       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2251     return EINVAL;
2252 
2253   gomp_mutex_lock (&devicep->lock);
2254 
2255   struct splay_tree_s *mem_map = &devicep->mem_map;
2256   struct splay_tree_key_s cur_node;
2257   int ret = EINVAL;
2258 
2259   cur_node.host_start = (uintptr_t) host_ptr;
2260   cur_node.host_end = cur_node.host_start + size;
2261   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2262   if (n)
2263     {
2264       if (n->tgt->tgt_start + n->tgt_offset
2265 	  == (uintptr_t) device_ptr + device_offset
2266 	  && n->host_start <= cur_node.host_start
2267 	  && n->host_end >= cur_node.host_end)
2268 	ret = 0;
2269     }
2270   else
2271     {
2272       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2273       tgt->array = gomp_malloc (sizeof (*tgt->array));
2274       tgt->refcount = 1;
2275       tgt->tgt_start = 0;
2276       tgt->tgt_end = 0;
2277       tgt->to_free = NULL;
2278       tgt->prev = NULL;
2279       tgt->list_count = 0;
2280       tgt->device_descr = devicep;
2281       splay_tree_node array = tgt->array;
2282       splay_tree_key k = &array->key;
2283       k->host_start = cur_node.host_start;
2284       k->host_end = cur_node.host_end;
2285       k->tgt = tgt;
2286       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2287       k->refcount = REFCOUNT_INFINITY;
2288       k->async_refcount = 0;
2289       array->left = NULL;
2290       array->right = NULL;
2291       splay_tree_insert (&devicep->mem_map, array);
2292       ret = 0;
2293     }
2294   gomp_mutex_unlock (&devicep->lock);
2295   return ret;
2296 }
2297 
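/* Illustrative pairing with omp_target_disassociate_ptr below (BUF and
   DEV are assumptions):

     void *d = omp_target_alloc (sizeof (buf), dev);
     if (omp_target_associate_ptr (buf, d, sizeof (buf), 0, dev) == 0)
       {
         // ... mappings of buf now reuse d instead of allocating ...
         omp_target_disassociate_ptr (buf, dev);
       }
     omp_target_free (d, dev);
*/
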
2298 int
2299 omp_target_disassociate_ptr (void *ptr, int device_num)
2300 {
2301   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2302     return EINVAL;
2303 
2304   if (device_num < 0)
2305     return EINVAL;
2306 
2307   struct gomp_device_descr *devicep = resolve_device (device_num);
2308   if (devicep == NULL)
2309     return EINVAL;
2310 
2311   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2312     return EINVAL;
2313 
2314   gomp_mutex_lock (&devicep->lock);
2315 
2316   struct splay_tree_s *mem_map = &devicep->mem_map;
2317   struct splay_tree_key_s cur_node;
2318   int ret = EINVAL;
2319 
2320   cur_node.host_start = (uintptr_t) ptr;
2321   cur_node.host_end = cur_node.host_start;
2322   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2323   if (n
2324       && n->host_start == cur_node.host_start
2325       && n->refcount == REFCOUNT_INFINITY
2326       && n->tgt->tgt_start == 0
2327       && n->tgt->to_free == NULL
2328       && n->tgt->refcount == 1
2329       && n->tgt->list_count == 0)
2330     {
2331       splay_tree_remove (&devicep->mem_map, n);
2332       gomp_unmap_tgt (n->tgt);
2333       ret = 0;
2334     }
2335 
2336   gomp_mutex_unlock (&devicep->lock);
2337   return ret;
2338 }
2339 
2340 #ifdef PLUGIN_SUPPORT
2341 
2342 /* This function tries to load a plugin for DEVICE.  The name of the
2343    plugin is passed in PLUGIN_NAME.
2344    The handles of the found functions are stored in the corresponding fields
2345    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2346 
2347 static bool
2348 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2349 			     const char *plugin_name)
2350 {
2351   const char *err = NULL, *last_missing = NULL;
2352 
2353   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2354   if (!plugin_handle)
2355     goto dl_fail;
2356 
2357   /* Check if all required functions are available in the plugin and store
2358      their handles.  None of the symbols can legitimately be NULL,
2359      so we don't need to check dlerror all the time.  */
2360 #define DLSYM(f)							\
2361   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
2362     goto dl_fail
2363   /* Similar, but missing functions are not an error.  The expression
2364      evaluates to false if the lookup failed, true otherwise.  */
2365 #define DLSYM_OPT(f, n)							\
2366   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
2367    || (last_missing = #n, 0))
2368 
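  /* For instance, DLSYM (version) expands (up to the trailing goto) to
     device->version_func = dlsym (plugin_handle, "GOMP_OFFLOAD_version").  */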
2369   DLSYM (version);
2370   if (device->version_func () != GOMP_VERSION)
2371     {
2372       err = "plugin version mismatch";
2373       goto fail;
2374     }
2375 
2376   DLSYM (get_name);
2377   DLSYM (get_caps);
2378   DLSYM (get_type);
2379   DLSYM (get_num_devices);
2380   DLSYM (init_device);
2381   DLSYM (fini_device);
2382   DLSYM (load_image);
2383   DLSYM (unload_image);
2384   DLSYM (alloc);
2385   DLSYM (free);
2386   DLSYM (dev2host);
2387   DLSYM (host2dev);
2388   device->capabilities = device->get_caps_func ();
2389   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2390     {
2391       DLSYM (run);
2392       DLSYM (async_run);
2393       DLSYM_OPT (can_run, can_run);
2394       DLSYM (dev2dev);
2395     }
2396   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2397     {
2398       if (!DLSYM_OPT (openacc.exec, openacc_parallel)
2399 	  || !DLSYM_OPT (openacc.register_async_cleanup,
2400 			 openacc_register_async_cleanup)
2401 	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2402 	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2403 	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2404 	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2405 	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2406 	  || !DLSYM_OPT (openacc.async_wait_all_async,
2407 			 openacc_async_wait_all_async)
2408 	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2409 	  || !DLSYM_OPT (openacc.create_thread_data,
2410 			 openacc_create_thread_data)
2411 	  || !DLSYM_OPT (openacc.destroy_thread_data,
2412 			 openacc_destroy_thread_data))
2413 	{
2414 	  /* Require all the OpenACC handlers if we have
2415 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
2416 	  err = "plugin missing OpenACC handler function";
2417 	  goto fail;
2418 	}
2419 
2420       unsigned cuda = 0;
2421       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2422 			 openacc_get_current_cuda_device);
2423       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2424 			 openacc_get_current_cuda_context);
2425       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
2426       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
2427       if (cuda && cuda != 4)
2428 	{
2429 	  /* Make sure all the CUDA functions are there if any of them are.  */
2430 	  err = "plugin missing OpenACC CUDA handler function";
2431 	  goto fail;
2432 	}
2433     }
2434 #undef DLSYM
2435 #undef DLSYM_OPT
2436 
2437   return true;
2438 
2439  dl_fail:
2440   err = dlerror ();
2441  fail:
2442   gomp_error ("while loading %s: %s", plugin_name, err);
2443   if (last_missing)
2444     gomp_error ("missing function was %s", last_missing);
2445   if (plugin_handle)
2446     dlclose (plugin_handle);
2447 
2448   return false;
2449 }
2450 
2451 /* This function finalizes all initialized devices.  */
2452 
2453 static void
2454 gomp_target_fini (void)
2455 {
2456   int i;
2457   for (i = 0; i < num_devices; i++)
2458     {
2459       struct gomp_device_descr *devicep = &devices[i];
2460       gomp_mutex_lock (&devicep->lock);
2461       if (devicep->state == GOMP_DEVICE_INITIALIZED)
2462 	{
2463 	  devicep->fini_device_func (devicep->target_id);
2464 	  devicep->state = GOMP_DEVICE_FINALIZED;
2465 	}
2466       gomp_mutex_unlock (&devicep->lock);
2467     }
2468 }
2469 
2470 /* This function initializes the runtime needed for offloading.
2471    It parses the list of offload targets and tries to load the plugins for
2472    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2473    will be set, and the array DEVICES initialized, containing descriptors for
2474    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
2475    by the others.  */
2476 
2477 static void
2478 gomp_target_init (void)
2479 {
2480   const char *prefix = "libgomp-plugin-";
2481   const char *suffix = SONAME_SUFFIX (1);
2482   const char *cur, *next;
2483   char *plugin_name;
2484   int i, new_num_devices;
2485 
2486   num_devices = 0;
2487   devices = NULL;
2488 
2489   cur = OFFLOAD_TARGETS;
2490   if (*cur)
2491     do
2492       {
2493 	struct gomp_device_descr current_device;
2494 
2495 	next = strchr (cur, ',');
2496 
2497 	plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2498 				       + strlen (prefix) + strlen (suffix));
2499 	if (!plugin_name)
2500 	  {
2501 	    num_devices = 0;
2502 	    break;
2503 	  }
2504 
2505 	strcpy (plugin_name, prefix);
2506 	strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2507 	strcat (plugin_name, suffix);
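	/* E.g. an offload target "nvptx" typically yields
	   "libgomp-plugin-nvptx" plus the platform's shared-object
	   suffix, such as ".so.1" on ELF systems.  */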
2508 
2509 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
2510 	  {
2511 	    new_num_devices = current_device.get_num_devices_func ();
2512 	    if (new_num_devices >= 1)
2513 	      {
2514 		/* Augment DEVICES and NUM_DEVICES.  */
2515 
2516 		devices = realloc (devices, (num_devices + new_num_devices)
2517 				   * sizeof (struct gomp_device_descr));
2518 		if (!devices)
2519 		  {
2520 		    num_devices = 0;
2521 		    free (plugin_name);
2522 		    break;
2523 		  }
2524 
2525 		current_device.name = current_device.get_name_func ();
2526 		/* current_device.capabilities has already been set.  */
2527 		current_device.type = current_device.get_type_func ();
2528 		current_device.mem_map.root = NULL;
2529 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
2530 		current_device.openacc.data_environ = NULL;
2531 		for (i = 0; i < new_num_devices; i++)
2532 		  {
2533 		    current_device.target_id = i;
2534 		    devices[num_devices] = current_device;
2535 		    gomp_mutex_init (&devices[num_devices].lock);
2536 		    num_devices++;
2537 		  }
2538 	      }
2539 	  }
2540 
2541 	free (plugin_name);
2542 	cur = next + 1;
2543       }
2544     while (next);
2545 
2546   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2547      NUM_DEVICES_OPENMP.  */
2548   struct gomp_device_descr *devices_s
2549     = malloc (num_devices * sizeof (struct gomp_device_descr));
2550   if (!devices_s)
2551     {
2552       num_devices = 0;
2553       free (devices);
2554       devices = NULL;
2555     }
2556   num_devices_openmp = 0;
2557   for (i = 0; i < num_devices; i++)
2558     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2559       devices_s[num_devices_openmp++] = devices[i];
2560   int num_devices_after_openmp = num_devices_openmp;
2561   for (i = 0; i < num_devices; i++)
2562     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2563       devices_s[num_devices_after_openmp++] = devices[i];
2564   free (devices);
2565   devices = devices_s;
2566 
2567   for (i = 0; i < num_devices; i++)
2568     {
2569       /* The 'devices' array can be moved (by the realloc call) until we have
2570 	 found all the plugins, so registering with the OpenACC runtime (which
2571 	 takes a copy of the pointer argument) must be delayed until now.  */
2572       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2573 	goacc_register (&devices[i]);
2574     }
2575 
2576   if (atexit (gomp_target_fini) != 0)
2577     gomp_fatal ("atexit failed");
2578 }
2579 
2580 #else /* PLUGIN_SUPPORT */
2581 /* If dlfcn.h is unavailable we always fall back to host execution.
2582    GOMP_target* routines are just stubs for this case.  */
2583 static void
2584 gomp_target_init (void)
2585 {
2586 }
2587 #endif /* PLUGIN_SUPPORT */
2588