xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/target.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /* Copyright (C) 2013-2017 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file contains the support for offloading.  */
27 
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h>  /* For PRIu64.  */
38 #endif
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42 
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47 
48 static void gomp_target_init (void);
49 
50 /* The whole initialization code for offloading plugins is only run once.  */
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 
53 /* Mutex for offload image registration.  */
54 static gomp_mutex_t register_lock;
55 
56 /* This structure describes an offload image.
57    It contains the type of the target device, a pointer to the host table
58    descriptor, and a pointer to the target data.  */
59 struct offload_image_descr {
60   unsigned version;
61   enum offload_target_type type;
62   const void *host_table;
63   const void *target_data;
64 };
65 
66 /* Array of descriptors of offload images.  */
67 static struct offload_image_descr *offload_images;
68 
69 /* Total number of offload images.  */
70 static int num_offload_images;
71 
72 /* Array of descriptors for all available devices.  */
73 static struct gomp_device_descr *devices;
74 
75 /* Total number of available devices.  */
76 static int num_devices;
77 
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
79 static int num_devices_openmp;
80 
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
82 
83 static void *
84 gomp_realloc_unlock (void *old, size_t size)
85 {
86   void *ret = realloc (old, size);
87   if (ret == NULL)
88     {
89       gomp_mutex_unlock (&register_lock);
90       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91     }
92   return ret;
93 }
94 
95 attribute_hidden void
96 gomp_init_targets_once (void)
97 {
98   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99 }
100 
101 attribute_hidden int
102 gomp_get_num_devices (void)
103 {
104   gomp_init_targets_once ();
105   return num_devices_openmp;
106 }
107 
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
110 {
111   if (device_id == GOMP_DEVICE_ICV)
112     {
113       struct gomp_task_icv *icv = gomp_icv (false);
114       device_id = icv->default_device_var;
115     }
116 
117   if (device_id < 0 || device_id >= gomp_get_num_devices ())
118     return NULL;
119 
120   gomp_mutex_lock (&devices[device_id].lock);
121   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122     gomp_init_device (&devices[device_id]);
123   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124     {
125       gomp_mutex_unlock (&devices[device_id].lock);
126       return NULL;
127     }
128   gomp_mutex_unlock (&devices[device_id].lock);
129 
130   return &devices[device_id];
131 }
132 
133 
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136 {
137   if (key->host_start != key->host_end)
138     return splay_tree_lookup (mem_map, key);
139 
140   key->host_end++;
141   splay_tree_key n = splay_tree_lookup (mem_map, key);
142   key->host_end--;
143   if (n)
144     return n;
145   key->host_start--;
146   n = splay_tree_lookup (mem_map, key);
147   key->host_start++;
148   if (n)
149     return n;
150   return splay_tree_lookup (mem_map, key);
151 }
152 
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155 {
156   if (key->host_start != key->host_end)
157     return splay_tree_lookup (mem_map, key);
158 
159   key->host_end++;
160   splay_tree_key n = splay_tree_lookup (mem_map, key);
161   key->host_end--;
162   return n;
163 }
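
/* A worked example of the zero-length lookup trick above (for
   illustration only; the addresses are made up): for a zero-length key
   [p, p) a plain splay-tree lookup can never report an overlap, so
   gomp_map_lookup widens the probe to [p, p+1) and then to [p-1, p).
   With an existing mapping [0x1000, 0x1010), a zero-length array
   section at 0x1010 is still matched through the second probe:

     struct splay_tree_key_s key;
     key.host_start = key.host_end = 0x1010;
     splay_tree_key n = gomp_map_lookup (mem_map, &key);
     // n is the [0x1000, 0x1010) mapping, found via the [p-1, p) probe.

   gomp_map_0len_lookup performs only the first widening, i.e. it
   accepts a mapping that contains or starts at P, but not one that
   merely ends there.  */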
164 
165 static inline void
166 gomp_device_copy (struct gomp_device_descr *devicep,
167 		  bool (*copy_func) (int, void *, const void *, size_t),
168 		  const char *dst, void *dstaddr,
169 		  const char *src, const void *srcaddr,
170 		  size_t size)
171 {
172   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173     {
174       gomp_mutex_unlock (&devicep->lock);
175       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177     }
178 }
179 
180 static void
181 gomp_copy_host2dev (struct gomp_device_descr *devicep,
182 		    void *d, const void *h, size_t sz)
183 {
184   gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
185 }
186 
187 static void
188 gomp_copy_dev2host (struct gomp_device_descr *devicep,
189 		    void *h, const void *d, size_t sz)
190 {
191   gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
192 }
193 
194 static void
195 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
196 {
197   if (!devicep->free_func (devicep->target_id, devptr))
198     {
199       gomp_mutex_unlock (&devicep->lock);
200       gomp_fatal ("error in freeing device memory block at %p", devptr);
201     }
202 }
203 
204 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
205    gomp_map_0len_lookup found OLDN for NEWN.
206    Helper function of gomp_map_vars.  */
207 
208 static inline void
209 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
210 			splay_tree_key newn, struct target_var_desc *tgt_var,
211 			unsigned char kind)
212 {
213   tgt_var->key = oldn;
214   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
215   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
216   tgt_var->offset = newn->host_start - oldn->host_start;
217   tgt_var->length = newn->host_end - newn->host_start;
218 
219   if ((kind & GOMP_MAP_FLAG_FORCE)
220       || oldn->host_start > newn->host_start
221       || oldn->host_end < newn->host_end)
222     {
223       gomp_mutex_unlock (&devicep->lock);
224       gomp_fatal ("Trying to map into device [%p..%p) object when "
225 		  "[%p..%p) is already mapped",
226 		  (void *) newn->host_start, (void *) newn->host_end,
227 		  (void *) oldn->host_start, (void *) oldn->host_end);
228     }
229 
230   if (GOMP_MAP_ALWAYS_TO_P (kind))
231     gomp_copy_host2dev (devicep,
232 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
233 				  + newn->host_start - oldn->host_start),
234 			(void *) newn->host_start,
235 			newn->host_end - newn->host_start);
236 
237   if (oldn->refcount != REFCOUNT_INFINITY)
238     oldn->refcount++;
239 }
240 
241 static int
242 get_kind (bool short_mapkind, void *kinds, int idx)
243 {
244   return short_mapkind ? ((unsigned short *) kinds)[idx]
245 		       : ((unsigned char *) kinds)[idx];
246 }
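
/* For reference, a sketch of the kind encoding assumed throughout this
   file (the authoritative values live in gomp-constants.h): with short
   (16-bit) kinds the low byte is the map type and the high byte the
   log2 of the alignment; with char kinds the low 3 bits are the type
   and the remaining bits the alignment.  For example, assuming
   GOMP_MAP_TO == 1:

     unsigned short kind = (3 << 8) | GOMP_MAP_TO;
     // get_kind (true, &kind, 0) == 0x0301
     // type:  kind & 0xff                == GOMP_MAP_TO
     // align: (size_t) 1 << (kind >> 8)  == 8 bytes

   matching rshift == 8 / typemask == 0xff (resp. 3 / 0x7 for char
   kinds) in gomp_map_vars below.  */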
247 
248 static void
249 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
250 		  uintptr_t target_offset, uintptr_t bias)
251 {
252   struct gomp_device_descr *devicep = tgt->device_descr;
253   struct splay_tree_s *mem_map = &devicep->mem_map;
254   struct splay_tree_key_s cur_node;
255 
256   cur_node.host_start = host_ptr;
257   if (cur_node.host_start == (uintptr_t) NULL)
258     {
259       cur_node.tgt_offset = (uintptr_t) NULL;
260       /* FIXME: see comment about coalescing host/dev transfers below.  */
261       gomp_copy_host2dev (devicep,
262 			  (void *) (tgt->tgt_start + target_offset),
263 			  (void *) &cur_node.tgt_offset,
264 			  sizeof (void *));
265       return;
266     }
267   /* Add bias to the pointer value.  */
268   cur_node.host_start += bias;
269   cur_node.host_end = cur_node.host_start;
270   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
271   if (n == NULL)
272     {
273       gomp_mutex_unlock (&devicep->lock);
274       gomp_fatal ("Pointer target of array section wasn't mapped");
275     }
276   cur_node.host_start -= n->host_start;
277   cur_node.tgt_offset
278     = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
279   /* At this point tgt_offset is target address of the
280      array section.  Now subtract bias to get what we want
281      to initialize the pointer with.  */
282   cur_node.tgt_offset -= bias;
283   /* FIXME: see comment about coalescing host/dev transfers below.  */
284   gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
285 		      (void *) &cur_node.tgt_offset, sizeof (void *));
286 }
287 
288 static void
289 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
290 			  size_t first, size_t i, void **hostaddrs,
291 			  size_t *sizes, void *kinds)
292 {
293   struct gomp_device_descr *devicep = tgt->device_descr;
294   struct splay_tree_s *mem_map = &devicep->mem_map;
295   struct splay_tree_key_s cur_node;
296   int kind;
297   const bool short_mapkind = true;
298   const int typemask = short_mapkind ? 0xff : 0x7;
299 
300   cur_node.host_start = (uintptr_t) hostaddrs[i];
301   cur_node.host_end = cur_node.host_start + sizes[i];
302   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
303   kind = get_kind (short_mapkind, kinds, i);
304   if (n2
305       && n2->tgt == n->tgt
306       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
307     {
308       gomp_map_vars_existing (devicep, n2, &cur_node,
309 			      &tgt->list[i], kind & typemask);
310       return;
311     }
312   if (sizes[i] == 0)
313     {
314       if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
315 	{
316 	  cur_node.host_start--;
317 	  n2 = splay_tree_lookup (mem_map, &cur_node);
318 	  cur_node.host_start++;
319 	  if (n2
320 	      && n2->tgt == n->tgt
321 	      && n2->host_start - n->host_start
322 		 == n2->tgt_offset - n->tgt_offset)
323 	    {
324 	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
325 				      kind & typemask);
326 	      return;
327 	    }
328 	}
329       cur_node.host_end++;
330       n2 = splay_tree_lookup (mem_map, &cur_node);
331       cur_node.host_end--;
332       if (n2
333 	  && n2->tgt == n->tgt
334 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
335 	{
336 	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
337 				  kind & typemask);
338 	  return;
339 	}
340     }
341   gomp_mutex_unlock (&devicep->lock);
342   gomp_fatal ("Trying to map into device [%p..%p) structure element when "
343 	      "other mapped elements from the same structure weren't mapped "
344 	      "together with it", (void *) cur_node.host_start,
345 	      (void *) cur_node.host_end);
346 }
347 
348 static inline uintptr_t
349 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
350 {
351   if (tgt->list[i].key != NULL)
352     return tgt->list[i].key->tgt->tgt_start
353 	   + tgt->list[i].key->tgt_offset
354 	   + tgt->list[i].offset;
355   if (tgt->list[i].offset == ~(uintptr_t) 0)
356     return (uintptr_t) hostaddrs[i];
357   if (tgt->list[i].offset == ~(uintptr_t) 1)
358     return 0;
359   if (tgt->list[i].offset == ~(uintptr_t) 2)
360     return tgt->list[i + 1].key->tgt->tgt_start
361 	   + tgt->list[i + 1].key->tgt_offset
362 	   + tgt->list[i + 1].offset
363 	   + (uintptr_t) hostaddrs[i]
364 	   - (uintptr_t) hostaddrs[i + 1];
365   return tgt->tgt_start + tgt->list[i].offset;
366 }
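
/* A quick key to the ~(uintptr_t) sentinels that gomp_map_val decodes
   from tgt->list[i].offset (as set up in gomp_map_vars below):
     ~0  - hand back hostaddrs[i] unchanged (NULL host addresses,
	   firstprivate ints, and use_device_ptr entries, whose
	   hostaddrs[i] was already rewritten to the device address);
     ~1  - yield a zero device address (always_pointer entries and
	   zero-length array sections that were not mapped);
     ~2  - GOMP_MAP_STRUCT: derive the address from the next list entry
	   plus the delta between the two host addresses.  */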
367 
368 attribute_hidden struct target_mem_desc *
369 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
370 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
371 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
372 {
373   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
374   bool has_firstprivate = false;
375   const int rshift = short_mapkind ? 8 : 3;
376   const int typemask = short_mapkind ? 0xff : 0x7;
377   struct splay_tree_s *mem_map = &devicep->mem_map;
378   struct splay_tree_key_s cur_node;
379   struct target_mem_desc *tgt
380     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
381   tgt->list_count = mapnum;
382   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
383   tgt->device_descr = devicep;
384 
385   if (mapnum == 0)
386     {
387       tgt->tgt_start = 0;
388       tgt->tgt_end = 0;
389       return tgt;
390     }
391 
392   tgt_align = sizeof (void *);
393   tgt_size = 0;
394   if (pragma_kind == GOMP_MAP_VARS_TARGET)
395     {
396       size_t align = 4 * sizeof (void *);
397       tgt_align = align;
398       tgt_size = mapnum * sizeof (void *);
399     }
400 
401   gomp_mutex_lock (&devicep->lock);
402   if (devicep->state == GOMP_DEVICE_FINALIZED)
403     {
404       gomp_mutex_unlock (&devicep->lock);
405       free (tgt);
406       return NULL;
407     }
408 
409   for (i = 0; i < mapnum; i++)
410     {
411       int kind = get_kind (short_mapkind, kinds, i);
412       if (hostaddrs[i] == NULL
413 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
414 	{
415 	  tgt->list[i].key = NULL;
416 	  tgt->list[i].offset = ~(uintptr_t) 0;
417 	  continue;
418 	}
419       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
420 	{
421 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
422 	  cur_node.host_end = cur_node.host_start;
423 	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
424 	  if (n == NULL)
425 	    {
426 	      gomp_mutex_unlock (&devicep->lock);
427 	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
428 	    }
429 	  cur_node.host_start -= n->host_start;
430 	  hostaddrs[i]
431 	    = (void *) (n->tgt->tgt_start + n->tgt_offset
432 			+ cur_node.host_start);
433 	  tgt->list[i].key = NULL;
434 	  tgt->list[i].offset = ~(uintptr_t) 0;
435 	  continue;
436 	}
437       else if ((kind & typemask) == GOMP_MAP_STRUCT)
438 	{
439 	  size_t first = i + 1;
440 	  size_t last = i + sizes[i];
441 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
442 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
443 			      + sizes[last];
444 	  tgt->list[i].key = NULL;
445 	  tgt->list[i].offset = ~(uintptr_t) 2;
446 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
447 	  if (n == NULL)
448 	    {
449 	      size_t align = (size_t) 1 << (kind >> rshift);
450 	      if (tgt_align < align)
451 		tgt_align = align;
452 	      tgt_size -= (uintptr_t) hostaddrs[first]
453 			  - (uintptr_t) hostaddrs[i];
454 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
455 	      tgt_size += cur_node.host_end - (uintptr_t) hostaddrs[i];
456 	      not_found_cnt += last - i;
457 	      for (i = first; i <= last; i++)
458 		tgt->list[i].key = NULL;
459 	      i--;
460 	      continue;
461 	    }
462 	  for (i = first; i <= last; i++)
463 	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
464 				      sizes, kinds);
465 	  i--;
466 	  continue;
467 	}
468       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
469 	{
470 	  tgt->list[i].key = NULL;
471 	  tgt->list[i].offset = ~(uintptr_t) 1;
472 	  has_firstprivate = true;
473 	  continue;
474 	}
475       cur_node.host_start = (uintptr_t) hostaddrs[i];
476       if (!GOMP_MAP_POINTER_P (kind & typemask))
477 	cur_node.host_end = cur_node.host_start + sizes[i];
478       else
479 	cur_node.host_end = cur_node.host_start + sizeof (void *);
480       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
481 	{
482 	  tgt->list[i].key = NULL;
483 
484 	  size_t align = (size_t) 1 << (kind >> rshift);
485 	  if (tgt_align < align)
486 	    tgt_align = align;
487 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
488 	  tgt_size += cur_node.host_end - cur_node.host_start;
489 	  has_firstprivate = true;
490 	  continue;
491 	}
492       splay_tree_key n;
493       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
494 	{
495 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
496 	  if (!n)
497 	    {
498 	      tgt->list[i].key = NULL;
499 	      tgt->list[i].offset = ~(uintptr_t) 1;
500 	      continue;
501 	    }
502 	}
503       else
504 	n = splay_tree_lookup (mem_map, &cur_node);
505       if (n && n->refcount != REFCOUNT_LINK)
506 	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
507 				kind & typemask);
508       else
509 	{
510 	  tgt->list[i].key = NULL;
511 
512 	  size_t align = (size_t) 1 << (kind >> rshift);
513 	  not_found_cnt++;
514 	  if (tgt_align < align)
515 	    tgt_align = align;
516 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
517 	  tgt_size += cur_node.host_end - cur_node.host_start;
518 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
519 	    {
520 	      size_t j;
521 	      for (j = i + 1; j < mapnum; j++)
522 		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
523 					 & typemask))
524 		  break;
525 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
526 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
527 			     > cur_node.host_end))
528 		  break;
529 		else
530 		  {
531 		    tgt->list[j].key = NULL;
532 		    i++;
533 		  }
534 	    }
535 	}
536     }
537 
538   if (devaddrs)
539     {
540       if (mapnum != 1)
541 	{
542 	  gomp_mutex_unlock (&devicep->lock);
543 	  gomp_fatal ("unexpected aggregation");
544 	}
545       tgt->to_free = devaddrs[0];
546       tgt->tgt_start = (uintptr_t) tgt->to_free;
547       tgt->tgt_end = tgt->tgt_start + sizes[0];
548     }
549   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
550     {
551       /* Allocate a block of tgt_size bytes aligned to tgt_align.  */
552       /* FIXME: Perhaps change interface to allocate properly aligned
553 	 memory.  */
554       tgt->to_free = devicep->alloc_func (devicep->target_id,
555 					  tgt_size + tgt_align - 1);
556       if (!tgt->to_free)
557 	{
558 	  gomp_mutex_unlock (&devicep->lock);
559 	  gomp_fatal ("device memory allocation fail");
560 	}
561 
562       tgt->tgt_start = (uintptr_t) tgt->to_free;
563       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
564       tgt->tgt_end = tgt->tgt_start + tgt_size;
565     }
566   else
567     {
568       tgt->to_free = NULL;
569       tgt->tgt_start = 0;
570       tgt->tgt_end = 0;
571     }
572 
573   tgt_size = 0;
574   if (pragma_kind == GOMP_MAP_VARS_TARGET)
575     tgt_size = mapnum * sizeof (void *);
576 
577   tgt->array = NULL;
578   if (not_found_cnt || has_firstprivate)
579     {
580       if (not_found_cnt)
581 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
582       splay_tree_node array = tgt->array;
583       size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
584       uintptr_t field_tgt_base = 0;
585 
586       for (i = 0; i < mapnum; i++)
587 	if (tgt->list[i].key == NULL)
588 	  {
589 	    int kind = get_kind (short_mapkind, kinds, i);
590 	    if (hostaddrs[i] == NULL)
591 	      continue;
592 	    switch (kind & typemask)
593 	      {
594 		size_t align, len, first, last;
595 		splay_tree_key n;
596 	      case GOMP_MAP_FIRSTPRIVATE:
597 		align = (size_t) 1 << (kind >> rshift);
598 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
599 		tgt->list[i].offset = tgt_size;
600 		len = sizes[i];
601 		gomp_copy_host2dev (devicep,
602 				    (void *) (tgt->tgt_start + tgt_size),
603 				    (void *) hostaddrs[i], len);
604 		tgt_size += len;
605 		continue;
606 	      case GOMP_MAP_FIRSTPRIVATE_INT:
607 	      case GOMP_MAP_USE_DEVICE_PTR:
608 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
609 		continue;
610 	      case GOMP_MAP_STRUCT:
611 		first = i + 1;
612 		last = i + sizes[i];
613 		cur_node.host_start = (uintptr_t) hostaddrs[i];
614 		cur_node.host_end = (uintptr_t) hostaddrs[last]
615 				    + sizes[last];
616 		if (tgt->list[first].key != NULL)
617 		  continue;
618 		n = splay_tree_lookup (mem_map, &cur_node);
619 		if (n == NULL)
620 		  {
621 		    size_t align = (size_t) 1 << (kind >> rshift);
622 		    tgt_size -= (uintptr_t) hostaddrs[first]
623 				- (uintptr_t) hostaddrs[i];
624 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
625 		    tgt_size += (uintptr_t) hostaddrs[first]
626 				- (uintptr_t) hostaddrs[i];
627 		    field_tgt_base = (uintptr_t) hostaddrs[first];
628 		    field_tgt_offset = tgt_size;
629 		    field_tgt_clear = last;
630 		    tgt_size += cur_node.host_end
631 				- (uintptr_t) hostaddrs[first];
632 		    continue;
633 		  }
634 		for (i = first; i <= last; i++)
635 		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
636 					    sizes, kinds);
637 		i--;
638 		continue;
639 	      case GOMP_MAP_ALWAYS_POINTER:
640 		cur_node.host_start = (uintptr_t) hostaddrs[i];
641 		cur_node.host_end = cur_node.host_start + sizeof (void *);
642 		n = splay_tree_lookup (mem_map, &cur_node);
643 		if (n == NULL
644 		    || n->host_start > cur_node.host_start
645 		    || n->host_end < cur_node.host_end)
646 		  {
647 		    gomp_mutex_unlock (&devicep->lock);
648 		    gomp_fatal ("always pointer not mapped");
649 		  }
650 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
651 		    != GOMP_MAP_ALWAYS_POINTER)
652 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
653 		if (cur_node.tgt_offset)
654 		  cur_node.tgt_offset -= sizes[i];
655 		gomp_copy_host2dev (devicep,
656 				    (void *) (n->tgt->tgt_start
657 					      + n->tgt_offset
658 					      + cur_node.host_start
659 					      - n->host_start),
660 				    (void *) &cur_node.tgt_offset,
661 				    sizeof (void *));
662 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
663 				      + cur_node.host_start - n->host_start;
664 		continue;
665 	      default:
666 		break;
667 	      }
668 	    splay_tree_key k = &array->key;
669 	    k->host_start = (uintptr_t) hostaddrs[i];
670 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
671 	      k->host_end = k->host_start + sizes[i];
672 	    else
673 	      k->host_end = k->host_start + sizeof (void *);
674 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
675 	    if (n && n->refcount != REFCOUNT_LINK)
676 	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
677 				      kind & typemask);
678 	    else
679 	      {
680 		k->link_key = NULL;
681 		if (n && n->refcount == REFCOUNT_LINK)
682 		  {
683 		    /* Replace target address of the pointer with target address
684 		       of mapped object in the splay tree.  */
685 		    splay_tree_remove (mem_map, n);
686 		    k->link_key = n;
687 		  }
688 		size_t align = (size_t) 1 << (kind >> rshift);
689 		tgt->list[i].key = k;
690 		k->tgt = tgt;
691 		if (field_tgt_clear != ~(size_t) 0)
692 		  {
693 		    k->tgt_offset = k->host_start - field_tgt_base
694 				    + field_tgt_offset;
695 		    if (i == field_tgt_clear)
696 		      field_tgt_clear = ~(size_t) 0;
697 		  }
698 		else
699 		  {
700 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
701 		    k->tgt_offset = tgt_size;
702 		    tgt_size += k->host_end - k->host_start;
703 		  }
704 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
705 		tgt->list[i].always_copy_from
706 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
707 		tgt->list[i].offset = 0;
708 		tgt->list[i].length = k->host_end - k->host_start;
709 		k->refcount = 1;
710 		tgt->refcount++;
711 		array->left = NULL;
712 		array->right = NULL;
713 		splay_tree_insert (mem_map, array);
714 		switch (kind & typemask)
715 		  {
716 		  case GOMP_MAP_ALLOC:
717 		  case GOMP_MAP_FROM:
718 		  case GOMP_MAP_FORCE_ALLOC:
719 		  case GOMP_MAP_FORCE_FROM:
720 		  case GOMP_MAP_ALWAYS_FROM:
721 		    break;
722 		  case GOMP_MAP_TO:
723 		  case GOMP_MAP_TOFROM:
724 		  case GOMP_MAP_FORCE_TO:
725 		  case GOMP_MAP_FORCE_TOFROM:
726 		  case GOMP_MAP_ALWAYS_TO:
727 		  case GOMP_MAP_ALWAYS_TOFROM:
728 		    /* FIXME: Perhaps add some smarts, like if copying
729 		       several adjacent fields from host to target, use some
730 		       host buffer to avoid sending each var individually.  */
731 		    gomp_copy_host2dev (devicep,
732 					(void *) (tgt->tgt_start
733 						  + k->tgt_offset),
734 					(void *) k->host_start,
735 					k->host_end - k->host_start);
736 		    break;
737 		  case GOMP_MAP_POINTER:
738 		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
739 				      k->tgt_offset, sizes[i]);
740 		    break;
741 		  case GOMP_MAP_TO_PSET:
742 		    /* FIXME: see above FIXME comment.  */
743 		    gomp_copy_host2dev (devicep,
744 					(void *) (tgt->tgt_start
745 						  + k->tgt_offset),
746 					(void *) k->host_start,
747 					k->host_end - k->host_start);
748 
749 		    for (j = i + 1; j < mapnum; j++)
750 		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
751 							 j)
752 					       & typemask))
753 			break;
754 		      else if ((uintptr_t) hostaddrs[j] < k->host_start
755 			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
756 				   > k->host_end))
757 			break;
758 		      else
759 			{
760 			  tgt->list[j].key = k;
761 			  tgt->list[j].copy_from = false;
762 			  tgt->list[j].always_copy_from = false;
763 			  if (k->refcount != REFCOUNT_INFINITY)
764 			    k->refcount++;
765 			  gomp_map_pointer (tgt,
766 					    (uintptr_t) *(void **) hostaddrs[j],
767 					    k->tgt_offset
768 					    + ((uintptr_t) hostaddrs[j]
769 					       - k->host_start),
770 					    sizes[j]);
771 			  i++;
772 			}
773 		    break;
774 		  case GOMP_MAP_FORCE_PRESENT:
775 		    {
776 		      /* We already looked up the memory region above and it
777 			 was missing.  */
778 		      size_t size = k->host_end - k->host_start;
779 		      gomp_mutex_unlock (&devicep->lock);
780 #ifdef HAVE_INTTYPES_H
781 		      gomp_fatal ("present clause: !acc_is_present (%p, "
782 				  "%"PRIu64" (0x%"PRIx64"))",
783 				  (void *) k->host_start,
784 				  (uint64_t) size, (uint64_t) size);
785 #else
786 		      gomp_fatal ("present clause: !acc_is_present (%p, "
787 				  "%lu (0x%lx))", (void *) k->host_start,
788 				  (unsigned long) size, (unsigned long) size);
789 #endif
790 		    }
791 		    break;
792 		  case GOMP_MAP_FORCE_DEVICEPTR:
793 		    assert (k->host_end - k->host_start == sizeof (void *));
794 		    gomp_copy_host2dev (devicep,
795 					(void *) (tgt->tgt_start
796 						  + k->tgt_offset),
797 					(void *) k->host_start,
798 					sizeof (void *));
799 		    break;
800 		  default:
801 		    gomp_mutex_unlock (&devicep->lock);
802 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
803 				kind);
804 		  }
805 
806 		if (k->link_key)
807 		  {
808 		    /* Set link pointer on target to the device address of the
809 		       mapped object.  */
810 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
811 		    devicep->host2dev_func (devicep->target_id,
812 					    (void *) n->tgt_offset,
813 					    &tgt_addr, sizeof (void *));
814 		  }
815 		array++;
816 	      }
817 	  }
818     }
819 
820   if (pragma_kind == GOMP_MAP_VARS_TARGET)
821     {
822       for (i = 0; i < mapnum; i++)
823 	{
824 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
825 	  /* FIXME: see above FIXME comment.  */
826 	  gomp_copy_host2dev (devicep,
827 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
828 			      (void *) &cur_node.tgt_offset, sizeof (void *));
829 	}
830     }
831 
832   /* If the variable from "omp target enter data" map-list was already mapped,
833      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
834      gomp_exit_data.  */
835   if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
836     {
837       free (tgt);
838       tgt = NULL;
839     }
840 
841   gomp_mutex_unlock (&devicep->lock);
842   return tgt;
843 }
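
/* A minimal caller-side sketch (hypothetical, for illustration only) of
   mapping a single tofrom array the way compiler-generated code would,
   assuming short (16-bit) kinds and GOMP_MAP_TOFROM == 3:

     int a[64];
     void *hostaddrs[1] = { a };
     size_t sizes[1] = { sizeof a };
     unsigned short kinds[1] = { (4 << 8) | GOMP_MAP_TOFROM };
     struct target_mem_desc *tgt
       = gomp_map_vars (devicep, 1, hostaddrs, NULL, sizes, kinds,
			true, GOMP_MAP_VARS_DATA);
     ...
     gomp_unmap_vars (tgt, true);  // copies the tofrom data back

   (4 << 8) encodes a log2 alignment of 4, i.e. 16 bytes.  */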
844 
845 static void
846 gomp_unmap_tgt (struct target_mem_desc *tgt)
847 {
848   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
849   if (tgt->tgt_end)
850     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
851 
852   free (tgt->array);
853   free (tgt);
854 }
855 
856 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
857    variables back from the device to the host; if it is false, it is assumed
858    that this has already been done.  */
859 
860 attribute_hidden void
861 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
862 {
863   struct gomp_device_descr *devicep = tgt->device_descr;
864 
865   if (tgt->list_count == 0)
866     {
867       free (tgt);
868       return;
869     }
870 
871   gomp_mutex_lock (&devicep->lock);
872   if (devicep->state == GOMP_DEVICE_FINALIZED)
873     {
874       gomp_mutex_unlock (&devicep->lock);
875       free (tgt->array);
876       free (tgt);
877       return;
878     }
879 
880   size_t i;
881   for (i = 0; i < tgt->list_count; i++)
882     {
883       splay_tree_key k = tgt->list[i].key;
884       if (k == NULL)
885 	continue;
886 
887       bool do_unmap = false;
888       if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
889 	k->refcount--;
890       else if (k->refcount == 1)
891 	{
892 	  k->refcount--;
893 	  do_unmap = true;
894 	}
895 
896       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
897 	  || tgt->list[i].always_copy_from)
898 	gomp_copy_dev2host (devicep,
899 			    (void *) (k->host_start + tgt->list[i].offset),
900 			    (void *) (k->tgt->tgt_start + k->tgt_offset
901 				      + tgt->list[i].offset),
902 			    tgt->list[i].length);
903       if (do_unmap)
904 	{
905 	  splay_tree_remove (&devicep->mem_map, k);
906 	  if (k->link_key)
907 	    splay_tree_insert (&devicep->mem_map,
908 			       (splay_tree_node) k->link_key);
909 	  if (k->tgt->refcount > 1)
910 	    k->tgt->refcount--;
911 	  else
912 	    gomp_unmap_tgt (k->tgt);
913 	}
914     }
915 
916   if (tgt->refcount > 1)
917     tgt->refcount--;
918   else
919     gomp_unmap_tgt (tgt);
920 
921   gomp_mutex_unlock (&devicep->lock);
922 }
923 
924 static void
925 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
926 	     size_t *sizes, void *kinds, bool short_mapkind)
927 {
928   size_t i;
929   struct splay_tree_key_s cur_node;
930   const int typemask = short_mapkind ? 0xff : 0x7;
931 
932   if (!devicep)
933     return;
934 
935   if (mapnum == 0)
936     return;
937 
938   gomp_mutex_lock (&devicep->lock);
939   if (devicep->state == GOMP_DEVICE_FINALIZED)
940     {
941       gomp_mutex_unlock (&devicep->lock);
942       return;
943     }
944 
945   for (i = 0; i < mapnum; i++)
946     if (sizes[i])
947       {
948 	cur_node.host_start = (uintptr_t) hostaddrs[i];
949 	cur_node.host_end = cur_node.host_start + sizes[i];
950 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
951 	if (n)
952 	  {
953 	    int kind = get_kind (short_mapkind, kinds, i);
954 	    if (n->host_start > cur_node.host_start
955 		|| n->host_end < cur_node.host_end)
956 	      {
957 		gomp_mutex_unlock (&devicep->lock);
958 		gomp_fatal ("Trying to update [%p..%p) object when "
959 			    "only [%p..%p) is mapped",
960 			    (void *) cur_node.host_start,
961 			    (void *) cur_node.host_end,
962 			    (void *) n->host_start,
963 			    (void *) n->host_end);
964 	      }
965 
966 
967 	    void *hostaddr = (void *) cur_node.host_start;
968 	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
969 				      + cur_node.host_start - n->host_start);
970 	    size_t size = cur_node.host_end - cur_node.host_start;
971 
972 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
973 	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
974 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
975 	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
976 	  }
977       }
978   gomp_mutex_unlock (&devicep->lock);
979 }
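
/* gomp_update is the workhorse behind the target update construct, e.g.
   (illustrative user code, not part of libgomp):

     #pragma omp target update to(x) from(y)

   which the compiler lowers to a GOMP_target_update* call whose kind
   for x satisfies GOMP_MAP_COPY_TO_P and whose kind for y satisfies
   GOMP_MAP_COPY_FROM_P, ending up in the two gomp_copy_* calls above.  */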
980 
981 /* Load the image pointed to by TARGET_DATA onto the device specified by
982    DEVICEP, and insert into the splay tree the mapping between addresses from
983    HOST_TABLE and those from the loaded target image.  We rely on the host and
984    device compilers emitting variables and functions in the same order.  */
985 
986 static void
987 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
988 			   const void *host_table, const void *target_data,
989 			   bool is_register_lock)
990 {
991   void **host_func_table = ((void ***) host_table)[0];
992   void **host_funcs_end  = ((void ***) host_table)[1];
993   void **host_var_table  = ((void ***) host_table)[2];
994   void **host_vars_end   = ((void ***) host_table)[3];
995 
996   /* The func table contains only addresses, the var table contains addresses
997      and corresponding sizes.  */
998   int num_funcs = host_funcs_end - host_func_table;
999   int num_vars  = (host_vars_end - host_var_table) / 2;
1000 
1001   /* Load image to device and get target addresses for the image.  */
1002   struct addr_pair *target_table = NULL;
1003   int i, num_target_entries;
1004 
1005   num_target_entries
1006     = devicep->load_image_func (devicep->target_id, version,
1007 				target_data, &target_table);
1008 
1009   if (num_target_entries != num_funcs + num_vars)
1010     {
1011       gomp_mutex_unlock (&devicep->lock);
1012       if (is_register_lock)
1013 	gomp_mutex_unlock (&register_lock);
1014       gomp_fatal ("Cannot map target functions or variables"
1015 		  " (expected %u, have %u)", num_funcs + num_vars,
1016 		  num_target_entries);
1017     }
1018 
1019   /* Insert host-target address mapping into splay tree.  */
1020   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1021   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1022   tgt->refcount = REFCOUNT_INFINITY;
1023   tgt->tgt_start = 0;
1024   tgt->tgt_end = 0;
1025   tgt->to_free = NULL;
1026   tgt->prev = NULL;
1027   tgt->list_count = 0;
1028   tgt->device_descr = devicep;
1029   splay_tree_node array = tgt->array;
1030 
1031   for (i = 0; i < num_funcs; i++)
1032     {
1033       splay_tree_key k = &array->key;
1034       k->host_start = (uintptr_t) host_func_table[i];
1035       k->host_end = k->host_start + 1;
1036       k->tgt = tgt;
1037       k->tgt_offset = target_table[i].start;
1038       k->refcount = REFCOUNT_INFINITY;
1039       k->link_key = NULL;
1040       array->left = NULL;
1041       array->right = NULL;
1042       splay_tree_insert (&devicep->mem_map, array);
1043       array++;
1044     }
1045 
1046   /* Most significant bit of the size in host and target tables marks
1047      "omp declare target link" variables.  */
1048   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1049   const uintptr_t size_mask = ~link_bit;
1050 
1051   for (i = 0; i < num_vars; i++)
1052     {
1053       struct addr_pair *target_var = &target_table[num_funcs + i];
1054       uintptr_t target_size = target_var->end - target_var->start;
1055 
1056       if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1057 	{
1058 	  gomp_mutex_unlock (&devicep->lock);
1059 	  if (is_register_lock)
1060 	    gomp_mutex_unlock (&register_lock);
1061 	  gomp_fatal ("Cannot map target variables (size mismatch)");
1062 	}
1063 
1064       splay_tree_key k = &array->key;
1065       k->host_start = (uintptr_t) host_var_table[i * 2];
1066       k->host_end
1067 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1068       k->tgt = tgt;
1069       k->tgt_offset = target_var->start;
1070       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1071       k->link_key = NULL;
1072       array->left = NULL;
1073       array->right = NULL;
1074       splay_tree_insert (&devicep->mem_map, array);
1075       array++;
1076     }
1077 
1078   free (target_table);
1079 }
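
/* For orientation, the HOST_TABLE blob decoded above has this shape
   (a sketch; the field names are made up):

     struct host_table_layout {
       void **funcs_begin;  // addresses of offloadable functions
       void **funcs_end;
       void **vars_begin;   // pairs of { address, size } slots, hence
       void **vars_end;     //   the "/ 2" in num_vars above
     };

   The plugin's load_image_func returns an addr_pair array in the same
   order (functions first, then variables), which is why host and device
   compilers must emit their entries identically ordered.  */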
1080 
1081 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
1082    The device must be locked.  */
1083 
1084 static void
1085 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1086 			       unsigned version,
1087 			       const void *host_table, const void *target_data)
1088 {
1089   void **host_func_table = ((void ***) host_table)[0];
1090   void **host_funcs_end  = ((void ***) host_table)[1];
1091   void **host_var_table  = ((void ***) host_table)[2];
1092   void **host_vars_end   = ((void ***) host_table)[3];
1093 
1094   /* The func table contains only addresses, the var table contains addresses
1095      and corresponding sizes.  */
1096   int num_funcs = host_funcs_end - host_func_table;
1097   int num_vars  = (host_vars_end - host_var_table) / 2;
1098 
1099   struct splay_tree_key_s k;
1100   splay_tree_key node = NULL;
1101 
1102   /* Find the mapping at the start of the node array.  */
1103   if (num_funcs || num_vars)
1104     {
1105       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1106 		      : (uintptr_t) host_var_table[0]);
1107       k.host_end = k.host_start + 1;
1108       node = splay_tree_lookup (&devicep->mem_map, &k);
1109     }
1110 
1111   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1112     {
1113       gomp_mutex_unlock (&devicep->lock);
1114       gomp_fatal ("image unload fail");
1115     }
1116 
1117   /* Remove mappings from splay tree.  */
1118   int i;
1119   for (i = 0; i < num_funcs; i++)
1120     {
1121       k.host_start = (uintptr_t) host_func_table[i];
1122       k.host_end = k.host_start + 1;
1123       splay_tree_remove (&devicep->mem_map, &k);
1124     }
1125 
1126   /* Most significant bit of the size in host and target tables marks
1127      "omp declare target link" variables.  */
1128   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1129   const uintptr_t size_mask = ~link_bit;
1130   bool is_tgt_unmapped = false;
1131 
1132   for (i = 0; i < num_vars; i++)
1133     {
1134       k.host_start = (uintptr_t) host_var_table[i * 2];
1135       k.host_end
1136 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1137 
1138       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1139 	splay_tree_remove (&devicep->mem_map, &k);
1140       else
1141 	{
1142 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1143 	  splay_tree_remove (&devicep->mem_map, n);
1144 	  if (n->link_key)
1145 	    {
1146 	      if (n->tgt->refcount > 1)
1147 		n->tgt->refcount--;
1148 	      else
1149 		{
1150 		  is_tgt_unmapped = true;
1151 		  gomp_unmap_tgt (n->tgt);
1152 		}
1153 	    }
1154 	}
1155     }
1156 
1157   if (node && !is_tgt_unmapped)
1158     {
1159       free (node->tgt);
1160       free (node);
1161     }
1162 }
1163 
1164 /* This function should be called from every offload image when it is loaded.
1165    It takes the descriptor HOST_TABLE of the host func and var tables, the
1166    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1167 
1168 void
1169 GOMP_offload_register_ver (unsigned version, const void *host_table,
1170 			   int target_type, const void *target_data)
1171 {
1172   int i;
1173 
1174   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1175     gomp_fatal ("Library too old for offload (version %u < %u)",
1176 		GOMP_VERSION, GOMP_VERSION_LIB (version));
1177 
1178   gomp_mutex_lock (&register_lock);
1179 
1180   /* Load image to all initialized devices.  */
1181   for (i = 0; i < num_devices; i++)
1182     {
1183       struct gomp_device_descr *devicep = &devices[i];
1184       gomp_mutex_lock (&devicep->lock);
1185       if (devicep->type == target_type
1186 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1187 	gomp_load_image_to_device (devicep, version,
1188 				   host_table, target_data, true);
1189       gomp_mutex_unlock (&devicep->lock);
1190     }
1191 
1192   /* Insert the image into the array of pending images.  */
1193   offload_images
1194     = gomp_realloc_unlock (offload_images,
1195 			   (num_offload_images + 1)
1196 			   * sizeof (struct offload_image_descr));
1197   offload_images[num_offload_images].version = version;
1198   offload_images[num_offload_images].type = target_type;
1199   offload_images[num_offload_images].host_table = host_table;
1200   offload_images[num_offload_images].target_data = target_data;
1201 
1202   num_offload_images++;
1203   gomp_mutex_unlock (&register_lock);
1204 }
1205 
1206 void
1207 GOMP_offload_register (const void *host_table, int target_type,
1208 		       const void *target_data)
1209 {
1210   GOMP_offload_register_ver (0, host_table, target_type, target_data);
1211 }
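
/* GOMP_offload_register_ver is normally invoked from a constructor that
   the offload tooling (mkoffload) links into the host binary; roughly
   (a hypothetical sketch, with made-up table and variable names):

     static void __attribute__((constructor))
     register_offload_image (void)
     {
       GOMP_offload_register_ver (version, &__offload_table,
				  target_type, &target_image_data);
     }

   so images register themselves before main, and any device initialized
   later picks them up from offload_images in gomp_init_device.  */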
1212 
1213 /* This function should be called from every offload image when it is unloaded.
1214    It takes the descriptor HOST_TABLE of the host func and var tables, the
1215    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1216 
1217 void
1218 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1219 			     int target_type, const void *target_data)
1220 {
1221   int i;
1222 
1223   gomp_mutex_lock (&register_lock);
1224 
1225   /* Unload image from all initialized devices.  */
1226   for (i = 0; i < num_devices; i++)
1227     {
1228       struct gomp_device_descr *devicep = &devices[i];
1229       gomp_mutex_lock (&devicep->lock);
1230       if (devicep->type == target_type
1231 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1232 	gomp_unload_image_from_device (devicep, version,
1233 				       host_table, target_data);
1234       gomp_mutex_unlock (&devicep->lock);
1235     }
1236 
1237   /* Remove image from array of pending images.  */
1238   for (i = 0; i < num_offload_images; i++)
1239     if (offload_images[i].target_data == target_data)
1240       {
1241 	offload_images[i] = offload_images[--num_offload_images];
1242 	break;
1243       }
1244 
1245   gomp_mutex_unlock (&register_lock);
1246 }
1247 
1248 void
1249 GOMP_offload_unregister (const void *host_table, int target_type,
1250 			 const void *target_data)
1251 {
1252   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1253 }
1254 
1255 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
1256    must be locked on entry, and remains locked on return.  */
1257 
1258 attribute_hidden void
1259 gomp_init_device (struct gomp_device_descr *devicep)
1260 {
1261   int i;
1262   if (!devicep->init_device_func (devicep->target_id))
1263     {
1264       gomp_mutex_unlock (&devicep->lock);
1265       gomp_fatal ("device initialization failed");
1266     }
1267 
1268   /* Load onto the device all images registered so far.  */
1269   for (i = 0; i < num_offload_images; i++)
1270     {
1271       struct offload_image_descr *image = &offload_images[i];
1272       if (image->type == devicep->type)
1273 	gomp_load_image_to_device (devicep, image->version,
1274 				   image->host_table, image->target_data,
1275 				   false);
1276     }
1277 
1278   devicep->state = GOMP_DEVICE_INITIALIZED;
1279 }
1280 
1281 attribute_hidden void
1282 gomp_unload_device (struct gomp_device_descr *devicep)
1283 {
1284   if (devicep->state == GOMP_DEVICE_INITIALIZED)
1285     {
1286       unsigned i;
1287 
1288       /* Unload from the device all currently registered images.  */
1289       for (i = 0; i < num_offload_images; i++)
1290 	{
1291 	  struct offload_image_descr *image = &offload_images[i];
1292 	  if (image->type == devicep->type)
1293 	    gomp_unload_image_from_device (devicep, image->version,
1294 					   image->host_table,
1295 					   image->target_data);
1296 	}
1297     }
1298 }
1299 
1300 /* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
1301    locked on return.  */
1302 
1303 attribute_hidden void
1304 gomp_free_memmap (struct splay_tree_s *mem_map)
1305 {
1306   while (mem_map->root)
1307     {
1308       struct target_mem_desc *tgt = mem_map->root->key.tgt;
1309 
1310       splay_tree_remove (mem_map, &mem_map->root->key);
1311       free (tgt->array);
1312       free (tgt);
1313     }
1314 }
1315 
1316 /* Host fallback for GOMP_target{,_ext} routines.  */
1317 
1318 static void
1319 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1320 {
1321   struct gomp_thread old_thr, *thr = gomp_thread ();
1322   old_thr = *thr;
1323   memset (thr, '\0', sizeof (*thr));
1324   if (gomp_places_list)
1325     {
1326       thr->place = old_thr.place;
1327       thr->ts.place_partition_len = gomp_places_list_len;
1328     }
1329   fn (hostaddrs);
1330   gomp_free_thread (thr);
1331   *thr = old_thr;
1332 }
1333 
1334 /* Calculate alignment and size requirements of a private copy of data shared
1335    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
1336 
1337 static inline void
1338 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1339 				     unsigned short *kinds, size_t *tgt_align,
1340 				     size_t *tgt_size)
1341 {
1342   size_t i;
1343   for (i = 0; i < mapnum; i++)
1344     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1345       {
1346 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1347 	if (*tgt_align < align)
1348 	  *tgt_align = align;
1349 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1350 	*tgt_size += sizes[i];
1351       }
1352 }
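
/* Worked example of the rounding used above and in
   copy_firstprivate_data: with *tgt_size == 13 and align == 8,

     (13 + 8 - 1) & ~(8 - 1)  ==  20 & ~7  ==  16

   i.e. the running size is rounded up to the next multiple of the
   alignment before the next firstprivate copy is laid out there.  */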
1353 
1354 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
1355 
1356 static inline void
1357 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1358 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
1359 			size_t tgt_size)
1360 {
1361   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1362   if (al)
1363     tgt += tgt_align - al;
1364   tgt_size = 0;
1365   size_t i;
1366   for (i = 0; i < mapnum; i++)
1367     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1368       {
1369 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1370 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
1371 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1372 	hostaddrs[i] = tgt + tgt_size;
1373 	tgt_size = tgt_size + sizes[i];
1374       }
1375 }
1376 
1377 /* Helper function of GOMP_target{,_ext} routines.  */
1378 
1379 static void *
1380 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1381 			 void (*host_fn) (void *))
1382 {
1383   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1384     return (void *) host_fn;
1385   else
1386     {
1387       gomp_mutex_lock (&devicep->lock);
1388       if (devicep->state == GOMP_DEVICE_FINALIZED)
1389 	{
1390 	  gomp_mutex_unlock (&devicep->lock);
1391 	  return NULL;
1392 	}
1393 
1394       struct splay_tree_key_s k;
1395       k.host_start = (uintptr_t) host_fn;
1396       k.host_end = k.host_start + 1;
1397       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1398       gomp_mutex_unlock (&devicep->lock);
1399       if (tgt_fn == NULL)
1400 	return NULL;
1401 
1402       return (void *) tgt_fn->tgt_offset;
1403     }
1404 }
1405 
1406 /* Called when encountering a target directive.  If DEVICE is
1407    GOMP_DEVICE_ICV, it means use the default-device-var ICV.  If it is
1408    GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
1409    hw device), use the host fallback.  FN is the address of the host code,
1410    UNUSED is part of the current ABI, but we're not actually using it.
1411    HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, holding the
1412    addresses of the host objects, the sizes of the host objects (resp., for
1413    pointer kinds, the pointer bias with an assumed size of sizeof (void *))
1414    and the kinds.  */
1415 
1416 void
1417 GOMP_target (int device, void (*fn) (void *), const void *unused,
1418 	     size_t mapnum, void **hostaddrs, size_t *sizes,
1419 	     unsigned char *kinds)
1420 {
1421   struct gomp_device_descr *devicep = resolve_device (device);
1422 
1423   void *fn_addr;
1424   if (devicep == NULL
1425       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1426       /* All shared memory devices should use the GOMP_target_ext function.  */
1427       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1428       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1429     return gomp_target_fallback (fn, hostaddrs);
1430 
1431   struct target_mem_desc *tgt_vars
1432     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1433 		     GOMP_MAP_VARS_TARGET);
1434   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1435 		     NULL);
1436   gomp_unmap_vars (tgt_vars, true);
1437 }
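
/* Illustrative user-level code (not part of libgomp) that reaches
   GOMP_target through the compiler:

     int x = 42;
     #pragma omp target map(tofrom: x)
     x++;

   GCC outlines the body into a function FN and emits a call with
   mapnum == 1 and a tofrom kind for x; on return x == 43 whether the
   region ran on a device or through gomp_target_fallback.  */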
1438 
1439 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1440    and several arguments have been added:
1441    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1442    DEPEND is array of dependencies, see GOMP_task for details.
1443 
1444    ARGS is a pointer to an array consisting of a variable number of both
1445    device-independent and device-specific arguments, each of which can take
1446    one or two elements: the first specifies for which device it is intended,
1447    the type, and optionally also the value.  If the value is not present in
1448    the first element, the whole second element holds the actual value.  The
1449    last element of the array is a single NULL.  Among the device-independent
1450    arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
1451 
1452    NUM_TEAMS is positive if GOMP_teams will be called in the body with that
1453    value, 1 if the teams construct is not present, 0 if the teams construct
1454    does not have a num_teams clause and so the choice is implementation
1455    defined, and -1 if it can't be determined on the host what value
1456    GOMP_teams will have on the device.
1457    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1458    body with that value, 0 if the teams construct does not have a thread_limit
1459    clause or the teams construct is not present, and -1 if it can't be
1460    determined on the host what value GOMP_teams will have on the device.  */
1461 
1462 void
1463 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1464 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1465 		 unsigned int flags, void **depend, void **args)
1466 {
1467   struct gomp_device_descr *devicep = resolve_device (device);
1468   size_t tgt_align = 0, tgt_size = 0;
1469   bool fpc_done = false;
1470 
1471   if (flags & GOMP_TARGET_FLAG_NOWAIT)
1472     {
1473       struct gomp_thread *thr = gomp_thread ();
1474       /* Create a team if we don't have any around, as nowait
1475 	 target tasks make sense to run asynchronously even when
1476 	 outside of any parallel.  */
1477       if (__builtin_expect (thr->ts.team == NULL, 0))
1478 	{
1479 	  struct gomp_team *team = gomp_new_team (1);
1480 	  struct gomp_task *task = thr->task;
1481 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1482 	  team->prev_ts = thr->ts;
1483 	  thr->ts.team = team;
1484 	  thr->ts.team_id = 0;
1485 	  thr->ts.work_share = &team->work_shares[0];
1486 	  thr->ts.last_work_share = NULL;
1487 #ifdef HAVE_SYNC_BUILTINS
1488 	  thr->ts.single_count = 0;
1489 #endif
1490 	  thr->ts.static_trip = 0;
1491 	  thr->task = &team->implicit_task[0];
1492 	  gomp_init_task (thr->task, NULL, icv);
1493 	  if (task)
1494 	    {
1495 	      thr->task = task;
1496 	      gomp_end_task ();
1497 	      free (task);
1498 	      thr->task = &team->implicit_task[0];
1499 	    }
1500 	  else
1501 	    pthread_setspecific (gomp_thread_destructor, thr);
1502 	}
1503       if (thr->ts.team
1504 	  && !thr->task->final_task)
1505 	{
1506 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1507 				   sizes, kinds, flags, depend, args,
1508 				   GOMP_TARGET_TASK_BEFORE_MAP);
1509 	  return;
1510 	}
1511     }
1512 
1513   /* If there are depend clauses, but nowait is not present
1514      (or we are in a final task), block the parent task until the
1515      dependencies are resolved and then just continue with the rest
1516      of the function as if it were a merged task.  */
1517   if (depend != NULL)
1518     {
1519       struct gomp_thread *thr = gomp_thread ();
1520       if (thr->task && thr->task->depend_hash)
1521 	{
1522 	  /* If we might need to wait, copy firstprivate now.  */
1523 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1524 					       &tgt_align, &tgt_size);
1525 	  if (tgt_align)
1526 	    {
1527 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1528 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1529 				      tgt_align, tgt_size);
1530 	    }
1531 	  fpc_done = true;
1532 	  gomp_task_maybe_wait_for_dependencies (depend);
1533 	}
1534     }
1535 
1536   void *fn_addr;
1537   if (devicep == NULL
1538       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1539       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1540       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1541     {
1542       if (!fpc_done)
1543 	{
1544 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1545 					       &tgt_align, &tgt_size);
1546 	  if (tgt_align)
1547 	    {
1548 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1549 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1550 				      tgt_align, tgt_size);
1551 	    }
1552 	}
1553       gomp_target_fallback (fn, hostaddrs);
1554       return;
1555     }
1556 
1557   struct target_mem_desc *tgt_vars;
1558   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1559     {
1560       if (!fpc_done)
1561 	{
1562 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1563 					       &tgt_align, &tgt_size);
1564 	  if (tgt_align)
1565 	    {
1566 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1567 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1568 				      tgt_align, tgt_size);
1569 	    }
1570 	}
1571       tgt_vars = NULL;
1572     }
1573   else
1574     tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1575 			      true, GOMP_MAP_VARS_TARGET);
1576   devicep->run_func (devicep->target_id, fn_addr,
1577 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1578 		     args);
1579   if (tgt_vars)
1580     gomp_unmap_vars (tgt_vars, true);
1581 }
1582 
1583 /* Host fallback for GOMP_target_data{,_ext} routines.  */
1584 
1585 static void
1586 gomp_target_data_fallback (void)
1587 {
1588   struct gomp_task_icv *icv = gomp_icv (false);
1589   if (icv->target_data)
1590     {
1591       /* Even when doing a host fallback, if there are any active
1592          #pragma omp target data constructs, we need to remember the
1593          new #pragma omp target data; otherwise GOMP_target_end_data
1594          would get out of sync.  */
1595       struct target_mem_desc *tgt
1596 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1597 			 GOMP_MAP_VARS_DATA);
1598       tgt->prev = icv->target_data;
1599       icv->target_data = tgt;
1600     }
1601 }
1602 
1603 void
1604 GOMP_target_data (int device, const void *unused, size_t mapnum,
1605 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
1606 {
1607   struct gomp_device_descr *devicep = resolve_device (device);
1608 
1609   if (devicep == NULL
1610       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1611       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1612     return gomp_target_data_fallback ();
1613 
1614   struct target_mem_desc *tgt
1615     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1616 		     GOMP_MAP_VARS_DATA);
1617   struct gomp_task_icv *icv = gomp_icv (true);
1618   tgt->prev = icv->target_data;
1619   icv->target_data = tgt;
1620 }
1621 
1622 void
1623 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1624 		      size_t *sizes, unsigned short *kinds)
1625 {
1626   struct gomp_device_descr *devicep = resolve_device (device);
1627 
1628   if (devicep == NULL
1629       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1630       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1631     return gomp_target_data_fallback ();
1632 
1633   struct target_mem_desc *tgt
1634     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1635 		     GOMP_MAP_VARS_DATA);
1636   struct gomp_task_icv *icv = gomp_icv (true);
1637   tgt->prev = icv->target_data;
1638   icv->target_data = tgt;
1639 }
1640 
1641 void
1642 GOMP_target_end_data (void)
1643 {
1644   struct gomp_task_icv *icv = gomp_icv (false);
1645   if (icv->target_data)
1646     {
1647       struct target_mem_desc *tgt = icv->target_data;
1648       icv->target_data = tgt->prev;
1649       gomp_unmap_vars (tgt, true);
1650     }
1651 }
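
/* Editorial illustration (hedged sketch, not original source): a region

       #pragma omp target data map(tofrom: a[0:n])
       { ... }

   is lowered to a GOMP_target_data{,_ext} call on entry and a matching
   GOMP_target_end_data call on exit.  The TGT->PREV links maintained
   above form a stack, so nested data regions unwind in order even in
   the host-fallback case.  */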
1652 
1653 void
1654 GOMP_target_update (int device, const void *unused, size_t mapnum,
1655 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
1656 {
1657   struct gomp_device_descr *devicep = resolve_device (device);
1658 
1659   if (devicep == NULL
1660       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1661       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1662     return;
1663 
1664   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1665 }
1666 
1667 void
1668 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1669 			size_t *sizes, unsigned short *kinds,
1670 			unsigned int flags, void **depend)
1671 {
1672   struct gomp_device_descr *devicep = resolve_device (device);
1673 
1674   /* If there are depend clauses, but nowait is not present,
1675      block the parent task until the dependencies are resolved
1676      and then just continue with the rest of the function as if it
1677      were a merged task.  Until we are able to schedule tasks during
1678      variable mapping or unmapping, ignore nowait if depend clauses
1679      are not present.  */
1680   if (depend != NULL)
1681     {
1682       struct gomp_thread *thr = gomp_thread ();
1683       if (thr->task && thr->task->depend_hash)
1684 	{
1685 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1686 	      && thr->ts.team
1687 	      && !thr->task->final_task)
1688 	    {
1689 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1690 					   mapnum, hostaddrs, sizes, kinds,
1691 					   flags | GOMP_TARGET_FLAG_UPDATE,
1692 					   depend, NULL, GOMP_TARGET_TASK_DATA))
1693 		return;
1694 	    }
1695 	  else
1696 	    {
1697 	      struct gomp_team *team = thr->ts.team;
1698 	      /* If parallel or taskgroup has been cancelled, don't start new
1699 		 tasks.  */
1700 	      if (team
1701 		  && (gomp_team_barrier_cancelled (&team->barrier)
1702 		      || (thr->task->taskgroup
1703 			  && thr->task->taskgroup->cancelled)))
1704 		return;
1705 
1706 	      gomp_task_maybe_wait_for_dependencies (depend);
1707 	    }
1708 	}
1709     }
1710 
1711   if (devicep == NULL
1712       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1713       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1714     return;
1715 
1716   struct gomp_thread *thr = gomp_thread ();
1717   struct gomp_team *team = thr->ts.team;
1718   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1719   if (team
1720       && (gomp_team_barrier_cancelled (&team->barrier)
1721 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1722     return;
1723 
1724   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1725 }
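
/* Editorial illustration (hedged): an update directive such as

       #pragma omp target update to(a[0:n]) depend(in: x) nowait

   would arrive here with GOMP_TARGET_FLAG_NOWAIT set and DEPEND
   non-NULL and take the gomp_create_target_task path above; without a
   depend clause the nowait is ignored, as the comment above explains.  */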
1726 
1727 static void
1728 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1729 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
1730 {
1731   const int typemask = 0xff;
1732   size_t i;
1733   gomp_mutex_lock (&devicep->lock);
1734   if (devicep->state == GOMP_DEVICE_FINALIZED)
1735     {
1736       gomp_mutex_unlock (&devicep->lock);
1737       return;
1738     }
1739 
1740   for (i = 0; i < mapnum; i++)
1741     {
1742       struct splay_tree_key_s cur_node;
1743       unsigned char kind = kinds[i] & typemask;
1744       switch (kind)
1745 	{
1746 	case GOMP_MAP_FROM:
1747 	case GOMP_MAP_ALWAYS_FROM:
1748 	case GOMP_MAP_DELETE:
1749 	case GOMP_MAP_RELEASE:
1750 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1751 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1752 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
1753 	  cur_node.host_end = cur_node.host_start + sizes[i];
1754 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1755 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1756 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1757 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
1758 	  if (!k)
1759 	    continue;
1760 
1761 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1762 	    k->refcount--;
1763 	  if ((kind == GOMP_MAP_DELETE
1764 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1765 	      && k->refcount != REFCOUNT_INFINITY)
1766 	    k->refcount = 0;
1767 
1768 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1769 	      || kind == GOMP_MAP_ALWAYS_FROM)
1770 	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1771 				(void *) (k->tgt->tgt_start + k->tgt_offset
1772 					  + cur_node.host_start
1773 					  - k->host_start),
1774 				cur_node.host_end - cur_node.host_start);
1775 	  if (k->refcount == 0)
1776 	    {
1777 	      splay_tree_remove (&devicep->mem_map, k);
1778 	      if (k->link_key)
1779 		splay_tree_insert (&devicep->mem_map,
1780 				   (splay_tree_node) k->link_key);
1781 	      if (k->tgt->refcount > 1)
1782 		k->tgt->refcount--;
1783 	      else
1784 		gomp_unmap_tgt (k->tgt);
1785 	    }
1786 
1787 	  break;
1788 	default:
1789 	  gomp_mutex_unlock (&devicep->lock);
1790 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1791 		      kind);
1792 	}
1793     }
1794 
1795   gomp_mutex_unlock (&devicep->lock);
1796 }
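
/* Editorial worked example of the reference counting above: suppose A
   is currently mapped with refcount 2.  A GOMP_MAP_FROM entry drops the
   refcount to 1 and copies nothing; a second GOMP_MAP_FROM drops it to
   0, performs the dev2host copy, and removes the splay-tree key.  A
   GOMP_MAP_DELETE entry instead forces the refcount straight to 0,
   regardless of its previous value (unless it is REFCOUNT_INFINITY).  */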
1797 
1798 void
1799 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1800 			     size_t *sizes, unsigned short *kinds,
1801 			     unsigned int flags, void **depend)
1802 {
1803   struct gomp_device_descr *devicep = resolve_device (device);
1804 
1805   /* If there are depend clauses, but nowait is not present,
1806      block the parent task until the dependencies are resolved
1807      and then just continue with the rest of the function as if it
1808      were a merged task.  Until we are able to schedule tasks during
1809      variable mapping or unmapping, ignore nowait if depend clauses
1810      are not present.  */
1811   if (depend != NULL)
1812     {
1813       struct gomp_thread *thr = gomp_thread ();
1814       if (thr->task && thr->task->depend_hash)
1815 	{
1816 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1817 	      && thr->ts.team
1818 	      && !thr->task->final_task)
1819 	    {
1820 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1821 					   mapnum, hostaddrs, sizes, kinds,
1822 					   flags, depend, NULL,
1823 					   GOMP_TARGET_TASK_DATA))
1824 		return;
1825 	    }
1826 	  else
1827 	    {
1828 	      struct gomp_team *team = thr->ts.team;
1829 	      /* If parallel or taskgroup has been cancelled, don't start new
1830 		 tasks.  */
1831 	      if (team
1832 		  && (gomp_team_barrier_cancelled (&team->barrier)
1833 		      || (thr->task->taskgroup
1834 			  && thr->task->taskgroup->cancelled)))
1835 		return;
1836 
1837 	      gomp_task_maybe_wait_for_dependencies (depend);
1838 	    }
1839 	}
1840     }
1841 
1842   if (devicep == NULL
1843       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1844       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1845     return;
1846 
1847   struct gomp_thread *thr = gomp_thread ();
1848   struct gomp_team *team = thr->ts.team;
1849   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1850   if (team
1851       && (gomp_team_barrier_cancelled (&team->barrier)
1852 	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
1853     return;
1854 
1855   size_t i;
1856   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1857     for (i = 0; i < mapnum; i++)
1858       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1859 	{
1860 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
1861 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1862 	  i += sizes[i];
1863 	}
1864       else
1865 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
1866 		       true, GOMP_MAP_VARS_ENTER_DATA);
1867   else
1868     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
1869 }
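
/* Editorial illustration (hedged): for

       #pragma omp target enter data map(to: a[0:n])

   the GOMP_TARGET_FLAG_EXIT_DATA flag is clear and each map entry is
   passed to gomp_map_vars with GOMP_MAP_VARS_ENTER_DATA.  A
   GOMP_MAP_STRUCT entry covers itself plus SIZES[i] following field
   entries, which is why the loop above advances I by SIZES[i].  */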
1870 
1871 bool
1872 gomp_target_task_fn (void *data)
1873 {
1874   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
1875   struct gomp_device_descr *devicep = ttask->devicep;
1876 
1877   if (ttask->fn != NULL)
1878     {
1879       void *fn_addr;
1880       if (devicep == NULL
1881 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1882 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
1883 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1884 	{
1885 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
1886 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
1887 	  return false;
1888 	}
1889 
1890       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1891 	{
1892 	  if (ttask->tgt)
1893 	    gomp_unmap_vars (ttask->tgt, true);
1894 	  return false;
1895 	}
1896 
1897       void *actual_arguments;
1898       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1899 	{
1900 	  ttask->tgt = NULL;
1901 	  actual_arguments = ttask->hostaddrs;
1902 	}
1903       else
1904 	{
1905 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
1906 				      NULL, ttask->sizes, ttask->kinds, true,
1907 				      GOMP_MAP_VARS_TARGET);
1908 	  actual_arguments = (void *) ttask->tgt->tgt_start;
1909 	}
1910       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
1911 
1912       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
1913 			       ttask->args, (void *) ttask);
1914       return true;
1915     }
1916   else if (devicep == NULL
1917 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1918 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1919     return false;
1920 
1921   size_t i;
1922   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
1923     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1924 		 ttask->kinds, true);
1925   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
1926     for (i = 0; i < ttask->mapnum; i++)
1927       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
1928 	{
1929 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
1930 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
1931 			 GOMP_MAP_VARS_ENTER_DATA);
1932 	  i += ttask->sizes[i];
1933 	}
1934       else
1935 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
1936 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
1937   else
1938     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
1939 		    ttask->kinds);
1940   return false;
1941 }
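
/* Editorial note on the task states used above: a device task is mapped
   and set to GOMP_TARGET_TASK_READY_TO_RUN before async_run_func is
   invoked; returning true tells the caller the task is still in flight.
   When the plugin later signals completion (via a callback outside this
   file; an assumption here), the function is re-entered with state
   GOMP_TARGET_TASK_FINISHED and only unmaps.  */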
1942 
1943 void
1944 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
1945 {
1946   if (thread_limit)
1947     {
1948       struct gomp_task_icv *icv = gomp_icv (true);
1949       icv->thread_limit_var
1950 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
1951     }
1952   (void) num_teams;
1953 }
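
/* Editorial illustration (hedged): a directive such as

       #pragma omp teams num_teams(4) thread_limit(8)

   is expected to lower to GOMP_teams (4, 8); as the body above shows,
   this host-side stub merely records THREAD_LIMIT in the ICV and
   ignores NUM_TEAMS.  */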
1954 
1955 void *
1956 omp_target_alloc (size_t size, int device_num)
1957 {
1958   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1959     return malloc (size);
1960 
1961   if (device_num < 0)
1962     return NULL;
1963 
1964   struct gomp_device_descr *devicep = resolve_device (device_num);
1965   if (devicep == NULL)
1966     return NULL;
1967 
1968   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1969       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1970     return malloc (size);
1971 
1972   gomp_mutex_lock (&devicep->lock);
1973   void *ret = devicep->alloc_func (devicep->target_id, size);
1974   gomp_mutex_unlock (&devicep->lock);
1975   return ret;
1976 }
1977 
1978 void
1979 omp_target_free (void *device_ptr, int device_num)
1980 {
1981   if (device_ptr == NULL)
1982     return;
1983 
1984   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
1985     {
1986       free (device_ptr);
1987       return;
1988     }
1989 
1990   if (device_num < 0)
1991     return;
1992 
1993   struct gomp_device_descr *devicep = resolve_device (device_num);
1994   if (devicep == NULL)
1995     return;
1996 
1997   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1998       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1999     {
2000       free (device_ptr);
2001       return;
2002     }
2003 
2004   gomp_mutex_lock (&devicep->lock);
2005   gomp_free_device_memory (devicep, device_ptr);
2006   gomp_mutex_unlock (&devicep->lock);
2007 }
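
/* Editorial usage sketch for omp_target_alloc/omp_target_free (assumes
   <omp.h>; not from the original source):

       #include <omp.h>

       void
       example (void)
       {
	 int dev = omp_get_default_device ();
	 void *p = omp_target_alloc (1024, dev);
	 if (p != NULL)
	   omp_target_free (p, dev);	// must name the same device

       }

   As the code above shows, both calls degrade to plain malloc/free for
   the host fallback device and for shared-memory devices.  */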
2008 
2009 int
2010 omp_target_is_present (void *ptr, int device_num)
2011 {
2012   if (ptr == NULL)
2013     return 1;
2014 
2015   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2016     return 1;
2017 
2018   if (device_num < 0)
2019     return 0;
2020 
2021   struct gomp_device_descr *devicep = resolve_device (device_num);
2022   if (devicep == NULL)
2023     return 0;
2024 
2025   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2026       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2027     return 1;
2028 
2029   gomp_mutex_lock (&devicep->lock);
2030   struct splay_tree_s *mem_map = &devicep->mem_map;
2031   struct splay_tree_key_s cur_node;
2032 
2033   cur_node.host_start = (uintptr_t) ptr;
2034   cur_node.host_end = cur_node.host_start;
2035   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2036   int ret = n != NULL;
2037   gomp_mutex_unlock (&devicep->lock);
2038   return ret;
2039 }
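
/* Editorial usage sketch (hedged; assumes A was mapped earlier, e.g. by
   an enclosing target data region):

       if (omp_target_is_present (&a[0], dev))
	 ...  // &a[0] has a translation in DEV's splay tree

   The lookup above deliberately uses an empty range (host_end equal to
   host_start), so it tests presence of the single address rather than
   of a whole array section.  */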
2040 
2041 int
2042 omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2043 		   size_t src_offset, int dst_device_num, int src_device_num)
2044 {
2045   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2046   bool ret;
2047 
2048   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2049     {
2050       if (dst_device_num < 0)
2051 	return EINVAL;
2052 
2053       dst_devicep = resolve_device (dst_device_num);
2054       if (dst_devicep == NULL)
2055 	return EINVAL;
2056 
2057       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2058 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2059 	dst_devicep = NULL;
2060     }
2061   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2062     {
2063       if (src_device_num < 0)
2064 	return EINVAL;
2065 
2066       src_devicep = resolve_device (src_device_num);
2067       if (src_devicep == NULL)
2068 	return EINVAL;
2069 
2070       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2071 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2072 	src_devicep = NULL;
2073     }
2074   if (src_devicep == NULL && dst_devicep == NULL)
2075     {
2076       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2077       return 0;
2078     }
2079   if (src_devicep == NULL)
2080     {
2081       gomp_mutex_lock (&dst_devicep->lock);
2082       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2083 					(char *) dst + dst_offset,
2084 					(char *) src + src_offset, length);
2085       gomp_mutex_unlock (&dst_devicep->lock);
2086       return (ret ? 0 : EINVAL);
2087     }
2088   if (dst_devicep == NULL)
2089     {
2090       gomp_mutex_lock (&src_devicep->lock);
2091       ret = src_devicep->dev2host_func (src_devicep->target_id,
2092 					(char *) dst + dst_offset,
2093 					(char *) src + src_offset, length);
2094       gomp_mutex_unlock (&src_devicep->lock);
2095       return (ret ? 0 : EINVAL);
2096     }
2097   if (src_devicep == dst_devicep)
2098     {
2099       gomp_mutex_lock (&src_devicep->lock);
2100       ret = src_devicep->dev2dev_func (src_devicep->target_id,
2101 				       (char *) dst + dst_offset,
2102 				       (char *) src + src_offset, length);
2103       gomp_mutex_unlock (&src_devicep->lock);
2104       return (ret ? 0 : EINVAL);
2105     }
2106   return EINVAL;
2107 }
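
/* Editorial usage sketch for omp_target_memcpy (hedged; HOST_BUF and N
   are invented names, and the use of omp_get_initial_device () as the
   host device number is an implementation assumption):

       int dev = omp_get_default_device ();
       void *dptr = omp_target_alloc (n, dev);
       int err = omp_target_memcpy (dptr, host_buf, n, 0, 0,
				    dev, omp_get_initial_device ());

   A zero return means success; EINVAL covers invalid devices and the
   unsupported device-to-different-device case rejected last above.  */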
2108 
2109 static int
2110 omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
2111 			       int num_dims, const size_t *volume,
2112 			       const size_t *dst_offsets,
2113 			       const size_t *src_offsets,
2114 			       const size_t *dst_dimensions,
2115 			       const size_t *src_dimensions,
2116 			       struct gomp_device_descr *dst_devicep,
2117 			       struct gomp_device_descr *src_devicep)
2118 {
2119   size_t dst_slice = element_size;
2120   size_t src_slice = element_size;
2121   size_t j, dst_off, src_off, length;
2122   int i, ret;
2123 
2124   if (num_dims == 1)
2125     {
2126       if (__builtin_mul_overflow (element_size, volume[0], &length)
2127 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2128 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2129 	return EINVAL;
2130       if (dst_devicep == NULL && src_devicep == NULL)
2131 	{
2132 	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
2133 	  ret = 1;
2134 	}
2135       else if (src_devicep == NULL)
2136 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2137 					  (char *) dst + dst_off,
2138 					  (char *) src + src_off, length);
2139       else if (dst_devicep == NULL)
2140 	ret = src_devicep->dev2host_func (src_devicep->target_id,
2141 					  (char *) dst + dst_off,
2142 					  (char *) src + src_off, length);
2143       else if (src_devicep == dst_devicep)
2144 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
2145 					 (char *) dst + dst_off,
2146 					 (char *) src + src_off, length);
2147       else
2148 	ret = 0;
2149       return ret ? 0 : EINVAL;
2150     }
2151 
2152   /* FIXME: it would be nice to have some plugin function to handle
2153      num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
2154      counts can be handled by the generic recursion below; for host-to-host
2155      copies the recursion should be used for any num_dims >= 2.  */
2156 
2157   for (i = 1; i < num_dims; i++)
2158     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2159 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2160       return EINVAL;
2161   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2162       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2163     return EINVAL;
2164   for (j = 0; j < volume[0]; j++)
2165     {
2166       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2167 					   (char *) src + src_off,
2168 					   element_size, num_dims - 1,
2169 					   volume + 1, dst_offsets + 1,
2170 					   src_offsets + 1, dst_dimensions + 1,
2171 					   src_dimensions + 1, dst_devicep,
2172 					   src_devicep);
2173       if (ret)
2174 	return ret;
2175       dst_off += dst_slice;
2176       src_off += src_slice;
2177     }
2178   return 0;
2179 }
2180 
2181 int
2182 omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2183 			int num_dims, const size_t *volume,
2184 			const size_t *dst_offsets,
2185 			const size_t *src_offsets,
2186 			const size_t *dst_dimensions,
2187 			const size_t *src_dimensions,
2188 			int dst_device_num, int src_device_num)
2189 {
2190   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2191 
2192   if (!dst && !src)
2193     return INT_MAX;
2194 
2195   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2196     {
2197       if (dst_device_num < 0)
2198 	return EINVAL;
2199 
2200       dst_devicep = resolve_device (dst_device_num);
2201       if (dst_devicep == NULL)
2202 	return EINVAL;
2203 
2204       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2205 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2206 	dst_devicep = NULL;
2207     }
2208   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2209     {
2210       if (src_device_num < 0)
2211 	return EINVAL;
2212 
2213       src_devicep = resolve_device (src_device_num);
2214       if (src_devicep == NULL)
2215 	return EINVAL;
2216 
2217       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2218 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2219 	src_devicep = NULL;
2220     }
2221 
2222   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2223     return EINVAL;
2224 
2225   if (src_devicep)
2226     gomp_mutex_lock (&src_devicep->lock);
2227   else if (dst_devicep)
2228     gomp_mutex_lock (&dst_devicep->lock);
2229   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2230 					   volume, dst_offsets, src_offsets,
2231 					   dst_dimensions, src_dimensions,
2232 					   dst_devicep, src_devicep);
2233   if (src_devicep)
2234     gomp_mutex_unlock (&src_devicep->lock);
2235   else if (dst_devicep)
2236     gomp_mutex_unlock (&dst_devicep->lock);
2237   return ret;
2238 }
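
/* Editorial worked example for omp_target_memcpy_rect (hedged): copy a
   2x3 sub-block between two host-resident 4x5 byte arrays.

       char dst[4][5], src[4][5];
       size_t volume[2]  = { 2, 3 };   // rows, columns to copy
       size_t dst_off[2] = { 1, 2 };   // place block at row 1, column 2
       size_t src_off[2] = { 0, 0 };
       size_t dims[2]    = { 4, 5 };   // full extents of both arrays
       int host = omp_get_initial_device ();  // assumed host device number
       omp_target_memcpy_rect (dst, src, 1, 2, volume, dst_off, src_off,
			       dims, dims, host, host);

   Per the worker above, each outer iteration advances SRC_OFF by
   src_slice = element_size * src_dimensions[1] bytes, i.e. one full
   source row.  Note also the NULL/NULL special case above: it reports
   INT_MAX, i.e. no practical limit on the number of dimensions.  */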
2239 
2240 int
2241 omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
2242 			  size_t device_offset, int device_num)
2243 {
2244   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2245     return EINVAL;
2246 
2247   if (device_num < 0)
2248     return EINVAL;
2249 
2250   struct gomp_device_descr *devicep = resolve_device (device_num);
2251   if (devicep == NULL)
2252     return EINVAL;
2253 
2254   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2255       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2256     return EINVAL;
2257 
2258   gomp_mutex_lock (&devicep->lock);
2259 
2260   struct splay_tree_s *mem_map = &devicep->mem_map;
2261   struct splay_tree_key_s cur_node;
2262   int ret = EINVAL;
2263 
2264   cur_node.host_start = (uintptr_t) host_ptr;
2265   cur_node.host_end = cur_node.host_start + size;
2266   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2267   if (n)
2268     {
2269       if (n->tgt->tgt_start + n->tgt_offset
2270 	  == (uintptr_t) device_ptr + device_offset
2271 	  && n->host_start <= cur_node.host_start
2272 	  && n->host_end >= cur_node.host_end)
2273 	ret = 0;
2274     }
2275   else
2276     {
2277       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2278       tgt->array = gomp_malloc (sizeof (*tgt->array));
2279       tgt->refcount = 1;
2280       tgt->tgt_start = 0;
2281       tgt->tgt_end = 0;
2282       tgt->to_free = NULL;
2283       tgt->prev = NULL;
2284       tgt->list_count = 0;
2285       tgt->device_descr = devicep;
2286       splay_tree_node array = tgt->array;
2287       splay_tree_key k = &array->key;
2288       k->host_start = cur_node.host_start;
2289       k->host_end = cur_node.host_end;
2290       k->tgt = tgt;
2291       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2292       k->refcount = REFCOUNT_INFINITY;
2293       array->left = NULL;
2294       array->right = NULL;
2295       splay_tree_insert (&devicep->mem_map, array);
2296       ret = 0;
2297     }
2298   gomp_mutex_unlock (&devicep->lock);
2299   return ret;
2300 }
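
/* Editorial usage sketch (hedged; HOST_BUF, N and DEV are invented):

       void *dptr = omp_target_alloc (n, dev);
       if (omp_target_associate_ptr (host_buf, dptr, n, 0, dev) == 0)
	 {
	   // host_buf now has a device translation; the
	   // REFCOUNT_INFINITY set above keeps later map/unmap pairs
	   // from copying or freeing it implicitly
	   omp_target_disassociate_ptr (host_buf, dev);
	 }

   Re-associating an already-mapped range with a different device
   address yields EINVAL, per the overlap check above.  */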
2301 
2302 int
2303 omp_target_disassociate_ptr (void *ptr, int device_num)
2304 {
2305   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2306     return EINVAL;
2307 
2308   if (device_num < 0)
2309     return EINVAL;
2310 
2311   struct gomp_device_descr *devicep = resolve_device (device_num);
2312   if (devicep == NULL)
2313     return EINVAL;
2314 
2315   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2316     return EINVAL;
2317 
2318   gomp_mutex_lock (&devicep->lock);
2319 
2320   struct splay_tree_s *mem_map = &devicep->mem_map;
2321   struct splay_tree_key_s cur_node;
2322   int ret = EINVAL;
2323 
2324   cur_node.host_start = (uintptr_t) ptr;
2325   cur_node.host_end = cur_node.host_start;
2326   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2327   if (n
2328       && n->host_start == cur_node.host_start
2329       && n->refcount == REFCOUNT_INFINITY
2330       && n->tgt->tgt_start == 0
2331       && n->tgt->to_free == NULL
2332       && n->tgt->refcount == 1
2333       && n->tgt->list_count == 0)
2334     {
2335       splay_tree_remove (&devicep->mem_map, n);
2336       gomp_unmap_tgt (n->tgt);
2337       ret = 0;
2338     }
2339 
2340   gomp_mutex_unlock (&devicep->lock);
2341   return ret;
2342 }
2343 
2344 #ifdef PLUGIN_SUPPORT
2345 
2346 /* This function tries to load a plugin for DEVICE.  The name of the
2347    plugin is passed in PLUGIN_NAME.
2348    The handles of the found functions are stored in the corresponding fields
2349    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2350 
2351 static bool
2352 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2353 			     const char *plugin_name)
2354 {
2355   const char *err = NULL, *last_missing = NULL;
2356 
2357   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2358   if (!plugin_handle)
2359     goto dl_fail;
2360 
2361   /* Check if all required functions are available in the plugin and store
2362      their handles.  None of the symbols can legitimately be NULL,
2363      so we don't need to check dlerror all the time.  */
2364 #define DLSYM(f)							\
2365   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
2366     goto dl_fail
2367   /* Similar, but a missing function is not an error.  The macro evaluates
2368      to false if the lookup failed, true otherwise.  */
2369 #define DLSYM_OPT(f, n)							\
2370   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
2371    || (last_missing = #n, 0))
2372 
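  /* Editorial note: DLSYM (alloc), for example, expands per the macro
     above to

	 if (!(device->alloc_func = dlsym (plugin_handle,
					   "GOMP_OFFLOAD_alloc")))
	   goto dl_fail;

     so each required entry point both fills its field in DEVICE and
     gates overall success.  */
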
2373   DLSYM (version);
2374   if (device->version_func () != GOMP_VERSION)
2375     {
2376       err = "plugin version mismatch";
2377       goto fail;
2378     }
2379 
2380   DLSYM (get_name);
2381   DLSYM (get_caps);
2382   DLSYM (get_type);
2383   DLSYM (get_num_devices);
2384   DLSYM (init_device);
2385   DLSYM (fini_device);
2386   DLSYM (load_image);
2387   DLSYM (unload_image);
2388   DLSYM (alloc);
2389   DLSYM (free);
2390   DLSYM (dev2host);
2391   DLSYM (host2dev);
2392   device->capabilities = device->get_caps_func ();
2393   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2394     {
2395       DLSYM (run);
2396       DLSYM (async_run);
2397       DLSYM_OPT (can_run, can_run);
2398       DLSYM (dev2dev);
2399     }
2400   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2401     {
2402       if (!DLSYM_OPT (openacc.exec, openacc_exec)
2403 	  || !DLSYM_OPT (openacc.register_async_cleanup,
2404 			 openacc_register_async_cleanup)
2405 	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2406 	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2407 	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2408 	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2409 	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2410 	  || !DLSYM_OPT (openacc.async_wait_all_async,
2411 			 openacc_async_wait_all_async)
2412 	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2413 	  || !DLSYM_OPT (openacc.create_thread_data,
2414 			 openacc_create_thread_data)
2415 	  || !DLSYM_OPT (openacc.destroy_thread_data,
2416 			 openacc_destroy_thread_data))
2417 	{
2418 	  /* Require all the OpenACC handlers if we have
2419 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
2420 	  err = "plugin missing OpenACC handler function";
2421 	  goto fail;
2422 	}
2423 
2424       unsigned cuda = 0;
2425       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2426 			 openacc_cuda_get_current_device);
2427       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2428 			 openacc_cuda_get_current_context);
2429       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2430       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2431       if (cuda && cuda != 4)
2432 	{
2433 	  /* Make sure all the CUDA functions are there if any of them are.  */
2434 	  err = "plugin missing OpenACC CUDA handler function";
2435 	  goto fail;
2436 	}
2437     }
2438 #undef DLSYM
2439 #undef DLSYM_OPT
2440 
2441   return 1;
2442 
2443  dl_fail:
2444   err = dlerror ();
2445  fail:
2446   gomp_error ("while loading %s: %s", plugin_name, err);
2447   if (last_missing)
2448     gomp_error ("missing function was %s", last_missing);
2449   if (plugin_handle)
2450     dlclose (plugin_handle);
2451 
2452   return 0;
2453 }
2454 
2455 /* This function finalizes all initialized devices.  */
2456 
2457 static void
2458 gomp_target_fini (void)
2459 {
2460   int i;
2461   for (i = 0; i < num_devices; i++)
2462     {
2463       bool ret = true;
2464       struct gomp_device_descr *devicep = &devices[i];
2465       gomp_mutex_lock (&devicep->lock);
2466       if (devicep->state == GOMP_DEVICE_INITIALIZED)
2467 	{
2468 	  ret = devicep->fini_device_func (devicep->target_id);
2469 	  devicep->state = GOMP_DEVICE_FINALIZED;
2470 	}
2471       gomp_mutex_unlock (&devicep->lock);
2472       if (!ret)
2473 	gomp_fatal ("device finalization failed");
2474     }
2475 }
2476 
2477 /* This function initializes the runtime needed for offloading.
2478    It parses the list of offload targets and tries to load the plugins for
2479    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2480    will be set, and the array DEVICES initialized, containing descriptors for
2481    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
2482    followed by the others.  */
2483 
2484 static void
2485 gomp_target_init (void)
2486 {
2487   const char *prefix = "libgomp-plugin-";
2488   const char *suffix = SONAME_SUFFIX (1);
2489   const char *cur, *next;
2490   char *plugin_name;
2491   int i, new_num_devices;
2492 
2493   num_devices = 0;
2494   devices = NULL;
2495 
2496   cur = OFFLOAD_TARGETS;
2497   if (*cur)
2498     do
2499       {
2500 	struct gomp_device_descr current_device;
2501 
2502 	next = strchr (cur, ',');
2503 
2504 	plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
2505 				       + strlen (prefix) + strlen (suffix));
2506 	if (!plugin_name)
2507 	  {
2508 	    num_devices = 0;
2509 	    break;
2510 	  }
2511 
2512 	strcpy (plugin_name, prefix);
2513 	strncat (plugin_name, cur, next ? next - cur : strlen (cur));
2514 	strcat (plugin_name, suffix);
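
	/* Editorial example (hedged): for an OFFLOAD_TARGETS entry "foo"
	   (an invented name) and an ELF-style SONAME_SUFFIX of ".so.1",
	   PLUGIN_NAME would now hold "libgomp-plugin-foo.so.1".  */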
2515 
2516 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
2517 	  {
2518 	    new_num_devices = current_device.get_num_devices_func ();
2519 	    if (new_num_devices >= 1)
2520 	      {
2521 		/* Augment DEVICES and NUM_DEVICES.  */
2522 
2523 		devices = realloc (devices, (num_devices + new_num_devices)
2524 				   * sizeof (struct gomp_device_descr));
2525 		if (!devices)
2526 		  {
2527 		    num_devices = 0;
2528 		    free (plugin_name);
2529 		    break;
2530 		  }
2531 
2532 		current_device.name = current_device.get_name_func ();
2533 		/* current_device.capabilities has already been set.  */
2534 		current_device.type = current_device.get_type_func ();
2535 		current_device.mem_map.root = NULL;
2536 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
2537 		current_device.openacc.data_environ = NULL;
2538 		for (i = 0; i < new_num_devices; i++)
2539 		  {
2540 		    current_device.target_id = i;
2541 		    devices[num_devices] = current_device;
2542 		    gomp_mutex_init (&devices[num_devices].lock);
2543 		    num_devices++;
2544 		  }
2545 	      }
2546 	  }
2547 
2548 	free (plugin_name);
2549 	cur = next + 1;
2550       }
2551     while (next);
2552 
2553   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2554      NUM_DEVICES_OPENMP.  */
2555   struct gomp_device_descr *devices_s
2556     = malloc (num_devices * sizeof (struct gomp_device_descr));
2557   if (!devices_s)
2558     {
2559       num_devices = 0;
2560       free (devices);
2561       devices = NULL;
2562     }
2563   num_devices_openmp = 0;
2564   for (i = 0; i < num_devices; i++)
2565     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2566       devices_s[num_devices_openmp++] = devices[i];
2567   int num_devices_after_openmp = num_devices_openmp;
2568   for (i = 0; i < num_devices; i++)
2569     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2570       devices_s[num_devices_after_openmp++] = devices[i];
2571   free (devices);
2572   devices = devices_s;
2573 
2574   for (i = 0; i < num_devices; i++)
2575     {
2576       /* The 'devices' array can be moved (by the realloc call) until we have
2577 	 found all the plugins, so registering with the OpenACC runtime (which
2578 	 takes a copy of the pointer argument) must be delayed until now.  */
2579       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2580 	goacc_register (&devices[i]);
2581     }
2582 
2583   if (atexit (gomp_target_fini) != 0)
2584     gomp_fatal ("atexit failed");
2585 }
2586 
2587 #else /* PLUGIN_SUPPORT */
2588 /* If dlfcn.h is unavailable we always fall back to host execution.
2589    GOMP_target* routines are just stubs for this case.  */
2590 static void
2591 gomp_target_init (void)
2592 {
2593 }
2594 #endif /* PLUGIN_SUPPORT */
2595