xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/target.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /* Copyright (C) 2013-2020 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file contains the support of offloading.  */
27 
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h>  /* For PRIu64.  */
37 #endif
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41 
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
46 
47 #define FIELD_TGT_EMPTY (~(size_t) 0)
48 
49 static void gomp_target_init (void);
50 
51 /* The whole initialization code for offloading plugins is only run one.  */
52 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
53 
54 /* Mutex for offload image registration.  */
55 static gomp_mutex_t register_lock;
56 
57 /* This structure describes an offload image.
58    It contains type of the target device, pointer to host table descriptor, and
59    pointer to target data.  */
60 struct offload_image_descr {
61   unsigned version;
62   enum offload_target_type type;
63   const void *host_table;
64   const void *target_data;
65 };
66 
67 /* Array of descriptors of offload images.  */
68 static struct offload_image_descr *offload_images;
69 
70 /* Total number of offload images.  */
71 static int num_offload_images;
72 
73 /* Array of descriptors for all available devices.  */
74 static struct gomp_device_descr *devices;
75 
76 /* Total number of available devices.  */
77 static int num_devices;
78 
79 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
80 static int num_devices_openmp;
81 
82 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
83 
84 static void *
85 gomp_realloc_unlock (void *old, size_t size)
86 {
87   void *ret = realloc (old, size);
88   if (ret == NULL)
89     {
90       gomp_mutex_unlock (&register_lock);
91       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
92     }
93   return ret;
94 }
95 
96 attribute_hidden void
97 gomp_init_targets_once (void)
98 {
99   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
100 }
101 
102 attribute_hidden int
103 gomp_get_num_devices (void)
104 {
105   gomp_init_targets_once ();
106   return num_devices_openmp;
107 }
108 
109 static struct gomp_device_descr *
110 resolve_device (int device_id)
111 {
112   if (device_id == GOMP_DEVICE_ICV)
113     {
114       struct gomp_task_icv *icv = gomp_icv (false);
115       device_id = icv->default_device_var;
116     }
117 
118   if (device_id < 0 || device_id >= gomp_get_num_devices ())
119     return NULL;
120 
121   gomp_mutex_lock (&devices[device_id].lock);
122   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
123     gomp_init_device (&devices[device_id]);
124   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
125     {
126       gomp_mutex_unlock (&devices[device_id].lock);
127       return NULL;
128     }
129   gomp_mutex_unlock (&devices[device_id].lock);
130 
131   return &devices[device_id];
132 }
133 
134 
135 static inline splay_tree_key
136 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
137 {
138   if (key->host_start != key->host_end)
139     return splay_tree_lookup (mem_map, key);
140 
141   key->host_end++;
142   splay_tree_key n = splay_tree_lookup (mem_map, key);
143   key->host_end--;
144   if (n)
145     return n;
146   key->host_start--;
147   n = splay_tree_lookup (mem_map, key);
148   key->host_start++;
149   if (n)
150     return n;
151   return splay_tree_lookup (mem_map, key);
152 }
153 
154 static inline splay_tree_key
155 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
156 {
157   if (key->host_start != key->host_end)
158     return splay_tree_lookup (mem_map, key);
159 
160   key->host_end++;
161   splay_tree_key n = splay_tree_lookup (mem_map, key);
162   key->host_end--;
163   return n;
164 }
165 
166 static inline void
167 gomp_device_copy (struct gomp_device_descr *devicep,
168 		  bool (*copy_func) (int, void *, const void *, size_t),
169 		  const char *dst, void *dstaddr,
170 		  const char *src, const void *srcaddr,
171 		  size_t size)
172 {
173   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
174     {
175       gomp_mutex_unlock (&devicep->lock);
176       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
177 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
178     }
179 }
180 
181 static inline void
182 goacc_device_copy_async (struct gomp_device_descr *devicep,
183 			 bool (*copy_func) (int, void *, const void *, size_t,
184 					    struct goacc_asyncqueue *),
185 			 const char *dst, void *dstaddr,
186 			 const char *src, const void *srcaddr,
187 			 size_t size, struct goacc_asyncqueue *aq)
188 {
189   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
190     {
191       gomp_mutex_unlock (&devicep->lock);
192       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
193 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
194     }
195 }
196 
197 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
198    host to device memory transfers.  */
199 
200 struct gomp_coalesce_chunk
201 {
202   /* The starting and ending point of a coalesced chunk of memory.  */
203   size_t start, end;
204 };
205 
206 struct gomp_coalesce_buf
207 {
208   /* Buffer into which gomp_copy_host2dev will memcpy data and from which
209      it will be copied to the device.  */
210   void *buf;
211   struct target_mem_desc *tgt;
212   /* Array with offsets, chunks[i].start is the starting offset and
213      chunks[i].end ending offset relative to tgt->tgt_start device address
214      of chunks which are to be copied to buf and later copied to device.  */
215   struct gomp_coalesce_chunk *chunks;
216   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
217      be performed.  */
218   long chunk_cnt;
219   /* During construction of chunks array, how many memory regions are within
220      the last chunk.  If there is just one memory region for a chunk, we copy
221      it directly to device rather than going through buf.  */
222   long use_cnt;
223 };
224 
225 /* Maximum size of memory region considered for coalescing.  Larger copies
226    are performed directly.  */
227 #define MAX_COALESCE_BUF_SIZE	(32 * 1024)
228 
229 /* Maximum size of a gap in between regions to consider them being copied
230    within the same chunk.  All the device offsets considered are within
231    newly allocated device memory, so it isn't fatal if we copy some padding
232    in between from host to device.  The gaps come either from alignment
233    padding or from memory regions which are not supposed to be copied from
234    host to device (e.g. map(alloc:), map(from:) etc.).  */
235 #define MAX_COALESCE_BUF_GAP	(4 * 1024)
236 
237 /* Add region with device tgt_start relative offset and length to CBUF.  */
238 
239 static inline void
240 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
241 {
242   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
243     return;
244   if (cbuf->chunk_cnt)
245     {
246       if (cbuf->chunk_cnt < 0)
247 	return;
248       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
249 	{
250 	  cbuf->chunk_cnt = -1;
251 	  return;
252 	}
253       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
254 	{
255 	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
256 	  cbuf->use_cnt++;
257 	  return;
258 	}
259       /* If the last chunk is only used by one mapping, discard it,
260 	 as it will be one host to device copy anyway and
261 	 memcpying it around will only waste cycles.  */
262       if (cbuf->use_cnt == 1)
263 	cbuf->chunk_cnt--;
264     }
265   cbuf->chunks[cbuf->chunk_cnt].start = start;
266   cbuf->chunks[cbuf->chunk_cnt].end = start + len;
267   cbuf->chunk_cnt++;
268   cbuf->use_cnt = 1;
269 }
270 
271 /* Return true for mapping kinds which need to copy data from the
272    host to device for regions that weren't previously mapped.  */
273 
274 static inline bool
275 gomp_to_device_kind_p (int kind)
276 {
277   switch (kind)
278     {
279     case GOMP_MAP_ALLOC:
280     case GOMP_MAP_FROM:
281     case GOMP_MAP_FORCE_ALLOC:
282     case GOMP_MAP_FORCE_FROM:
283     case GOMP_MAP_ALWAYS_FROM:
284       return false;
285     default:
286       return true;
287     }
288 }
289 
290 attribute_hidden void
291 gomp_copy_host2dev (struct gomp_device_descr *devicep,
292 		    struct goacc_asyncqueue *aq,
293 		    void *d, const void *h, size_t sz,
294 		    struct gomp_coalesce_buf *cbuf)
295 {
296   if (cbuf)
297     {
298       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
299       if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
300 	{
301 	  long first = 0;
302 	  long last = cbuf->chunk_cnt - 1;
303 	  while (first <= last)
304 	    {
305 	      long middle = (first + last) >> 1;
306 	      if (cbuf->chunks[middle].end <= doff)
307 		first = middle + 1;
308 	      else if (cbuf->chunks[middle].start <= doff)
309 		{
310 		  if (doff + sz > cbuf->chunks[middle].end)
311 		    gomp_fatal ("internal libgomp cbuf error");
312 		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
313 			  h, sz);
314 		  return;
315 		}
316 	      else
317 		last = middle - 1;
318 	    }
319 	}
320     }
321   if (__builtin_expect (aq != NULL, 0))
322     goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
323 			     "dev", d, "host", h, sz, aq);
324   else
325     gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
326 }
327 
328 attribute_hidden void
329 gomp_copy_dev2host (struct gomp_device_descr *devicep,
330 		    struct goacc_asyncqueue *aq,
331 		    void *h, const void *d, size_t sz)
332 {
333   if (__builtin_expect (aq != NULL, 0))
334     goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
335 			     "host", h, "dev", d, sz, aq);
336   else
337     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
338 }
339 
340 static void
341 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
342 {
343   if (!devicep->free_func (devicep->target_id, devptr))
344     {
345       gomp_mutex_unlock (&devicep->lock);
346       gomp_fatal ("error in freeing device memory block at %p", devptr);
347     }
348 }
349 
350 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
351    gomp_map_0len_lookup found oldn for newn.
352    Helper function of gomp_map_vars.  */
353 
354 static inline void
355 gomp_map_vars_existing (struct gomp_device_descr *devicep,
356 			struct goacc_asyncqueue *aq, splay_tree_key oldn,
357 			splay_tree_key newn, struct target_var_desc *tgt_var,
358 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
359 {
360   assert (kind != GOMP_MAP_ATTACH);
361 
362   tgt_var->key = oldn;
363   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
364   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
365   tgt_var->is_attach = false;
366   tgt_var->offset = newn->host_start - oldn->host_start;
367   tgt_var->length = newn->host_end - newn->host_start;
368 
369   if ((kind & GOMP_MAP_FLAG_FORCE)
370       || oldn->host_start > newn->host_start
371       || oldn->host_end < newn->host_end)
372     {
373       gomp_mutex_unlock (&devicep->lock);
374       gomp_fatal ("Trying to map into device [%p..%p) object when "
375 		  "[%p..%p) is already mapped",
376 		  (void *) newn->host_start, (void *) newn->host_end,
377 		  (void *) oldn->host_start, (void *) oldn->host_end);
378     }
379 
380   if (GOMP_MAP_ALWAYS_TO_P (kind))
381     gomp_copy_host2dev (devicep, aq,
382 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
383 				  + newn->host_start - oldn->host_start),
384 			(void *) newn->host_start,
385 			newn->host_end - newn->host_start, cbuf);
386 
387   if (oldn->refcount != REFCOUNT_INFINITY)
388     oldn->refcount++;
389 }
390 
391 static int
392 get_kind (bool short_mapkind, void *kinds, int idx)
393 {
394   return short_mapkind ? ((unsigned short *) kinds)[idx]
395 		       : ((unsigned char *) kinds)[idx];
396 }
397 
398 static void
399 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
400 		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
401 		  struct gomp_coalesce_buf *cbuf)
402 {
403   struct gomp_device_descr *devicep = tgt->device_descr;
404   struct splay_tree_s *mem_map = &devicep->mem_map;
405   struct splay_tree_key_s cur_node;
406 
407   cur_node.host_start = host_ptr;
408   if (cur_node.host_start == (uintptr_t) NULL)
409     {
410       cur_node.tgt_offset = (uintptr_t) NULL;
411       gomp_copy_host2dev (devicep, aq,
412 			  (void *) (tgt->tgt_start + target_offset),
413 			  (void *) &cur_node.tgt_offset,
414 			  sizeof (void *), cbuf);
415       return;
416     }
417   /* Add bias to the pointer value.  */
418   cur_node.host_start += bias;
419   cur_node.host_end = cur_node.host_start;
420   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
421   if (n == NULL)
422     {
423       gomp_mutex_unlock (&devicep->lock);
424       gomp_fatal ("Pointer target of array section wasn't mapped");
425     }
426   cur_node.host_start -= n->host_start;
427   cur_node.tgt_offset
428     = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
429   /* At this point tgt_offset is target address of the
430      array section.  Now subtract bias to get what we want
431      to initialize the pointer with.  */
432   cur_node.tgt_offset -= bias;
433   gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
434 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
435 }
436 
437 static void
438 gomp_map_fields_existing (struct target_mem_desc *tgt,
439 			  struct goacc_asyncqueue *aq, splay_tree_key n,
440 			  size_t first, size_t i, void **hostaddrs,
441 			  size_t *sizes, void *kinds,
442 			  struct gomp_coalesce_buf *cbuf)
443 {
444   struct gomp_device_descr *devicep = tgt->device_descr;
445   struct splay_tree_s *mem_map = &devicep->mem_map;
446   struct splay_tree_key_s cur_node;
447   int kind;
448   const bool short_mapkind = true;
449   const int typemask = short_mapkind ? 0xff : 0x7;
450 
451   cur_node.host_start = (uintptr_t) hostaddrs[i];
452   cur_node.host_end = cur_node.host_start + sizes[i];
453   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
454   kind = get_kind (short_mapkind, kinds, i);
455   if (n2
456       && n2->tgt == n->tgt
457       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
458     {
459       gomp_map_vars_existing (devicep, aq, n2, &cur_node,
460 			      &tgt->list[i], kind & typemask, cbuf);
461       return;
462     }
463   if (sizes[i] == 0)
464     {
465       if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
466 	{
467 	  cur_node.host_start--;
468 	  n2 = splay_tree_lookup (mem_map, &cur_node);
469 	  cur_node.host_start++;
470 	  if (n2
471 	      && n2->tgt == n->tgt
472 	      && n2->host_start - n->host_start
473 		 == n2->tgt_offset - n->tgt_offset)
474 	    {
475 	      gomp_map_vars_existing (devicep, aq, n2, &cur_node,
476 				      &tgt->list[i], kind & typemask, cbuf);
477 	      return;
478 	    }
479 	}
480       cur_node.host_end++;
481       n2 = splay_tree_lookup (mem_map, &cur_node);
482       cur_node.host_end--;
483       if (n2
484 	  && n2->tgt == n->tgt
485 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
486 	{
487 	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
488 				  kind & typemask, cbuf);
489 	  return;
490 	}
491     }
492   gomp_mutex_unlock (&devicep->lock);
493   gomp_fatal ("Trying to map into device [%p..%p) structure element when "
494 	      "other mapped elements from the same structure weren't mapped "
495 	      "together with it", (void *) cur_node.host_start,
496 	      (void *) cur_node.host_end);
497 }
498 
499 attribute_hidden void
500 gomp_attach_pointer (struct gomp_device_descr *devicep,
501 		     struct goacc_asyncqueue *aq, splay_tree mem_map,
502 		     splay_tree_key n, uintptr_t attach_to, size_t bias,
503 		     struct gomp_coalesce_buf *cbufp)
504 {
505   struct splay_tree_key_s s;
506   size_t size, idx;
507 
508   if (n == NULL)
509     {
510       gomp_mutex_unlock (&devicep->lock);
511       gomp_fatal ("enclosing struct not mapped for attach");
512     }
513 
514   size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
515   /* We might have a pointer in a packed struct: however we cannot have more
516      than one such pointer in each pointer-sized portion of the struct, so
517      this is safe.  */
518   idx = (attach_to - n->host_start) / sizeof (void *);
519 
520   if (!n->aux)
521     n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
522 
523   if (!n->aux->attach_count)
524     n->aux->attach_count
525       = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
526 
527   if (n->aux->attach_count[idx] < UINTPTR_MAX)
528     n->aux->attach_count[idx]++;
529   else
530     {
531       gomp_mutex_unlock (&devicep->lock);
532       gomp_fatal ("attach count overflow");
533     }
534 
535   if (n->aux->attach_count[idx] == 1)
536     {
537       uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
538 			 - n->host_start;
539       uintptr_t target = (uintptr_t) *(void **) attach_to;
540       splay_tree_key tn;
541       uintptr_t data;
542 
543       if ((void *) target == NULL)
544 	{
545 	  gomp_mutex_unlock (&devicep->lock);
546 	  gomp_fatal ("attempt to attach null pointer");
547 	}
548 
549       s.host_start = target + bias;
550       s.host_end = s.host_start + 1;
551       tn = splay_tree_lookup (mem_map, &s);
552 
553       if (!tn)
554 	{
555 	  gomp_mutex_unlock (&devicep->lock);
556 	  gomp_fatal ("pointer target not mapped for attach");
557 	}
558 
559       data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
560 
561       gomp_debug (1,
562 		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
563 		  __FUNCTION__, (void *) attach_to, (void *) devptr,
564 		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
565 
566       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
567 			  sizeof (void *), cbufp);
568     }
569   else
570     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
571 		(void *) attach_to, (int) n->aux->attach_count[idx]);
572 }
573 
574 attribute_hidden void
575 gomp_detach_pointer (struct gomp_device_descr *devicep,
576 		     struct goacc_asyncqueue *aq, splay_tree_key n,
577 		     uintptr_t detach_from, bool finalize,
578 		     struct gomp_coalesce_buf *cbufp)
579 {
580   size_t idx;
581 
582   if (n == NULL)
583     {
584       gomp_mutex_unlock (&devicep->lock);
585       gomp_fatal ("enclosing struct not mapped for detach");
586     }
587 
588   idx = (detach_from - n->host_start) / sizeof (void *);
589 
590   if (!n->aux || !n->aux->attach_count)
591     {
592       gomp_mutex_unlock (&devicep->lock);
593       gomp_fatal ("no attachment counters for struct");
594     }
595 
596   if (finalize)
597     n->aux->attach_count[idx] = 1;
598 
599   if (n->aux->attach_count[idx] == 0)
600     {
601       gomp_mutex_unlock (&devicep->lock);
602       gomp_fatal ("attach count underflow");
603     }
604   else
605     n->aux->attach_count[idx]--;
606 
607   if (n->aux->attach_count[idx] == 0)
608     {
609       uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
610 			 - n->host_start;
611       uintptr_t target = (uintptr_t) *(void **) detach_from;
612 
613       gomp_debug (1,
614 		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
615 		  __FUNCTION__, (void *) detach_from, (void *) devptr,
616 		  (void *) (n->tgt->tgt_start + n->tgt_offset),
617 		  (void *) target);
618 
619       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
620 			  sizeof (void *), cbufp);
621     }
622   else
623     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
624 		(void *) detach_from, (int) n->aux->attach_count[idx]);
625 }
626 
627 attribute_hidden uintptr_t
628 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
629 {
630   if (tgt->list[i].key != NULL)
631     return tgt->list[i].key->tgt->tgt_start
632 	   + tgt->list[i].key->tgt_offset
633 	   + tgt->list[i].offset;
634 
635   switch (tgt->list[i].offset)
636     {
637     case OFFSET_INLINED:
638       return (uintptr_t) hostaddrs[i];
639 
640     case OFFSET_POINTER:
641       return 0;
642 
643     case OFFSET_STRUCT:
644       return tgt->list[i + 1].key->tgt->tgt_start
645 	     + tgt->list[i + 1].key->tgt_offset
646 	     + tgt->list[i + 1].offset
647 	     + (uintptr_t) hostaddrs[i]
648 	     - (uintptr_t) hostaddrs[i + 1];
649 
650     default:
651       return tgt->tgt_start + tgt->list[i].offset;
652     }
653 }
654 
655 static inline __attribute__((always_inline)) struct target_mem_desc *
656 gomp_map_vars_internal (struct gomp_device_descr *devicep,
657 			struct goacc_asyncqueue *aq, size_t mapnum,
658 			void **hostaddrs, void **devaddrs, size_t *sizes,
659 			void *kinds, bool short_mapkind,
660 			enum gomp_map_vars_kind pragma_kind)
661 {
662   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
663   bool has_firstprivate = false;
664   const int rshift = short_mapkind ? 8 : 3;
665   const int typemask = short_mapkind ? 0xff : 0x7;
666   struct splay_tree_s *mem_map = &devicep->mem_map;
667   struct splay_tree_key_s cur_node;
668   struct target_mem_desc *tgt
669     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
670   tgt->list_count = mapnum;
671   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
672   tgt->device_descr = devicep;
673   tgt->prev = NULL;
674   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
675 
676   if (mapnum == 0)
677     {
678       tgt->tgt_start = 0;
679       tgt->tgt_end = 0;
680       return tgt;
681     }
682 
683   tgt_align = sizeof (void *);
684   tgt_size = 0;
685   cbuf.chunks = NULL;
686   cbuf.chunk_cnt = -1;
687   cbuf.use_cnt = 0;
688   cbuf.buf = NULL;
689   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
690     {
691       size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
692       cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
693       cbuf.chunk_cnt = 0;
694     }
695   if (pragma_kind == GOMP_MAP_VARS_TARGET)
696     {
697       size_t align = 4 * sizeof (void *);
698       tgt_align = align;
699       tgt_size = mapnum * sizeof (void *);
700       cbuf.chunk_cnt = 1;
701       cbuf.use_cnt = 1 + (mapnum > 1);
702       cbuf.chunks[0].start = 0;
703       cbuf.chunks[0].end = tgt_size;
704     }
705 
706   gomp_mutex_lock (&devicep->lock);
707   if (devicep->state == GOMP_DEVICE_FINALIZED)
708     {
709       gomp_mutex_unlock (&devicep->lock);
710       free (tgt);
711       return NULL;
712     }
713 
714   for (i = 0; i < mapnum; i++)
715     {
716       int kind = get_kind (short_mapkind, kinds, i);
717       if (hostaddrs[i] == NULL
718 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
719 	{
720 	  tgt->list[i].key = NULL;
721 	  tgt->list[i].offset = OFFSET_INLINED;
722 	  continue;
723 	}
724       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
725 	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
726 	{
727 	  tgt->list[i].key = NULL;
728 	  if (!not_found_cnt)
729 	    {
730 	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
731 		 on a separate construct prior to using use_device_{addr,ptr}.
732 		 In OpenMP 5.0, map directives need to be ordered by the
733 		 middle-end before the use_device_* clauses.  If
734 		 !not_found_cnt, all mappings requested (if any) are already
735 		 mapped, so use_device_{addr,ptr} can be resolved right away.
736 		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
737 		 now but would succeed after performing the mappings in the
738 		 following loop.  We can't defer this always to the second
739 		 loop, because it is not even invoked when !not_found_cnt
740 		 after the first loop.  */
741 	      cur_node.host_start = (uintptr_t) hostaddrs[i];
742 	      cur_node.host_end = cur_node.host_start;
743 	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
744 	      if (n != NULL)
745 		{
746 		  cur_node.host_start -= n->host_start;
747 		  hostaddrs[i]
748 		    = (void *) (n->tgt->tgt_start + n->tgt_offset
749 				+ cur_node.host_start);
750 		}
751 	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
752 		{
753 		  gomp_mutex_unlock (&devicep->lock);
754 		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
755 		}
756 	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
757 		/* If not present, continue using the host address.  */
758 		;
759 	      else
760 		__builtin_unreachable ();
761 	      tgt->list[i].offset = OFFSET_INLINED;
762 	    }
763 	  else
764 	    tgt->list[i].offset = 0;
765 	  continue;
766 	}
767       else if ((kind & typemask) == GOMP_MAP_STRUCT)
768 	{
769 	  size_t first = i + 1;
770 	  size_t last = i + sizes[i];
771 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
772 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
773 			      + sizes[last];
774 	  tgt->list[i].key = NULL;
775 	  tgt->list[i].offset = OFFSET_STRUCT;
776 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
777 	  if (n == NULL)
778 	    {
779 	      size_t align = (size_t) 1 << (kind >> rshift);
780 	      if (tgt_align < align)
781 		tgt_align = align;
782 	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
783 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
784 	      tgt_size += cur_node.host_end - cur_node.host_start;
785 	      not_found_cnt += last - i;
786 	      for (i = first; i <= last; i++)
787 		{
788 		  tgt->list[i].key = NULL;
789 		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
790 					     & typemask))
791 		    gomp_coalesce_buf_add (&cbuf,
792 					   tgt_size - cur_node.host_end
793 					   + (uintptr_t) hostaddrs[i],
794 					   sizes[i]);
795 		}
796 	      i--;
797 	      continue;
798 	    }
799 	  for (i = first; i <= last; i++)
800 	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
801 				      sizes, kinds, NULL);
802 	  i--;
803 	  continue;
804 	}
805       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
806 	{
807 	  tgt->list[i].key = NULL;
808 	  tgt->list[i].offset = OFFSET_POINTER;
809 	  has_firstprivate = true;
810 	  continue;
811 	}
812       else if ((kind & typemask) == GOMP_MAP_ATTACH)
813 	{
814 	  tgt->list[i].key = NULL;
815 	  has_firstprivate = true;
816 	  continue;
817 	}
818       cur_node.host_start = (uintptr_t) hostaddrs[i];
819       if (!GOMP_MAP_POINTER_P (kind & typemask))
820 	cur_node.host_end = cur_node.host_start + sizes[i];
821       else
822 	cur_node.host_end = cur_node.host_start + sizeof (void *);
823       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
824 	{
825 	  tgt->list[i].key = NULL;
826 
827 	  size_t align = (size_t) 1 << (kind >> rshift);
828 	  if (tgt_align < align)
829 	    tgt_align = align;
830 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
831 	  gomp_coalesce_buf_add (&cbuf, tgt_size,
832 				 cur_node.host_end - cur_node.host_start);
833 	  tgt_size += cur_node.host_end - cur_node.host_start;
834 	  has_firstprivate = true;
835 	  continue;
836 	}
837       splay_tree_key n;
838       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
839 	{
840 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
841 	  if (!n)
842 	    {
843 	      tgt->list[i].key = NULL;
844 	      tgt->list[i].offset = OFFSET_POINTER;
845 	      continue;
846 	    }
847 	}
848       else
849 	n = splay_tree_lookup (mem_map, &cur_node);
850       if (n && n->refcount != REFCOUNT_LINK)
851 	gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
852 				kind & typemask, NULL);
853       else
854 	{
855 	  tgt->list[i].key = NULL;
856 
857 	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
858 	    {
859 	      /* Not present, hence, skip entry - including its MAP_POINTER,
860 		 when existing.  */
861 	      tgt->list[i].offset = OFFSET_POINTER;
862 	      if (i + 1 < mapnum
863 		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
864 		      == GOMP_MAP_POINTER))
865 		{
866 		  ++i;
867 		  tgt->list[i].key = NULL;
868 		  tgt->list[i].offset = 0;
869 		}
870 	      continue;
871 	    }
872 	  size_t align = (size_t) 1 << (kind >> rshift);
873 	  not_found_cnt++;
874 	  if (tgt_align < align)
875 	    tgt_align = align;
876 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
877 	  if (gomp_to_device_kind_p (kind & typemask))
878 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
879 				   cur_node.host_end - cur_node.host_start);
880 	  tgt_size += cur_node.host_end - cur_node.host_start;
881 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
882 	    {
883 	      size_t j;
884 	      for (j = i + 1; j < mapnum; j++)
885 		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
886 					 & typemask))
887 		  break;
888 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
889 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
890 			     > cur_node.host_end))
891 		  break;
892 		else
893 		  {
894 		    tgt->list[j].key = NULL;
895 		    i++;
896 		  }
897 	    }
898 	}
899     }
900 
901   if (devaddrs)
902     {
903       if (mapnum != 1)
904 	{
905 	  gomp_mutex_unlock (&devicep->lock);
906 	  gomp_fatal ("unexpected aggregation");
907 	}
908       tgt->to_free = devaddrs[0];
909       tgt->tgt_start = (uintptr_t) tgt->to_free;
910       tgt->tgt_end = tgt->tgt_start + sizes[0];
911     }
912   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
913     {
914       /* Allocate tgt_align aligned tgt_size block of memory.  */
915       /* FIXME: Perhaps change interface to allocate properly aligned
916 	 memory.  */
917       tgt->to_free = devicep->alloc_func (devicep->target_id,
918 					  tgt_size + tgt_align - 1);
919       if (!tgt->to_free)
920 	{
921 	  gomp_mutex_unlock (&devicep->lock);
922 	  gomp_fatal ("device memory allocation fail");
923 	}
924 
925       tgt->tgt_start = (uintptr_t) tgt->to_free;
926       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
927       tgt->tgt_end = tgt->tgt_start + tgt_size;
928 
929       if (cbuf.use_cnt == 1)
930 	cbuf.chunk_cnt--;
931       if (cbuf.chunk_cnt > 0)
932 	{
933 	  cbuf.buf
934 	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
935 	  if (cbuf.buf)
936 	    {
937 	      cbuf.tgt = tgt;
938 	      cbufp = &cbuf;
939 	    }
940 	}
941     }
942   else
943     {
944       tgt->to_free = NULL;
945       tgt->tgt_start = 0;
946       tgt->tgt_end = 0;
947     }
948 
949   tgt_size = 0;
950   if (pragma_kind == GOMP_MAP_VARS_TARGET)
951     tgt_size = mapnum * sizeof (void *);
952 
953   tgt->array = NULL;
954   if (not_found_cnt || has_firstprivate)
955     {
956       if (not_found_cnt)
957 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
958       splay_tree_node array = tgt->array;
959       size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
960       uintptr_t field_tgt_base = 0;
961 
962       for (i = 0; i < mapnum; i++)
963 	if (tgt->list[i].key == NULL)
964 	  {
965 	    int kind = get_kind (short_mapkind, kinds, i);
966 	    if (hostaddrs[i] == NULL)
967 	      continue;
968 	    switch (kind & typemask)
969 	      {
970 		size_t align, len, first, last;
971 		splay_tree_key n;
972 	      case GOMP_MAP_FIRSTPRIVATE:
973 		align = (size_t) 1 << (kind >> rshift);
974 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
975 		tgt->list[i].offset = tgt_size;
976 		len = sizes[i];
977 		gomp_copy_host2dev (devicep, aq,
978 				    (void *) (tgt->tgt_start + tgt_size),
979 				    (void *) hostaddrs[i], len, cbufp);
980 		tgt_size += len;
981 		continue;
982 	      case GOMP_MAP_FIRSTPRIVATE_INT:
983 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
984 		continue;
985 	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
986 		/* The OpenACC 'host_data' construct only allows 'use_device'
987 		   "mapping" clauses, so in the first loop, 'not_found_cnt'
988 		   must always have been zero, so all OpenACC 'use_device'
989 		   clauses have already been handled.  (We can only easily test
990 		   'use_device' with 'if_present' clause here.)  */
991 		assert (tgt->list[i].offset == OFFSET_INLINED);
992 		/* Nevertheless, FALLTHRU to the normal handling, to keep the
993 		   code conceptually simple, similar to the first loop.  */
994 	      case GOMP_MAP_USE_DEVICE_PTR:
995 		if (tgt->list[i].offset == 0)
996 		  {
997 		    cur_node.host_start = (uintptr_t) hostaddrs[i];
998 		    cur_node.host_end = cur_node.host_start;
999 		    n = gomp_map_lookup (mem_map, &cur_node);
1000 		    if (n != NULL)
1001 		      {
1002 			cur_node.host_start -= n->host_start;
1003 			hostaddrs[i]
1004 			  = (void *) (n->tgt->tgt_start + n->tgt_offset
1005 				      + cur_node.host_start);
1006 		      }
1007 		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1008 		      {
1009 			gomp_mutex_unlock (&devicep->lock);
1010 			gomp_fatal ("use_device_ptr pointer wasn't mapped");
1011 		      }
1012 		    else if ((kind & typemask)
1013 			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1014 		      /* If not present, continue using the host address.  */
1015 		      ;
1016 		    else
1017 		      __builtin_unreachable ();
1018 		    tgt->list[i].offset = OFFSET_INLINED;
1019 		  }
1020 		continue;
1021 	      case GOMP_MAP_STRUCT:
1022 		first = i + 1;
1023 		last = i + sizes[i];
1024 		cur_node.host_start = (uintptr_t) hostaddrs[i];
1025 		cur_node.host_end = (uintptr_t) hostaddrs[last]
1026 				    + sizes[last];
1027 		if (tgt->list[first].key != NULL)
1028 		  continue;
1029 		n = splay_tree_lookup (mem_map, &cur_node);
1030 		if (n == NULL)
1031 		  {
1032 		    size_t align = (size_t) 1 << (kind >> rshift);
1033 		    tgt_size -= (uintptr_t) hostaddrs[first]
1034 				- (uintptr_t) hostaddrs[i];
1035 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1036 		    tgt_size += (uintptr_t) hostaddrs[first]
1037 				- (uintptr_t) hostaddrs[i];
1038 		    field_tgt_base = (uintptr_t) hostaddrs[first];
1039 		    field_tgt_offset = tgt_size;
1040 		    field_tgt_clear = last;
1041 		    tgt_size += cur_node.host_end
1042 				- (uintptr_t) hostaddrs[first];
1043 		    continue;
1044 		  }
1045 		for (i = first; i <= last; i++)
1046 		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1047 					    sizes, kinds, cbufp);
1048 		i--;
1049 		continue;
1050 	      case GOMP_MAP_ALWAYS_POINTER:
1051 		cur_node.host_start = (uintptr_t) hostaddrs[i];
1052 		cur_node.host_end = cur_node.host_start + sizeof (void *);
1053 		n = splay_tree_lookup (mem_map, &cur_node);
1054 		if (n == NULL
1055 		    || n->host_start > cur_node.host_start
1056 		    || n->host_end < cur_node.host_end)
1057 		  {
1058 		    gomp_mutex_unlock (&devicep->lock);
1059 		    gomp_fatal ("always pointer not mapped");
1060 		  }
1061 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1062 		    != GOMP_MAP_ALWAYS_POINTER)
1063 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1064 		if (cur_node.tgt_offset)
1065 		  cur_node.tgt_offset -= sizes[i];
1066 		gomp_copy_host2dev (devicep, aq,
1067 				    (void *) (n->tgt->tgt_start
1068 					      + n->tgt_offset
1069 					      + cur_node.host_start
1070 					      - n->host_start),
1071 				    (void *) &cur_node.tgt_offset,
1072 				    sizeof (void *), cbufp);
1073 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1074 				      + cur_node.host_start - n->host_start;
1075 		continue;
1076 	      case GOMP_MAP_IF_PRESENT:
1077 		/* Not present - otherwise handled above. Skip over its
1078 		   MAP_POINTER as well.  */
1079 		if (i + 1 < mapnum
1080 		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1081 			== GOMP_MAP_POINTER))
1082 		  ++i;
1083 		continue;
1084 	      case GOMP_MAP_ATTACH:
1085 		{
1086 		  cur_node.host_start = (uintptr_t) hostaddrs[i];
1087 		  cur_node.host_end = cur_node.host_start + sizeof (void *);
1088 		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1089 		  if (n != NULL)
1090 		    {
1091 		      tgt->list[i].key = n;
1092 		      tgt->list[i].offset = cur_node.host_start - n->host_start;
1093 		      tgt->list[i].length = n->host_end - n->host_start;
1094 		      tgt->list[i].copy_from = false;
1095 		      tgt->list[i].always_copy_from = false;
1096 		      tgt->list[i].is_attach = true;
1097 		      /* OpenACC 'attach'/'detach' doesn't affect
1098 			 structured/dynamic reference counts ('n->refcount',
1099 			 'n->dynamic_refcount').  */
1100 		    }
1101 		  else
1102 		    {
1103 		      gomp_mutex_unlock (&devicep->lock);
1104 		      gomp_fatal ("outer struct not mapped for attach");
1105 		    }
1106 		  gomp_attach_pointer (devicep, aq, mem_map, n,
1107 				       (uintptr_t) hostaddrs[i], sizes[i],
1108 				       cbufp);
1109 		  continue;
1110 		}
1111 	      default:
1112 		break;
1113 	      }
1114 	    splay_tree_key k = &array->key;
1115 	    k->host_start = (uintptr_t) hostaddrs[i];
1116 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
1117 	      k->host_end = k->host_start + sizes[i];
1118 	    else
1119 	      k->host_end = k->host_start + sizeof (void *);
1120 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
1121 	    if (n && n->refcount != REFCOUNT_LINK)
1122 	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1123 				      kind & typemask, cbufp);
1124 	    else
1125 	      {
1126 		k->aux = NULL;
1127 		if (n && n->refcount == REFCOUNT_LINK)
1128 		  {
1129 		    /* Replace target address of the pointer with target address
1130 		       of mapped object in the splay tree.  */
1131 		    splay_tree_remove (mem_map, n);
1132 		    k->aux
1133 		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1134 		    k->aux->link_key = n;
1135 		  }
1136 		size_t align = (size_t) 1 << (kind >> rshift);
1137 		tgt->list[i].key = k;
1138 		k->tgt = tgt;
1139 		if (field_tgt_clear != FIELD_TGT_EMPTY)
1140 		  {
1141 		    k->tgt_offset = k->host_start - field_tgt_base
1142 				    + field_tgt_offset;
1143 		    if (i == field_tgt_clear)
1144 		      field_tgt_clear = FIELD_TGT_EMPTY;
1145 		  }
1146 		else
1147 		  {
1148 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1149 		    k->tgt_offset = tgt_size;
1150 		    tgt_size += k->host_end - k->host_start;
1151 		  }
1152 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1153 		tgt->list[i].always_copy_from
1154 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1155 		tgt->list[i].is_attach = false;
1156 		tgt->list[i].offset = 0;
1157 		tgt->list[i].length = k->host_end - k->host_start;
1158 		k->refcount = 1;
1159 		k->dynamic_refcount = 0;
1160 		tgt->refcount++;
1161 		array->left = NULL;
1162 		array->right = NULL;
1163 		splay_tree_insert (mem_map, array);
1164 		switch (kind & typemask)
1165 		  {
1166 		  case GOMP_MAP_ALLOC:
1167 		  case GOMP_MAP_FROM:
1168 		  case GOMP_MAP_FORCE_ALLOC:
1169 		  case GOMP_MAP_FORCE_FROM:
1170 		  case GOMP_MAP_ALWAYS_FROM:
1171 		    break;
1172 		  case GOMP_MAP_TO:
1173 		  case GOMP_MAP_TOFROM:
1174 		  case GOMP_MAP_FORCE_TO:
1175 		  case GOMP_MAP_FORCE_TOFROM:
1176 		  case GOMP_MAP_ALWAYS_TO:
1177 		  case GOMP_MAP_ALWAYS_TOFROM:
1178 		    gomp_copy_host2dev (devicep, aq,
1179 					(void *) (tgt->tgt_start
1180 						  + k->tgt_offset),
1181 					(void *) k->host_start,
1182 					k->host_end - k->host_start, cbufp);
1183 		    break;
1184 		  case GOMP_MAP_POINTER:
1185 		    gomp_map_pointer (tgt, aq,
1186 				      (uintptr_t) *(void **) k->host_start,
1187 				      k->tgt_offset, sizes[i], cbufp);
1188 		    break;
1189 		  case GOMP_MAP_TO_PSET:
1190 		    gomp_copy_host2dev (devicep, aq,
1191 					(void *) (tgt->tgt_start
1192 						  + k->tgt_offset),
1193 					(void *) k->host_start,
1194 					k->host_end - k->host_start, cbufp);
1195 
1196 		    for (j = i + 1; j < mapnum; j++)
1197 		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
1198 							 j)
1199 					       & typemask))
1200 			break;
1201 		      else if ((uintptr_t) hostaddrs[j] < k->host_start
1202 			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1203 				   > k->host_end))
1204 			break;
1205 		      else
1206 			{
1207 			  tgt->list[j].key = k;
1208 			  tgt->list[j].copy_from = false;
1209 			  tgt->list[j].always_copy_from = false;
1210 			  tgt->list[j].is_attach = false;
1211 			  if (k->refcount != REFCOUNT_INFINITY)
1212 			    k->refcount++;
1213 			  gomp_map_pointer (tgt, aq,
1214 					    (uintptr_t) *(void **) hostaddrs[j],
1215 					    k->tgt_offset
1216 					    + ((uintptr_t) hostaddrs[j]
1217 					       - k->host_start),
1218 					    sizes[j], cbufp);
1219 			  i++;
1220 			}
1221 		    break;
1222 		  case GOMP_MAP_FORCE_PRESENT:
1223 		    {
1224 		      /* We already looked up the memory region above and it
1225 			 was missing.  */
1226 		      size_t size = k->host_end - k->host_start;
1227 		      gomp_mutex_unlock (&devicep->lock);
1228 #ifdef HAVE_INTTYPES_H
1229 		      gomp_fatal ("present clause: !acc_is_present (%p, "
1230 				  "%"PRIu64" (0x%"PRIx64"))",
1231 				  (void *) k->host_start,
1232 				  (uint64_t) size, (uint64_t) size);
1233 #else
1234 		      gomp_fatal ("present clause: !acc_is_present (%p, "
1235 				  "%lu (0x%lx))", (void *) k->host_start,
1236 				  (unsigned long) size, (unsigned long) size);
1237 #endif
1238 		    }
1239 		    break;
1240 		  case GOMP_MAP_FORCE_DEVICEPTR:
1241 		    assert (k->host_end - k->host_start == sizeof (void *));
1242 		    gomp_copy_host2dev (devicep, aq,
1243 					(void *) (tgt->tgt_start
1244 						  + k->tgt_offset),
1245 					(void *) k->host_start,
1246 					sizeof (void *), cbufp);
1247 		    break;
1248 		  default:
1249 		    gomp_mutex_unlock (&devicep->lock);
1250 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1251 				kind);
1252 		  }
1253 
1254 		if (k->aux && k->aux->link_key)
1255 		  {
1256 		    /* Set link pointer on target to the device address of the
1257 		       mapped object.  */
1258 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1259 		    /* We intentionally do not use coalescing here, as it's not
1260 		       data allocated by the current call to this function.  */
1261 		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1262 					&tgt_addr, sizeof (void *), NULL);
1263 		  }
1264 		array++;
1265 	      }
1266 	  }
1267     }
1268 
1269   if (pragma_kind == GOMP_MAP_VARS_TARGET)
1270     {
1271       for (i = 0; i < mapnum; i++)
1272 	{
1273 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1274 	  gomp_copy_host2dev (devicep, aq,
1275 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
1276 			      (void *) &cur_node.tgt_offset, sizeof (void *),
1277 			      cbufp);
1278 	}
1279     }
1280 
1281   if (cbufp)
1282     {
1283       long c = 0;
1284       for (c = 0; c < cbuf.chunk_cnt; ++c)
1285 	gomp_copy_host2dev (devicep, aq,
1286 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1287 			    (char *) cbuf.buf + (cbuf.chunks[c].start
1288 						 - cbuf.chunks[0].start),
1289 			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
1290       free (cbuf.buf);
1291       cbuf.buf = NULL;
1292       cbufp = NULL;
1293     }
1294 
1295   /* If the variable from "omp target enter data" map-list was already mapped,
1296      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
1297      gomp_exit_data.  */
1298   if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1299     {
1300       free (tgt);
1301       tgt = NULL;
1302     }
1303 
1304   gomp_mutex_unlock (&devicep->lock);
1305   return tgt;
1306 }
1307 
1308 attribute_hidden struct target_mem_desc *
1309 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1310 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1311 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
1312 {
1313   return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1314 				 sizes, kinds, short_mapkind, pragma_kind);
1315 }
1316 
1317 attribute_hidden struct target_mem_desc *
1318 gomp_map_vars_async (struct gomp_device_descr *devicep,
1319 		     struct goacc_asyncqueue *aq, size_t mapnum,
1320 		     void **hostaddrs, void **devaddrs, size_t *sizes,
1321 		     void *kinds, bool short_mapkind,
1322 		     enum gomp_map_vars_kind pragma_kind)
1323 {
1324   return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1325 				 sizes, kinds, short_mapkind, pragma_kind);
1326 }
1327 
1328 static void
1329 gomp_unmap_tgt (struct target_mem_desc *tgt)
1330 {
1331   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1332   if (tgt->tgt_end)
1333     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1334 
1335   free (tgt->array);
1336   free (tgt);
1337 }
1338 
1339 static bool
1340 gomp_unref_tgt (void *ptr)
1341 {
1342   bool is_tgt_unmapped = false;
1343 
1344   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1345 
1346   if (tgt->refcount > 1)
1347     tgt->refcount--;
1348   else
1349     {
1350       gomp_unmap_tgt (tgt);
1351       is_tgt_unmapped = true;
1352     }
1353 
1354   return is_tgt_unmapped;
1355 }
1356 
1357 static void
1358 gomp_unref_tgt_void (void *ptr)
1359 {
1360   (void) gomp_unref_tgt (ptr);
1361 }
1362 
1363 static inline __attribute__((always_inline)) bool
1364 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1365 			  struct goacc_asyncqueue *aq)
1366 {
1367   bool is_tgt_unmapped = false;
1368   splay_tree_remove (&devicep->mem_map, k);
1369   if (k->aux)
1370     {
1371       if (k->aux->link_key)
1372 	splay_tree_insert (&devicep->mem_map,
1373 			   (splay_tree_node) k->aux->link_key);
1374       if (k->aux->attach_count)
1375 	free (k->aux->attach_count);
1376       free (k->aux);
1377       k->aux = NULL;
1378     }
1379   if (aq)
1380     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1381 						(void *) k->tgt);
1382   else
1383     is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1384   return is_tgt_unmapped;
1385 }
1386 
1387 attribute_hidden bool
1388 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1389 {
1390   return gomp_remove_var_internal (devicep, k, NULL);
1391 }
1392 
1393 /* Remove a variable asynchronously.  This actually removes the variable
1394    mapping immediately, but retains the linked target_mem_desc until the
1395    asynchronous operation has completed (as it may still refer to target
1396    memory).  The device lock must be held before entry, and remains locked on
1397    exit.  */
1398 
1399 attribute_hidden void
1400 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1401 		       struct goacc_asyncqueue *aq)
1402 {
1403   (void) gomp_remove_var_internal (devicep, k, aq);
1404 }
1405 
1406 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1407    variables back from device to host: if it is false, it is assumed that this
1408    has been done already.  */
1409 
1410 static inline __attribute__((always_inline)) void
1411 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1412 			  struct goacc_asyncqueue *aq)
1413 {
1414   struct gomp_device_descr *devicep = tgt->device_descr;
1415 
1416   if (tgt->list_count == 0)
1417     {
1418       free (tgt);
1419       return;
1420     }
1421 
1422   gomp_mutex_lock (&devicep->lock);
1423   if (devicep->state == GOMP_DEVICE_FINALIZED)
1424     {
1425       gomp_mutex_unlock (&devicep->lock);
1426       free (tgt->array);
1427       free (tgt);
1428       return;
1429     }
1430 
1431   size_t i;
1432 
1433   /* We must perform detachments before any copies back to the host.  */
1434   for (i = 0; i < tgt->list_count; i++)
1435     {
1436       splay_tree_key k = tgt->list[i].key;
1437 
1438       if (k != NULL && tgt->list[i].is_attach)
1439 	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1440 					     + tgt->list[i].offset,
1441 			     false, NULL);
1442     }
1443 
1444   for (i = 0; i < tgt->list_count; i++)
1445     {
1446       splay_tree_key k = tgt->list[i].key;
1447       if (k == NULL)
1448 	continue;
1449 
1450       /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1451 	 counts ('n->refcount', 'n->dynamic_refcount').  */
1452       if (tgt->list[i].is_attach)
1453 	continue;
1454 
1455       bool do_unmap = false;
1456       if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1457 	k->refcount--;
1458       else if (k->refcount == 1)
1459 	{
1460 	  k->refcount--;
1461 	  do_unmap = true;
1462 	}
1463 
1464       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1465 	  || tgt->list[i].always_copy_from)
1466 	gomp_copy_dev2host (devicep, aq,
1467 			    (void *) (k->host_start + tgt->list[i].offset),
1468 			    (void *) (k->tgt->tgt_start + k->tgt_offset
1469 				      + tgt->list[i].offset),
1470 			    tgt->list[i].length);
1471       if (do_unmap)
1472 	{
1473 	  struct target_mem_desc *k_tgt = k->tgt;
1474 	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1475 	  /* It would be bad if TGT got unmapped while we're still iterating
1476 	     over its LIST_COUNT, and also expect to use it in the following
1477 	     code.  */
1478 	  assert (!is_tgt_unmapped
1479 		  || k_tgt != tgt);
1480 	}
1481     }
1482 
1483   if (aq)
1484     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1485 						(void *) tgt);
1486   else
1487     gomp_unref_tgt ((void *) tgt);
1488 
1489   gomp_mutex_unlock (&devicep->lock);
1490 }
1491 
1492 attribute_hidden void
1493 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1494 {
1495   gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
1496 }
1497 
1498 attribute_hidden void
1499 gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
1500 		       struct goacc_asyncqueue *aq)
1501 {
1502   gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
1503 }
1504 
1505 static void
1506 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1507 	     size_t *sizes, void *kinds, bool short_mapkind)
1508 {
1509   size_t i;
1510   struct splay_tree_key_s cur_node;
1511   const int typemask = short_mapkind ? 0xff : 0x7;
1512 
1513   if (!devicep)
1514     return;
1515 
1516   if (mapnum == 0)
1517     return;
1518 
1519   gomp_mutex_lock (&devicep->lock);
1520   if (devicep->state == GOMP_DEVICE_FINALIZED)
1521     {
1522       gomp_mutex_unlock (&devicep->lock);
1523       return;
1524     }
1525 
1526   for (i = 0; i < mapnum; i++)
1527     if (sizes[i])
1528       {
1529 	cur_node.host_start = (uintptr_t) hostaddrs[i];
1530 	cur_node.host_end = cur_node.host_start + sizes[i];
1531 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1532 	if (n)
1533 	  {
1534 	    int kind = get_kind (short_mapkind, kinds, i);
1535 	    if (n->host_start > cur_node.host_start
1536 		|| n->host_end < cur_node.host_end)
1537 	      {
1538 		gomp_mutex_unlock (&devicep->lock);
1539 		gomp_fatal ("Trying to update [%p..%p) object when "
1540 			    "only [%p..%p) is mapped",
1541 			    (void *) cur_node.host_start,
1542 			    (void *) cur_node.host_end,
1543 			    (void *) n->host_start,
1544 			    (void *) n->host_end);
1545 	      }
1546 
1547 
1548 	    void *hostaddr = (void *) cur_node.host_start;
1549 	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1550 				      + cur_node.host_start - n->host_start);
1551 	    size_t size = cur_node.host_end - cur_node.host_start;
1552 
1553 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
1554 	      gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
1555 				  NULL);
1556 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1557 	      gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
1558 	  }
1559       }
1560   gomp_mutex_unlock (&devicep->lock);
1561 }
1562 
1563 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
1564    And insert to splay tree the mapping between addresses from HOST_TABLE and
1565    from loaded target image.  We rely in the host and device compiler
1566    emitting variable and functions in the same order.  */
1567 
1568 static void
1569 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1570 			   const void *host_table, const void *target_data,
1571 			   bool is_register_lock)
1572 {
1573   void **host_func_table = ((void ***) host_table)[0];
1574   void **host_funcs_end  = ((void ***) host_table)[1];
1575   void **host_var_table  = ((void ***) host_table)[2];
1576   void **host_vars_end   = ((void ***) host_table)[3];
1577 
1578   /* The func table contains only addresses, the var table contains addresses
1579      and corresponding sizes.  */
1580   int num_funcs = host_funcs_end - host_func_table;
1581   int num_vars  = (host_vars_end - host_var_table) / 2;
1582 
1583   /* Load image to device and get target addresses for the image.  */
1584   struct addr_pair *target_table = NULL;
1585   int i, num_target_entries;
1586 
1587   num_target_entries
1588     = devicep->load_image_func (devicep->target_id, version,
1589 				target_data, &target_table);
1590 
1591   if (num_target_entries != num_funcs + num_vars)
1592     {
1593       gomp_mutex_unlock (&devicep->lock);
1594       if (is_register_lock)
1595 	gomp_mutex_unlock (&register_lock);
1596       gomp_fatal ("Cannot map target functions or variables"
1597 		  " (expected %u, have %u)", num_funcs + num_vars,
1598 		  num_target_entries);
1599     }
1600 
1601   /* Insert host-target address mapping into splay tree.  */
1602   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1603   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1604   tgt->refcount = REFCOUNT_INFINITY;
1605   tgt->tgt_start = 0;
1606   tgt->tgt_end = 0;
1607   tgt->to_free = NULL;
1608   tgt->prev = NULL;
1609   tgt->list_count = 0;
1610   tgt->device_descr = devicep;
1611   splay_tree_node array = tgt->array;
1612 
1613   for (i = 0; i < num_funcs; i++)
1614     {
1615       splay_tree_key k = &array->key;
1616       k->host_start = (uintptr_t) host_func_table[i];
1617       k->host_end = k->host_start + 1;
1618       k->tgt = tgt;
1619       k->tgt_offset = target_table[i].start;
1620       k->refcount = REFCOUNT_INFINITY;
1621       k->dynamic_refcount = 0;
1622       k->aux = NULL;
1623       array->left = NULL;
1624       array->right = NULL;
1625       splay_tree_insert (&devicep->mem_map, array);
1626       array++;
1627     }
1628 
1629   /* Most significant bit of the size in host and target tables marks
1630      "omp declare target link" variables.  */
1631   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1632   const uintptr_t size_mask = ~link_bit;
1633 
1634   for (i = 0; i < num_vars; i++)
1635     {
1636       struct addr_pair *target_var = &target_table[num_funcs + i];
1637       uintptr_t target_size = target_var->end - target_var->start;
1638       bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
1639 
1640       if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
1641 	{
1642 	  gomp_mutex_unlock (&devicep->lock);
1643 	  if (is_register_lock)
1644 	    gomp_mutex_unlock (&register_lock);
1645 	  gomp_fatal ("Cannot map target variables (size mismatch)");
1646 	}
1647 
1648       splay_tree_key k = &array->key;
1649       k->host_start = (uintptr_t) host_var_table[i * 2];
1650       k->host_end
1651 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1652       k->tgt = tgt;
1653       k->tgt_offset = target_var->start;
1654       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1655       k->dynamic_refcount = 0;
1656       k->aux = NULL;
1657       array->left = NULL;
1658       array->right = NULL;
1659       splay_tree_insert (&devicep->mem_map, array);
1660       array++;
1661     }
1662 
1663   free (target_table);
1664 }
1665 
1666 /* Unload the mappings described by target_data from device DEVICE_P.
1667    The device must be locked.   */
1668 
1669 static void
1670 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1671 			       unsigned version,
1672 			       const void *host_table, const void *target_data)
1673 {
1674   void **host_func_table = ((void ***) host_table)[0];
1675   void **host_funcs_end  = ((void ***) host_table)[1];
1676   void **host_var_table  = ((void ***) host_table)[2];
1677   void **host_vars_end   = ((void ***) host_table)[3];
1678 
1679   /* The func table contains only addresses, the var table contains addresses
1680      and corresponding sizes.  */
1681   int num_funcs = host_funcs_end - host_func_table;
1682   int num_vars  = (host_vars_end - host_var_table) / 2;
1683 
1684   struct splay_tree_key_s k;
1685   splay_tree_key node = NULL;
1686 
1687   /* Find mapping at start of node array */
1688   if (num_funcs || num_vars)
1689     {
1690       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1691 		      : (uintptr_t) host_var_table[0]);
1692       k.host_end = k.host_start + 1;
1693       node = splay_tree_lookup (&devicep->mem_map, &k);
1694     }
1695 
1696   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1697     {
1698       gomp_mutex_unlock (&devicep->lock);
1699       gomp_fatal ("image unload fail");
1700     }
1701 
1702   /* Remove mappings from splay tree.  */
1703   int i;
1704   for (i = 0; i < num_funcs; i++)
1705     {
1706       k.host_start = (uintptr_t) host_func_table[i];
1707       k.host_end = k.host_start + 1;
1708       splay_tree_remove (&devicep->mem_map, &k);
1709     }
1710 
1711   /* Most significant bit of the size in host and target tables marks
1712      "omp declare target link" variables.  */
1713   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1714   const uintptr_t size_mask = ~link_bit;
1715   bool is_tgt_unmapped = false;
1716 
1717   for (i = 0; i < num_vars; i++)
1718     {
1719       k.host_start = (uintptr_t) host_var_table[i * 2];
1720       k.host_end
1721 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1722 
1723       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1724 	splay_tree_remove (&devicep->mem_map, &k);
1725       else
1726 	{
1727 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1728 	  is_tgt_unmapped = gomp_remove_var (devicep, n);
1729 	}
1730     }
1731 
1732   if (node && !is_tgt_unmapped)
1733     {
1734       free (node->tgt);
1735       free (node);
1736     }
1737 }
1738 
1739 /* This function should be called from every offload image while loading.
1740    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1741    the target, and TARGET_DATA needed by target plugin.  */
1742 
1743 void
1744 GOMP_offload_register_ver (unsigned version, const void *host_table,
1745 			   int target_type, const void *target_data)
1746 {
1747   int i;
1748 
1749   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1750     gomp_fatal ("Library too old for offload (version %u < %u)",
1751 		GOMP_VERSION, GOMP_VERSION_LIB (version));
1752 
1753   gomp_mutex_lock (&register_lock);
1754 
1755   /* Load image to all initialized devices.  */
1756   for (i = 0; i < num_devices; i++)
1757     {
1758       struct gomp_device_descr *devicep = &devices[i];
1759       gomp_mutex_lock (&devicep->lock);
1760       if (devicep->type == target_type
1761 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1762 	gomp_load_image_to_device (devicep, version,
1763 				   host_table, target_data, true);
1764       gomp_mutex_unlock (&devicep->lock);
1765     }
1766 
1767   /* Insert image to array of pending images.  */
1768   offload_images
1769     = gomp_realloc_unlock (offload_images,
1770 			   (num_offload_images + 1)
1771 			   * sizeof (struct offload_image_descr));
1772   offload_images[num_offload_images].version = version;
1773   offload_images[num_offload_images].type = target_type;
1774   offload_images[num_offload_images].host_table = host_table;
1775   offload_images[num_offload_images].target_data = target_data;
1776 
1777   num_offload_images++;
1778   gomp_mutex_unlock (&register_lock);
1779 }
1780 
1781 void
1782 GOMP_offload_register (const void *host_table, int target_type,
1783 		       const void *target_data)
1784 {
1785   GOMP_offload_register_ver (0, host_table, target_type, target_data);
1786 }
1787 
1788 /* This function should be called from every offload image while unloading.
1789    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1790    the target, and TARGET_DATA needed by target plugin.  */
1791 
1792 void
1793 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1794 			     int target_type, const void *target_data)
1795 {
1796   int i;
1797 
1798   gomp_mutex_lock (&register_lock);
1799 
1800   /* Unload image from all initialized devices.  */
1801   for (i = 0; i < num_devices; i++)
1802     {
1803       struct gomp_device_descr *devicep = &devices[i];
1804       gomp_mutex_lock (&devicep->lock);
1805       if (devicep->type == target_type
1806 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1807 	gomp_unload_image_from_device (devicep, version,
1808 				       host_table, target_data);
1809       gomp_mutex_unlock (&devicep->lock);
1810     }
1811 
1812   /* Remove image from array of pending images.  */
1813   for (i = 0; i < num_offload_images; i++)
1814     if (offload_images[i].target_data == target_data)
1815       {
1816 	offload_images[i] = offload_images[--num_offload_images];
1817 	break;
1818       }
1819 
1820   gomp_mutex_unlock (&register_lock);
1821 }
1822 
1823 void
1824 GOMP_offload_unregister (const void *host_table, int target_type,
1825 			 const void *target_data)
1826 {
1827   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1828 }
1829 
1830 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
1831    must be locked on entry, and remains locked on return.  */
1832 
1833 attribute_hidden void
1834 gomp_init_device (struct gomp_device_descr *devicep)
1835 {
1836   int i;
1837   if (!devicep->init_device_func (devicep->target_id))
1838     {
1839       gomp_mutex_unlock (&devicep->lock);
1840       gomp_fatal ("device initialization failed");
1841     }
1842 
1843   /* Load to device all images registered by the moment.  */
1844   for (i = 0; i < num_offload_images; i++)
1845     {
1846       struct offload_image_descr *image = &offload_images[i];
1847       if (image->type == devicep->type)
1848 	gomp_load_image_to_device (devicep, image->version,
1849 				   image->host_table, image->target_data,
1850 				   false);
1851     }
1852 
1853   /* Initialize OpenACC asynchronous queues.  */
1854   goacc_init_asyncqueues (devicep);
1855 
1856   devicep->state = GOMP_DEVICE_INITIALIZED;
1857 }
1858 
1859 /* This function finalizes the target device, specified by DEVICEP.  DEVICEP
1860    must be locked on entry, and remains locked on return.  */
1861 
1862 attribute_hidden bool
1863 gomp_fini_device (struct gomp_device_descr *devicep)
1864 {
1865   bool ret = goacc_fini_asyncqueues (devicep);
1866   ret &= devicep->fini_device_func (devicep->target_id);
1867   devicep->state = GOMP_DEVICE_FINALIZED;
1868   return ret;
1869 }
1870 
1871 attribute_hidden void
1872 gomp_unload_device (struct gomp_device_descr *devicep)
1873 {
1874   if (devicep->state == GOMP_DEVICE_INITIALIZED)
1875     {
1876       unsigned i;
1877 
1878       /* Unload from device all images registered at the moment.  */
1879       for (i = 0; i < num_offload_images; i++)
1880 	{
1881 	  struct offload_image_descr *image = &offload_images[i];
1882 	  if (image->type == devicep->type)
1883 	    gomp_unload_image_from_device (devicep, image->version,
1884 					   image->host_table,
1885 					   image->target_data);
1886 	}
1887     }
1888 }
1889 
1890 /* Host fallback for GOMP_target{,_ext} routines.  */
1891 
1892 static void
1893 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1894 {
1895   struct gomp_thread old_thr, *thr = gomp_thread ();
1896   old_thr = *thr;
1897   memset (thr, '\0', sizeof (*thr));
1898   if (gomp_places_list)
1899     {
1900       thr->place = old_thr.place;
1901       thr->ts.place_partition_len = gomp_places_list_len;
1902     }
1903   fn (hostaddrs);
1904   gomp_free_thread (thr);
1905   *thr = old_thr;
1906 }
1907 
1908 /* Calculate alignment and size requirements of a private copy of data shared
1909    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
1910 
1911 static inline void
1912 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1913 				     unsigned short *kinds, size_t *tgt_align,
1914 				     size_t *tgt_size)
1915 {
1916   size_t i;
1917   for (i = 0; i < mapnum; i++)
1918     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1919       {
1920 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1921 	if (*tgt_align < align)
1922 	  *tgt_align = align;
1923 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1924 	*tgt_size += sizes[i];
1925       }
1926 }
1927 
1928 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
1929 
1930 static inline void
1931 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1932 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
1933 			size_t tgt_size)
1934 {
1935   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1936   if (al)
1937     tgt += tgt_align - al;
1938   tgt_size = 0;
1939   size_t i;
1940   for (i = 0; i < mapnum; i++)
1941     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1942       {
1943 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1944 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
1945 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1946 	hostaddrs[i] = tgt + tgt_size;
1947 	tgt_size = tgt_size + sizes[i];
1948       }
1949 }
1950 
1951 /* Helper function of GOMP_target{,_ext} routines.  */
1952 
1953 static void *
1954 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1955 			 void (*host_fn) (void *))
1956 {
1957   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1958     return (void *) host_fn;
1959   else
1960     {
1961       gomp_mutex_lock (&devicep->lock);
1962       if (devicep->state == GOMP_DEVICE_FINALIZED)
1963 	{
1964 	  gomp_mutex_unlock (&devicep->lock);
1965 	  return NULL;
1966 	}
1967 
1968       struct splay_tree_key_s k;
1969       k.host_start = (uintptr_t) host_fn;
1970       k.host_end = k.host_start + 1;
1971       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1972       gomp_mutex_unlock (&devicep->lock);
1973       if (tgt_fn == NULL)
1974 	return NULL;
1975 
1976       return (void *) tgt_fn->tgt_offset;
1977     }
1978 }
1979 
1980 /* Called when encountering a target directive.  If DEVICE
1981    is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
1982    GOMP_DEVICE_HOST_FALLBACK (or any value
1983    larger than last available hw device), use host fallback.
1984    FN is address of host code, UNUSED is part of the current ABI, but
1985    we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
1986    with MAPNUM entries, with addresses of the host objects,
1987    sizes of the host objects (resp. for pointer kind pointer bias
1988    and assumed sizeof (void *) size) and kinds.  */
1989 
1990 void
1991 GOMP_target (int device, void (*fn) (void *), const void *unused,
1992 	     size_t mapnum, void **hostaddrs, size_t *sizes,
1993 	     unsigned char *kinds)
1994 {
1995   struct gomp_device_descr *devicep = resolve_device (device);
1996 
1997   void *fn_addr;
1998   if (devicep == NULL
1999       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2000       /* All shared memory devices should use the GOMP_target_ext function.  */
2001       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2002       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2003     return gomp_target_fallback (fn, hostaddrs);
2004 
2005   struct target_mem_desc *tgt_vars
2006     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2007 		     GOMP_MAP_VARS_TARGET);
2008   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2009 		     NULL);
2010   gomp_unmap_vars (tgt_vars, true);
2011 }
2012 
2013 static inline unsigned int
2014 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2015 {
2016   /* If we cannot run asynchronously, simply ignore nowait.  */
2017   if (devicep != NULL && devicep->async_run_func == NULL)
2018     flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2019 
2020   return flags;
2021 }
2022 
2023 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2024    and several arguments have been added:
2025    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2026    DEPEND is array of dependencies, see GOMP_task for details.
2027 
2028    ARGS is a pointer to an array consisting of a variable number of both
2029    device-independent and device-specific arguments, which can take one two
2030    elements where the first specifies for which device it is intended, the type
2031    and optionally also the value.  If the value is not present in the first
2032    one, the whole second element the actual value.  The last element of the
2033    array is a single NULL.  Among the device independent can be for example
2034    NUM_TEAMS and THREAD_LIMIT.
2035 
2036    NUM_TEAMS is positive if GOMP_teams will be called in the body with
2037    that value, or 1 if teams construct is not present, or 0, if
2038    teams construct does not have num_teams clause and so the choice is
2039    implementation defined, and -1 if it can't be determined on the host
2040    what value will GOMP_teams have on the device.
2041    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2042    body with that value, or 0, if teams construct does not have thread_limit
2043    clause or the teams construct is not present, or -1 if it can't be
2044    determined on the host what value will GOMP_teams have on the device.  */
2045 
2046 void
2047 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2048 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2049 		 unsigned int flags, void **depend, void **args)
2050 {
2051   struct gomp_device_descr *devicep = resolve_device (device);
2052   size_t tgt_align = 0, tgt_size = 0;
2053   bool fpc_done = false;
2054 
2055   flags = clear_unsupported_flags (devicep, flags);
2056 
2057   if (flags & GOMP_TARGET_FLAG_NOWAIT)
2058     {
2059       struct gomp_thread *thr = gomp_thread ();
2060       /* Create a team if we don't have any around, as nowait
2061 	 target tasks make sense to run asynchronously even when
2062 	 outside of any parallel.  */
2063       if (__builtin_expect (thr->ts.team == NULL, 0))
2064 	{
2065 	  struct gomp_team *team = gomp_new_team (1);
2066 	  struct gomp_task *task = thr->task;
2067 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2068 	  team->prev_ts = thr->ts;
2069 	  thr->ts.team = team;
2070 	  thr->ts.team_id = 0;
2071 	  thr->ts.work_share = &team->work_shares[0];
2072 	  thr->ts.last_work_share = NULL;
2073 #ifdef HAVE_SYNC_BUILTINS
2074 	  thr->ts.single_count = 0;
2075 #endif
2076 	  thr->ts.static_trip = 0;
2077 	  thr->task = &team->implicit_task[0];
2078 	  gomp_init_task (thr->task, NULL, icv);
2079 	  if (task)
2080 	    {
2081 	      thr->task = task;
2082 	      gomp_end_task ();
2083 	      free (task);
2084 	      thr->task = &team->implicit_task[0];
2085 	    }
2086 	  else
2087 	    pthread_setspecific (gomp_thread_destructor, thr);
2088 	}
2089       if (thr->ts.team
2090 	  && !thr->task->final_task)
2091 	{
2092 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2093 				   sizes, kinds, flags, depend, args,
2094 				   GOMP_TARGET_TASK_BEFORE_MAP);
2095 	  return;
2096 	}
2097     }
2098 
2099   /* If there are depend clauses, but nowait is not present
2100      (or we are in a final task), block the parent task until the
2101      dependencies are resolved and then just continue with the rest
2102      of the function as if it is a merged task.  */
2103   if (depend != NULL)
2104     {
2105       struct gomp_thread *thr = gomp_thread ();
2106       if (thr->task && thr->task->depend_hash)
2107 	{
2108 	  /* If we might need to wait, copy firstprivate now.  */
2109 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2110 					       &tgt_align, &tgt_size);
2111 	  if (tgt_align)
2112 	    {
2113 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2114 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2115 				      tgt_align, tgt_size);
2116 	    }
2117 	  fpc_done = true;
2118 	  gomp_task_maybe_wait_for_dependencies (depend);
2119 	}
2120     }
2121 
2122   void *fn_addr;
2123   if (devicep == NULL
2124       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2125       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2126       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2127     {
2128       if (!fpc_done)
2129 	{
2130 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2131 					       &tgt_align, &tgt_size);
2132 	  if (tgt_align)
2133 	    {
2134 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2135 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2136 				      tgt_align, tgt_size);
2137 	    }
2138 	}
2139       gomp_target_fallback (fn, hostaddrs);
2140       return;
2141     }
2142 
2143   struct target_mem_desc *tgt_vars;
2144   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2145     {
2146       if (!fpc_done)
2147 	{
2148 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2149 					       &tgt_align, &tgt_size);
2150 	  if (tgt_align)
2151 	    {
2152 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2153 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2154 				      tgt_align, tgt_size);
2155 	    }
2156 	}
2157       tgt_vars = NULL;
2158     }
2159   else
2160     tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2161 			      true, GOMP_MAP_VARS_TARGET);
2162   devicep->run_func (devicep->target_id, fn_addr,
2163 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2164 		     args);
2165   if (tgt_vars)
2166     gomp_unmap_vars (tgt_vars, true);
2167 }
2168 
2169 /* Host fallback for GOMP_target_data{,_ext} routines.  */
2170 
2171 static void
2172 gomp_target_data_fallback (void)
2173 {
2174   struct gomp_task_icv *icv = gomp_icv (false);
2175   if (icv->target_data)
2176     {
2177       /* Even when doing a host fallback, if there are any active
2178          #pragma omp target data constructs, need to remember the
2179          new #pragma omp target data, otherwise GOMP_target_end_data
2180          would get out of sync.  */
2181       struct target_mem_desc *tgt
2182 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2183 			 GOMP_MAP_VARS_DATA);
2184       tgt->prev = icv->target_data;
2185       icv->target_data = tgt;
2186     }
2187 }
2188 
2189 void
2190 GOMP_target_data (int device, const void *unused, size_t mapnum,
2191 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
2192 {
2193   struct gomp_device_descr *devicep = resolve_device (device);
2194 
2195   if (devicep == NULL
2196       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2197       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2198     return gomp_target_data_fallback ();
2199 
2200   struct target_mem_desc *tgt
2201     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2202 		     GOMP_MAP_VARS_DATA);
2203   struct gomp_task_icv *icv = gomp_icv (true);
2204   tgt->prev = icv->target_data;
2205   icv->target_data = tgt;
2206 }
2207 
2208 void
2209 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2210 		      size_t *sizes, unsigned short *kinds)
2211 {
2212   struct gomp_device_descr *devicep = resolve_device (device);
2213 
2214   if (devicep == NULL
2215       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217     return gomp_target_data_fallback ();
2218 
2219   struct target_mem_desc *tgt
2220     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2221 		     GOMP_MAP_VARS_DATA);
2222   struct gomp_task_icv *icv = gomp_icv (true);
2223   tgt->prev = icv->target_data;
2224   icv->target_data = tgt;
2225 }
2226 
2227 void
2228 GOMP_target_end_data (void)
2229 {
2230   struct gomp_task_icv *icv = gomp_icv (false);
2231   if (icv->target_data)
2232     {
2233       struct target_mem_desc *tgt = icv->target_data;
2234       icv->target_data = tgt->prev;
2235       gomp_unmap_vars (tgt, true);
2236     }
2237 }
2238 
2239 void
2240 GOMP_target_update (int device, const void *unused, size_t mapnum,
2241 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
2242 {
2243   struct gomp_device_descr *devicep = resolve_device (device);
2244 
2245   if (devicep == NULL
2246       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2247       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2248     return;
2249 
2250   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2251 }
2252 
2253 void
2254 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2255 			size_t *sizes, unsigned short *kinds,
2256 			unsigned int flags, void **depend)
2257 {
2258   struct gomp_device_descr *devicep = resolve_device (device);
2259 
2260   /* If there are depend clauses, but nowait is not present,
2261      block the parent task until the dependencies are resolved
2262      and then just continue with the rest of the function as if it
2263      is a merged task.  Until we are able to schedule task during
2264      variable mapping or unmapping, ignore nowait if depend clauses
2265      are not present.  */
2266   if (depend != NULL)
2267     {
2268       struct gomp_thread *thr = gomp_thread ();
2269       if (thr->task && thr->task->depend_hash)
2270 	{
2271 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2272 	      && thr->ts.team
2273 	      && !thr->task->final_task)
2274 	    {
2275 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2276 					   mapnum, hostaddrs, sizes, kinds,
2277 					   flags | GOMP_TARGET_FLAG_UPDATE,
2278 					   depend, NULL, GOMP_TARGET_TASK_DATA))
2279 		return;
2280 	    }
2281 	  else
2282 	    {
2283 	      struct gomp_team *team = thr->ts.team;
2284 	      /* If parallel or taskgroup has been cancelled, don't start new
2285 		 tasks.  */
2286 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2287 		{
2288 		  if (gomp_team_barrier_cancelled (&team->barrier))
2289 		    return;
2290 		  if (thr->task->taskgroup)
2291 		    {
2292 		      if (thr->task->taskgroup->cancelled)
2293 			return;
2294 		      if (thr->task->taskgroup->workshare
2295 			  && thr->task->taskgroup->prev
2296 			  && thr->task->taskgroup->prev->cancelled)
2297 			return;
2298 		    }
2299 		}
2300 
2301 	      gomp_task_maybe_wait_for_dependencies (depend);
2302 	    }
2303 	}
2304     }
2305 
2306   if (devicep == NULL
2307       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2308       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2309     return;
2310 
2311   struct gomp_thread *thr = gomp_thread ();
2312   struct gomp_team *team = thr->ts.team;
2313   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2314   if (__builtin_expect (gomp_cancel_var, 0) && team)
2315     {
2316       if (gomp_team_barrier_cancelled (&team->barrier))
2317 	return;
2318       if (thr->task->taskgroup)
2319 	{
2320 	  if (thr->task->taskgroup->cancelled)
2321 	    return;
2322 	  if (thr->task->taskgroup->workshare
2323 	      && thr->task->taskgroup->prev
2324 	      && thr->task->taskgroup->prev->cancelled)
2325 	    return;
2326 	}
2327     }
2328 
2329   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2330 }
2331 
2332 static void
2333 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2334 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
2335 {
2336   const int typemask = 0xff;
2337   size_t i;
2338   gomp_mutex_lock (&devicep->lock);
2339   if (devicep->state == GOMP_DEVICE_FINALIZED)
2340     {
2341       gomp_mutex_unlock (&devicep->lock);
2342       return;
2343     }
2344 
2345   for (i = 0; i < mapnum; i++)
2346     {
2347       struct splay_tree_key_s cur_node;
2348       unsigned char kind = kinds[i] & typemask;
2349       switch (kind)
2350 	{
2351 	case GOMP_MAP_FROM:
2352 	case GOMP_MAP_ALWAYS_FROM:
2353 	case GOMP_MAP_DELETE:
2354 	case GOMP_MAP_RELEASE:
2355 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2356 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2357 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
2358 	  cur_node.host_end = cur_node.host_start + sizes[i];
2359 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2360 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2361 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2362 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
2363 	  if (!k)
2364 	    continue;
2365 
2366 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
2367 	    k->refcount--;
2368 	  if ((kind == GOMP_MAP_DELETE
2369 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
2370 	      && k->refcount != REFCOUNT_INFINITY)
2371 	    k->refcount = 0;
2372 
2373 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
2374 	      || kind == GOMP_MAP_ALWAYS_FROM)
2375 	    gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
2376 				(void *) (k->tgt->tgt_start + k->tgt_offset
2377 					  + cur_node.host_start
2378 					  - k->host_start),
2379 				cur_node.host_end - cur_node.host_start);
2380 	  if (k->refcount == 0)
2381 	    gomp_remove_var (devicep, k);
2382 
2383 	  break;
2384 	default:
2385 	  gomp_mutex_unlock (&devicep->lock);
2386 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
2387 		      kind);
2388 	}
2389     }
2390 
2391   gomp_mutex_unlock (&devicep->lock);
2392 }
2393 
2394 void
2395 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
2396 			     size_t *sizes, unsigned short *kinds,
2397 			     unsigned int flags, void **depend)
2398 {
2399   struct gomp_device_descr *devicep = resolve_device (device);
2400 
2401   /* If there are depend clauses, but nowait is not present,
2402      block the parent task until the dependencies are resolved
2403      and then just continue with the rest of the function as if it
2404      is a merged task.  Until we are able to schedule task during
2405      variable mapping or unmapping, ignore nowait if depend clauses
2406      are not present.  */
2407   if (depend != NULL)
2408     {
2409       struct gomp_thread *thr = gomp_thread ();
2410       if (thr->task && thr->task->depend_hash)
2411 	{
2412 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2413 	      && thr->ts.team
2414 	      && !thr->task->final_task)
2415 	    {
2416 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2417 					   mapnum, hostaddrs, sizes, kinds,
2418 					   flags, depend, NULL,
2419 					   GOMP_TARGET_TASK_DATA))
2420 		return;
2421 	    }
2422 	  else
2423 	    {
2424 	      struct gomp_team *team = thr->ts.team;
2425 	      /* If parallel or taskgroup has been cancelled, don't start new
2426 		 tasks.  */
2427 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2428 		{
2429 		  if (gomp_team_barrier_cancelled (&team->barrier))
2430 		    return;
2431 		  if (thr->task->taskgroup)
2432 		    {
2433 		      if (thr->task->taskgroup->cancelled)
2434 			return;
2435 		      if (thr->task->taskgroup->workshare
2436 			  && thr->task->taskgroup->prev
2437 			  && thr->task->taskgroup->prev->cancelled)
2438 			return;
2439 		    }
2440 		}
2441 
2442 	      gomp_task_maybe_wait_for_dependencies (depend);
2443 	    }
2444 	}
2445     }
2446 
2447   if (devicep == NULL
2448       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2449       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2450     return;
2451 
2452   struct gomp_thread *thr = gomp_thread ();
2453   struct gomp_team *team = thr->ts.team;
2454   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2455   if (__builtin_expect (gomp_cancel_var, 0) && team)
2456     {
2457       if (gomp_team_barrier_cancelled (&team->barrier))
2458 	return;
2459       if (thr->task->taskgroup)
2460 	{
2461 	  if (thr->task->taskgroup->cancelled)
2462 	    return;
2463 	  if (thr->task->taskgroup->workshare
2464 	      && thr->task->taskgroup->prev
2465 	      && thr->task->taskgroup->prev->cancelled)
2466 	    return;
2467 	}
2468     }
2469 
2470   /* The variables are mapped separately such that they can be released
2471      independently.  */
2472   size_t i, j;
2473   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2474     for (i = 0; i < mapnum; i++)
2475       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2476 	{
2477 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2478 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2479 	  i += sizes[i];
2480 	}
2481       else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
2482 	{
2483 	  for (j = i + 1; j < mapnum; j++)
2484 	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff))
2485 	      break;
2486 	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
2487 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2488 	  i += j - i - 1;
2489 	}
2490       else
2491 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2492 		       true, GOMP_MAP_VARS_ENTER_DATA);
2493   else
2494     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2495 }
2496 
2497 bool
2498 gomp_target_task_fn (void *data)
2499 {
2500   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2501   struct gomp_device_descr *devicep = ttask->devicep;
2502 
2503   if (ttask->fn != NULL)
2504     {
2505       void *fn_addr;
2506       if (devicep == NULL
2507 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2508 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2509 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2510 	{
2511 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
2512 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2513 	  return false;
2514 	}
2515 
2516       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2517 	{
2518 	  if (ttask->tgt)
2519 	    gomp_unmap_vars (ttask->tgt, true);
2520 	  return false;
2521 	}
2522 
2523       void *actual_arguments;
2524       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2525 	{
2526 	  ttask->tgt = NULL;
2527 	  actual_arguments = ttask->hostaddrs;
2528 	}
2529       else
2530 	{
2531 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2532 				      NULL, ttask->sizes, ttask->kinds, true,
2533 				      GOMP_MAP_VARS_TARGET);
2534 	  actual_arguments = (void *) ttask->tgt->tgt_start;
2535 	}
2536       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2537 
2538       assert (devicep->async_run_func);
2539       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2540 			       ttask->args, (void *) ttask);
2541       return true;
2542     }
2543   else if (devicep == NULL
2544 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2545 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2546     return false;
2547 
2548   size_t i;
2549   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2550     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2551 		 ttask->kinds, true);
2552   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2553     for (i = 0; i < ttask->mapnum; i++)
2554       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2555 	{
2556 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2557 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2558 			 GOMP_MAP_VARS_ENTER_DATA);
2559 	  i += ttask->sizes[i];
2560 	}
2561       else
2562 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2563 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2564   else
2565     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2566 		    ttask->kinds);
2567   return false;
2568 }
2569 
2570 void
2571 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2572 {
2573   if (thread_limit)
2574     {
2575       struct gomp_task_icv *icv = gomp_icv (true);
2576       icv->thread_limit_var
2577 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2578     }
2579   (void) num_teams;
2580 }
2581 
2582 void *
2583 omp_target_alloc (size_t size, int device_num)
2584 {
2585   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2586     return malloc (size);
2587 
2588   if (device_num < 0)
2589     return NULL;
2590 
2591   struct gomp_device_descr *devicep = resolve_device (device_num);
2592   if (devicep == NULL)
2593     return NULL;
2594 
2595   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2596       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2597     return malloc (size);
2598 
2599   gomp_mutex_lock (&devicep->lock);
2600   void *ret = devicep->alloc_func (devicep->target_id, size);
2601   gomp_mutex_unlock (&devicep->lock);
2602   return ret;
2603 }
2604 
2605 void
2606 omp_target_free (void *device_ptr, int device_num)
2607 {
2608   if (device_ptr == NULL)
2609     return;
2610 
2611   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2612     {
2613       free (device_ptr);
2614       return;
2615     }
2616 
2617   if (device_num < 0)
2618     return;
2619 
2620   struct gomp_device_descr *devicep = resolve_device (device_num);
2621   if (devicep == NULL)
2622     return;
2623 
2624   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2625       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2626     {
2627       free (device_ptr);
2628       return;
2629     }
2630 
2631   gomp_mutex_lock (&devicep->lock);
2632   gomp_free_device_memory (devicep, device_ptr);
2633   gomp_mutex_unlock (&devicep->lock);
2634 }
2635 
2636 int
2637 omp_target_is_present (const void *ptr, int device_num)
2638 {
2639   if (ptr == NULL)
2640     return 1;
2641 
2642   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2643     return 1;
2644 
2645   if (device_num < 0)
2646     return 0;
2647 
2648   struct gomp_device_descr *devicep = resolve_device (device_num);
2649   if (devicep == NULL)
2650     return 0;
2651 
2652   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2653       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2654     return 1;
2655 
2656   gomp_mutex_lock (&devicep->lock);
2657   struct splay_tree_s *mem_map = &devicep->mem_map;
2658   struct splay_tree_key_s cur_node;
2659 
2660   cur_node.host_start = (uintptr_t) ptr;
2661   cur_node.host_end = cur_node.host_start;
2662   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2663   int ret = n != NULL;
2664   gomp_mutex_unlock (&devicep->lock);
2665   return ret;
2666 }
2667 
2668 int
2669 omp_target_memcpy (void *dst, const void *src, size_t length,
2670 		   size_t dst_offset, size_t src_offset, int dst_device_num,
2671 		   int src_device_num)
2672 {
2673   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2674   bool ret;
2675 
2676   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2677     {
2678       if (dst_device_num < 0)
2679 	return EINVAL;
2680 
2681       dst_devicep = resolve_device (dst_device_num);
2682       if (dst_devicep == NULL)
2683 	return EINVAL;
2684 
2685       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2686 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2687 	dst_devicep = NULL;
2688     }
2689   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2690     {
2691       if (src_device_num < 0)
2692 	return EINVAL;
2693 
2694       src_devicep = resolve_device (src_device_num);
2695       if (src_devicep == NULL)
2696 	return EINVAL;
2697 
2698       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2699 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2700 	src_devicep = NULL;
2701     }
2702   if (src_devicep == NULL && dst_devicep == NULL)
2703     {
2704       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2705       return 0;
2706     }
2707   if (src_devicep == NULL)
2708     {
2709       gomp_mutex_lock (&dst_devicep->lock);
2710       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2711 					(char *) dst + dst_offset,
2712 					(char *) src + src_offset, length);
2713       gomp_mutex_unlock (&dst_devicep->lock);
2714       return (ret ? 0 : EINVAL);
2715     }
2716   if (dst_devicep == NULL)
2717     {
2718       gomp_mutex_lock (&src_devicep->lock);
2719       ret = src_devicep->dev2host_func (src_devicep->target_id,
2720 					(char *) dst + dst_offset,
2721 					(char *) src + src_offset, length);
2722       gomp_mutex_unlock (&src_devicep->lock);
2723       return (ret ? 0 : EINVAL);
2724     }
2725   if (src_devicep == dst_devicep)
2726     {
2727       gomp_mutex_lock (&src_devicep->lock);
2728       ret = src_devicep->dev2dev_func (src_devicep->target_id,
2729 				       (char *) dst + dst_offset,
2730 				       (char *) src + src_offset, length);
2731       gomp_mutex_unlock (&src_devicep->lock);
2732       return (ret ? 0 : EINVAL);
2733     }
2734   return EINVAL;
2735 }
2736 
2737 static int
2738 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2739 			       int num_dims, const size_t *volume,
2740 			       const size_t *dst_offsets,
2741 			       const size_t *src_offsets,
2742 			       const size_t *dst_dimensions,
2743 			       const size_t *src_dimensions,
2744 			       struct gomp_device_descr *dst_devicep,
2745 			       struct gomp_device_descr *src_devicep)
2746 {
2747   size_t dst_slice = element_size;
2748   size_t src_slice = element_size;
2749   size_t j, dst_off, src_off, length;
2750   int i, ret;
2751 
2752   if (num_dims == 1)
2753     {
2754       if (__builtin_mul_overflow (element_size, volume[0], &length)
2755 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2756 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2757 	return EINVAL;
2758       if (dst_devicep == NULL && src_devicep == NULL)
2759 	{
2760 	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2761 		  length);
2762 	  ret = 1;
2763 	}
2764       else if (src_devicep == NULL)
2765 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2766 					  (char *) dst + dst_off,
2767 					  (const char *) src + src_off,
2768 					  length);
2769       else if (dst_devicep == NULL)
2770 	ret = src_devicep->dev2host_func (src_devicep->target_id,
2771 					  (char *) dst + dst_off,
2772 					  (const char *) src + src_off,
2773 					  length);
2774       else if (src_devicep == dst_devicep)
2775 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
2776 					 (char *) dst + dst_off,
2777 					 (const char *) src + src_off,
2778 					 length);
2779       else
2780 	ret = 0;
2781       return ret ? 0 : EINVAL;
2782     }
2783 
2784   /* FIXME: it would be nice to have some plugin function to handle
2785      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
2786      be handled in the generic recursion below, and for host-host it
2787      should be used even for any num_dims >= 2.  */
2788 
2789   for (i = 1; i < num_dims; i++)
2790     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2791 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2792       return EINVAL;
2793   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2794       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2795     return EINVAL;
2796   for (j = 0; j < volume[0]; j++)
2797     {
2798       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2799 					   (const char *) src + src_off,
2800 					   element_size, num_dims - 1,
2801 					   volume + 1, dst_offsets + 1,
2802 					   src_offsets + 1, dst_dimensions + 1,
2803 					   src_dimensions + 1, dst_devicep,
2804 					   src_devicep);
2805       if (ret)
2806 	return ret;
2807       dst_off += dst_slice;
2808       src_off += src_slice;
2809     }
2810   return 0;
2811 }
2812 
2813 int
2814 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2815 			int num_dims, const size_t *volume,
2816 			const size_t *dst_offsets,
2817 			const size_t *src_offsets,
2818 			const size_t *dst_dimensions,
2819 			const size_t *src_dimensions,
2820 			int dst_device_num, int src_device_num)
2821 {
2822   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2823 
2824   if (!dst && !src)
2825     return INT_MAX;
2826 
2827   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2828     {
2829       if (dst_device_num < 0)
2830 	return EINVAL;
2831 
2832       dst_devicep = resolve_device (dst_device_num);
2833       if (dst_devicep == NULL)
2834 	return EINVAL;
2835 
2836       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2837 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2838 	dst_devicep = NULL;
2839     }
2840   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2841     {
2842       if (src_device_num < 0)
2843 	return EINVAL;
2844 
2845       src_devicep = resolve_device (src_device_num);
2846       if (src_devicep == NULL)
2847 	return EINVAL;
2848 
2849       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2850 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2851 	src_devicep = NULL;
2852     }
2853 
2854   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2855     return EINVAL;
2856 
2857   if (src_devicep)
2858     gomp_mutex_lock (&src_devicep->lock);
2859   else if (dst_devicep)
2860     gomp_mutex_lock (&dst_devicep->lock);
2861   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2862 					   volume, dst_offsets, src_offsets,
2863 					   dst_dimensions, src_dimensions,
2864 					   dst_devicep, src_devicep);
2865   if (src_devicep)
2866     gomp_mutex_unlock (&src_devicep->lock);
2867   else if (dst_devicep)
2868     gomp_mutex_unlock (&dst_devicep->lock);
2869   return ret;
2870 }
2871 
2872 int
2873 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2874 			  size_t size, size_t device_offset, int device_num)
2875 {
2876   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2877     return EINVAL;
2878 
2879   if (device_num < 0)
2880     return EINVAL;
2881 
2882   struct gomp_device_descr *devicep = resolve_device (device_num);
2883   if (devicep == NULL)
2884     return EINVAL;
2885 
2886   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2887       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2888     return EINVAL;
2889 
2890   gomp_mutex_lock (&devicep->lock);
2891 
2892   struct splay_tree_s *mem_map = &devicep->mem_map;
2893   struct splay_tree_key_s cur_node;
2894   int ret = EINVAL;
2895 
2896   cur_node.host_start = (uintptr_t) host_ptr;
2897   cur_node.host_end = cur_node.host_start + size;
2898   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2899   if (n)
2900     {
2901       if (n->tgt->tgt_start + n->tgt_offset
2902 	  == (uintptr_t) device_ptr + device_offset
2903 	  && n->host_start <= cur_node.host_start
2904 	  && n->host_end >= cur_node.host_end)
2905 	ret = 0;
2906     }
2907   else
2908     {
2909       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2910       tgt->array = gomp_malloc (sizeof (*tgt->array));
2911       tgt->refcount = 1;
2912       tgt->tgt_start = 0;
2913       tgt->tgt_end = 0;
2914       tgt->to_free = NULL;
2915       tgt->prev = NULL;
2916       tgt->list_count = 0;
2917       tgt->device_descr = devicep;
2918       splay_tree_node array = tgt->array;
2919       splay_tree_key k = &array->key;
2920       k->host_start = cur_node.host_start;
2921       k->host_end = cur_node.host_end;
2922       k->tgt = tgt;
2923       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2924       k->refcount = REFCOUNT_INFINITY;
2925       k->dynamic_refcount = 0;
2926       k->aux = NULL;
2927       array->left = NULL;
2928       array->right = NULL;
2929       splay_tree_insert (&devicep->mem_map, array);
2930       ret = 0;
2931     }
2932   gomp_mutex_unlock (&devicep->lock);
2933   return ret;
2934 }
2935 
2936 int
2937 omp_target_disassociate_ptr (const void *ptr, int device_num)
2938 {
2939   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2940     return EINVAL;
2941 
2942   if (device_num < 0)
2943     return EINVAL;
2944 
2945   struct gomp_device_descr *devicep = resolve_device (device_num);
2946   if (devicep == NULL)
2947     return EINVAL;
2948 
2949   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2950     return EINVAL;
2951 
2952   gomp_mutex_lock (&devicep->lock);
2953 
2954   struct splay_tree_s *mem_map = &devicep->mem_map;
2955   struct splay_tree_key_s cur_node;
2956   int ret = EINVAL;
2957 
2958   cur_node.host_start = (uintptr_t) ptr;
2959   cur_node.host_end = cur_node.host_start;
2960   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2961   if (n
2962       && n->host_start == cur_node.host_start
2963       && n->refcount == REFCOUNT_INFINITY
2964       && n->tgt->tgt_start == 0
2965       && n->tgt->to_free == NULL
2966       && n->tgt->refcount == 1
2967       && n->tgt->list_count == 0)
2968     {
2969       splay_tree_remove (&devicep->mem_map, n);
2970       gomp_unmap_tgt (n->tgt);
2971       ret = 0;
2972     }
2973 
2974   gomp_mutex_unlock (&devicep->lock);
2975   return ret;
2976 }
2977 
2978 int
2979 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2980 {
2981   (void) kind;
2982   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2983     return gomp_pause_host ();
2984   if (device_num < 0 || device_num >= gomp_get_num_devices ())
2985     return -1;
2986   /* Do nothing for target devices for now.  */
2987   return 0;
2988 }
2989 
2990 int
2991 omp_pause_resource_all (omp_pause_resource_t kind)
2992 {
2993   (void) kind;
2994   if (gomp_pause_host ())
2995     return -1;
2996   /* Do nothing for target devices for now.  */
2997   return 0;
2998 }
2999 
3000 ialias (omp_pause_resource)
3001 ialias (omp_pause_resource_all)
3002 
3003 #ifdef PLUGIN_SUPPORT
3004 
3005 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
3006    in PLUGIN_NAME.
3007    The handles of the found functions are stored in the corresponding fields
3008    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3009 
3010 static bool
3011 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3012 			     const char *plugin_name)
3013 {
3014   const char *err = NULL, *last_missing = NULL;
3015 
3016   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3017   if (!plugin_handle)
3018     goto dl_fail;
3019 
3020   /* Check if all required functions are available in the plugin and store
3021      their handlers.  None of the symbols can legitimately be NULL,
3022      so we don't need to check dlerror all the time.  */
3023 #define DLSYM(f)							\
3024   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
3025     goto dl_fail
3026   /* Similar, but missing functions are not an error.  Return false if
3027      failed, true otherwise.  */
3028 #define DLSYM_OPT(f, n)							\
3029   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
3030    || (last_missing = #n, 0))
3031 
3032   DLSYM (version);
3033   if (device->version_func () != GOMP_VERSION)
3034     {
3035       err = "plugin version mismatch";
3036       goto fail;
3037     }
3038 
3039   DLSYM (get_name);
3040   DLSYM (get_caps);
3041   DLSYM (get_type);
3042   DLSYM (get_num_devices);
3043   DLSYM (init_device);
3044   DLSYM (fini_device);
3045   DLSYM (load_image);
3046   DLSYM (unload_image);
3047   DLSYM (alloc);
3048   DLSYM (free);
3049   DLSYM (dev2host);
3050   DLSYM (host2dev);
3051   device->capabilities = device->get_caps_func ();
3052   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3053     {
3054       DLSYM (run);
3055       DLSYM_OPT (async_run, async_run);
3056       DLSYM_OPT (can_run, can_run);
3057       DLSYM (dev2dev);
3058     }
3059   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3060     {
3061       if (!DLSYM_OPT (openacc.exec, openacc_exec)
3062 	  || !DLSYM_OPT (openacc.create_thread_data,
3063 			 openacc_create_thread_data)
3064 	  || !DLSYM_OPT (openacc.destroy_thread_data,
3065 			 openacc_destroy_thread_data)
3066 	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3067 	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3068 	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3069 	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3070 	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3071 	  || !DLSYM_OPT (openacc.async.queue_callback,
3072 			 openacc_async_queue_callback)
3073 	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3074 	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3075 	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3076 	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3077 	{
3078 	  /* Require all the OpenACC handlers if we have
3079 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
3080 	  err = "plugin missing OpenACC handler function";
3081 	  goto fail;
3082 	}
3083 
3084       unsigned cuda = 0;
3085       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3086 			 openacc_cuda_get_current_device);
3087       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3088 			 openacc_cuda_get_current_context);
3089       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3090       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3091       if (cuda && cuda != 4)
3092 	{
3093 	  /* Make sure all the CUDA functions are there if any of them are.  */
3094 	  err = "plugin missing OpenACC CUDA handler function";
3095 	  goto fail;
3096 	}
3097     }
3098 #undef DLSYM
3099 #undef DLSYM_OPT
3100 
3101   return 1;
3102 
3103  dl_fail:
3104   err = dlerror ();
3105  fail:
3106   gomp_error ("while loading %s: %s", plugin_name, err);
3107   if (last_missing)
3108     gomp_error ("missing function was %s", last_missing);
3109   if (plugin_handle)
3110     dlclose (plugin_handle);
3111 
3112   return 0;
3113 }
3114 
3115 /* This function finalizes all initialized devices.  */
3116 
3117 static void
3118 gomp_target_fini (void)
3119 {
3120   int i;
3121   for (i = 0; i < num_devices; i++)
3122     {
3123       bool ret = true;
3124       struct gomp_device_descr *devicep = &devices[i];
3125       gomp_mutex_lock (&devicep->lock);
3126       if (devicep->state == GOMP_DEVICE_INITIALIZED)
3127 	ret = gomp_fini_device (devicep);
3128       gomp_mutex_unlock (&devicep->lock);
3129       if (!ret)
3130 	gomp_fatal ("device finalization failed");
3131     }
3132 }
3133 
3134 /* This function initializes the runtime for offloading.
3135    It parses the list of offload plugins, and tries to load these.
3136    On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3137    will be set, and the array DEVICES initialized, containing descriptors for
3138    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3139    by the others.  */
3140 
3141 static void
3142 gomp_target_init (void)
3143 {
3144   const char *prefix ="libgomp-plugin-";
3145   const char *suffix = SONAME_SUFFIX (1);
3146   const char *cur, *next;
3147   char *plugin_name;
3148   int i, new_num_devices;
3149 
3150   num_devices = 0;
3151   devices = NULL;
3152 
3153   cur = OFFLOAD_PLUGINS;
3154   if (*cur)
3155     do
3156       {
3157 	struct gomp_device_descr current_device;
3158 	size_t prefix_len, suffix_len, cur_len;
3159 
3160 	next = strchr (cur, ',');
3161 
3162 	prefix_len = strlen (prefix);
3163 	cur_len = next ? next - cur : strlen (cur);
3164 	suffix_len = strlen (suffix);
3165 
3166 	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3167 	if (!plugin_name)
3168 	  {
3169 	    num_devices = 0;
3170 	    break;
3171 	  }
3172 
3173 	memcpy (plugin_name, prefix, prefix_len);
3174 	memcpy (plugin_name + prefix_len, cur, cur_len);
3175 	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3176 
3177 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
3178 	  {
3179 	    new_num_devices = current_device.get_num_devices_func ();
3180 	    if (new_num_devices >= 1)
3181 	      {
3182 		/* Augment DEVICES and NUM_DEVICES.  */
3183 
3184 		devices = realloc (devices, (num_devices + new_num_devices)
3185 				   * sizeof (struct gomp_device_descr));
3186 		if (!devices)
3187 		  {
3188 		    num_devices = 0;
3189 		    free (plugin_name);
3190 		    break;
3191 		  }
3192 
3193 		current_device.name = current_device.get_name_func ();
3194 		/* current_device.capabilities has already been set.  */
3195 		current_device.type = current_device.get_type_func ();
3196 		current_device.mem_map.root = NULL;
3197 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
3198 		for (i = 0; i < new_num_devices; i++)
3199 		  {
3200 		    current_device.target_id = i;
3201 		    devices[num_devices] = current_device;
3202 		    gomp_mutex_init (&devices[num_devices].lock);
3203 		    num_devices++;
3204 		  }
3205 	      }
3206 	  }
3207 
3208 	free (plugin_name);
3209 	cur = next + 1;
3210       }
3211     while (next);
3212 
3213   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3214      NUM_DEVICES_OPENMP.  */
3215   struct gomp_device_descr *devices_s
3216     = malloc (num_devices * sizeof (struct gomp_device_descr));
3217   if (!devices_s)
3218     {
3219       num_devices = 0;
3220       free (devices);
3221       devices = NULL;
3222     }
3223   num_devices_openmp = 0;
3224   for (i = 0; i < num_devices; i++)
3225     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3226       devices_s[num_devices_openmp++] = devices[i];
3227   int num_devices_after_openmp = num_devices_openmp;
3228   for (i = 0; i < num_devices; i++)
3229     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3230       devices_s[num_devices_after_openmp++] = devices[i];
3231   free (devices);
3232   devices = devices_s;
3233 
3234   for (i = 0; i < num_devices; i++)
3235     {
3236       /* The 'devices' array can be moved (by the realloc call) until we have
3237 	 found all the plugins, so registering with the OpenACC runtime (which
3238 	 takes a copy of the pointer argument) must be delayed until now.  */
3239       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3240 	goacc_register (&devices[i]);
3241     }
3242 
3243   if (atexit (gomp_target_fini) != 0)
3244     gomp_fatal ("atexit failed");
3245 }
3246 
3247 #else /* PLUGIN_SUPPORT */
3248 /* If dlfcn.h is unavailable we always fallback to host execution.
3249    GOMP_target* routines are just stubs for this case.  */
3250 static void
3251 gomp_target_init (void)
3252 {
3253 }
3254 #endif /* PLUGIN_SUPPORT */
3255