xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/target.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file contains the support of offloading.  */
27 
28 #include "config.h"
29 #include "libgomp.h"
30 #include "oacc-plugin.h"
31 #include "oacc-int.h"
32 #include "gomp-constants.h"
33 #include <limits.h>
34 #include <stdbool.h>
35 #include <stdlib.h>
36 #ifdef HAVE_INTTYPES_H
37 # include <inttypes.h>  /* For PRIu64.  */
38 #endif
39 #include <string.h>
40 #include <assert.h>
41 #include <errno.h>
42 
43 #ifdef PLUGIN_SUPPORT
44 #include <dlfcn.h>
45 #include "plugin-suffix.h"
46 #endif
47 
48 static void gomp_target_init (void);
49 
50 /* The whole initialization code for offloading plugins is only run once.  */
51 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
52 
53 /* Mutex for offload image registration.  */
54 static gomp_mutex_t register_lock;
55 
56 /* This structure describes an offload image.
57    It contains the type of the target device, a pointer to the host table
58    descriptor, and a pointer to the target data.  */
59 struct offload_image_descr {
60   unsigned version;
61   enum offload_target_type type;
62   const void *host_table;
63   const void *target_data;
64 };
65 
66 /* Array of descriptors of offload images.  */
67 static struct offload_image_descr *offload_images;
68 
69 /* Total number of offload images.  */
70 static int num_offload_images;
71 
72 /* Array of descriptors for all available devices.  */
73 static struct gomp_device_descr *devices;
74 
75 /* Total number of available devices.  */
76 static int num_devices;
77 
78 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
79 static int num_devices_openmp;
80 
81 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
82 
83 static void *
84 gomp_realloc_unlock (void *old, size_t size)
85 {
86   void *ret = realloc (old, size);
87   if (ret == NULL)
88     {
89       gomp_mutex_unlock (&register_lock);
90       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91     }
92   return ret;
93 }
94 
95 attribute_hidden void
96 gomp_init_targets_once (void)
97 {
98   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99 }
100 
101 attribute_hidden int
102 gomp_get_num_devices (void)
103 {
104   gomp_init_targets_once ();
105   return num_devices_openmp;
106 }
107 
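/* Translate DEVICE_ID (or the default-device-var ICV, if DEVICE_ID is
   GOMP_DEVICE_ICV) into an initialized device descriptor; return NULL if the
   id is out of range or the device has already been finalized.  */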
108 static struct gomp_device_descr *
109 resolve_device (int device_id)
110 {
111   if (device_id == GOMP_DEVICE_ICV)
112     {
113       struct gomp_task_icv *icv = gomp_icv (false);
114       device_id = icv->default_device_var;
115     }
116 
117   if (device_id < 0 || device_id >= gomp_get_num_devices ())
118     return NULL;
119 
120   gomp_mutex_lock (&devices[device_id].lock);
121   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122     gomp_init_device (&devices[device_id]);
123   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124     {
125       gomp_mutex_unlock (&devices[device_id].lock);
126       return NULL;
127     }
128   gomp_mutex_unlock (&devices[device_id].lock);
129 
130   return &devices[device_id];
131 }
132 
133 
134 static inline splay_tree_key
135 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136 {
137   if (key->host_start != key->host_end)
138     return splay_tree_lookup (mem_map, key);
139 
140   key->host_end++;
141   splay_tree_key n = splay_tree_lookup (mem_map, key);
142   key->host_end--;
143   if (n)
144     return n;
145   key->host_start--;
146   n = splay_tree_lookup (mem_map, key);
147   key->host_start++;
148   if (n)
149     return n;
150   return splay_tree_lookup (mem_map, key);
151 }
152 
153 static inline splay_tree_key
154 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155 {
156   if (key->host_start != key->host_end)
157     return splay_tree_lookup (mem_map, key);
158 
159   key->host_end++;
160   splay_tree_key n = splay_tree_lookup (mem_map, key);
161   key->host_end--;
162   return n;
163 }
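
#if 0
/* Illustrative sketch only (never compiled): how the extra probes above
   resolve a zero-length array section such as "a[10:0]" against an earlier
   mapping of a[0:10], i.e. [&a[0], &a[10]).  The probe [start, start + 1)
   lies past the mapped end and misses, but the probe [start - 1, start)
   overlaps the mapping, so the section reuses the existing entry.  */
static splay_tree_key
example_0len_lookup (splay_tree mem_map, int *a)
{
  struct splay_tree_key_s key;
  key.host_start = (uintptr_t) &a[10];	/* host_start == host_end.  */
  key.host_end = key.host_start;
  return gomp_map_lookup (mem_map, &key);
}
#endif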
164 
165 static inline void
166 gomp_device_copy (struct gomp_device_descr *devicep,
167 		  bool (*copy_func) (int, void *, const void *, size_t),
168 		  const char *dst, void *dstaddr,
169 		  const char *src, const void *srcaddr,
170 		  size_t size)
171 {
172   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173     {
174       gomp_mutex_unlock (&devicep->lock);
175       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176 		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177     }
178 }
179 
180 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
181    host to device memory transfers.  */
182 
183 struct gomp_coalesce_chunk
184 {
185   /* The starting and ending point of a coalesced chunk of memory.  */
186   size_t start, end;
187 };
188 
189 struct gomp_coalesce_buf
190 {
191   /* Buffer into which gomp_copy_host2dev will memcpy data and from which
192      it will be copied to the device.  */
193   void *buf;
194   struct target_mem_desc *tgt;
195   /* Array of offsets: chunks[i].start is the starting offset and
196      chunks[i].end the ending offset, relative to the tgt->tgt_start device
197      address, of chunks which are to be copied to buf and later to the device.  */
198   struct gomp_coalesce_chunk *chunks;
199   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
200      be performed.  */
201   long chunk_cnt;
202   /* During construction of the chunks array, how many memory regions are
203      within the last chunk.  If there is just one memory region for a chunk,
204      we copy it directly to the device rather than going through buf.  */
205   long use_cnt;
206 };
207 
208 /* Maximum size of memory region considered for coalescing.  Larger copies
209    are performed directly.  */
210 #define MAX_COALESCE_BUF_SIZE	(32 * 1024)
211 
212 /* Maximum size of a gap in between regions to consider them being copied
213    within the same chunk.  All the device offsets considered are within
214    newly allocated device memory, so it isn't fatal if we copy some padding
215    in between from host to device.  The gaps come either from alignment
216    padding or from memory regions which are not supposed to be copied from
217    host to device (e.g. map(alloc:), map(from:) etc.).  */
218 #define MAX_COALESCE_BUF_GAP	(4 * 1024)
219 
220 /* Add a region with tgt_start-relative device offset START and length LEN to CBUF.  */
221 
222 static inline void
223 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
224 {
225   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
226     return;
227   if (cbuf->chunk_cnt)
228     {
229       if (cbuf->chunk_cnt < 0)
230 	return;
231       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
232 	{
233 	  cbuf->chunk_cnt = -1;
234 	  return;
235 	}
236       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
237 	{
238 	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
239 	  cbuf->use_cnt++;
240 	  return;
241 	}
242       /* If the last chunk is only used by one mapping, discard it,
243 	 as it will be one host to device copy anyway and
244 	 memcpying it around will only waste cycles.  */
245       if (cbuf->use_cnt == 1)
246 	cbuf->chunk_cnt--;
247     }
248   cbuf->chunks[cbuf->chunk_cnt].start = start;
249   cbuf->chunks[cbuf->chunk_cnt].end = start + len;
250   cbuf->chunk_cnt++;
251   cbuf->use_cnt = 1;
252 }
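
#if 0
/* Illustrative sketch only (never compiled; hypothetical offsets): how
   successive calls grow and split chunks, given the 4 KiB
   MAX_COALESCE_BUF_GAP above.  */
static void
example_coalesce (void)
{
  struct gomp_coalesce_chunk chunks[3];
  struct gomp_coalesce_buf cbuf;
  cbuf.buf = NULL;
  cbuf.tgt = NULL;
  cbuf.chunks = chunks;
  cbuf.chunk_cnt = 0;
  cbuf.use_cnt = 0;
  gomp_coalesce_buf_add (&cbuf, 0, 256);    /* First chunk [0, 256).  */
  gomp_coalesce_buf_add (&cbuf, 512, 128);  /* Gap of 256 < 4096: the chunk
					       grows to [0, 640).  */
  gomp_coalesce_buf_add (&cbuf, 8192, 64);  /* Gap of 7552 > 4096: a second
					       chunk [8192, 8256) is opened.  */
}
#endif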
253 
254 /* Return true for mapping kinds which need to copy data from the
255    host to device for regions that weren't previously mapped.  */
256 
257 static inline bool
258 gomp_to_device_kind_p (int kind)
259 {
260   switch (kind)
261     {
262     case GOMP_MAP_ALLOC:
263     case GOMP_MAP_FROM:
264     case GOMP_MAP_FORCE_ALLOC:
265     case GOMP_MAP_ALWAYS_FROM:
266       return false;
267     default:
268       return true;
269     }
270 }
271 
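/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls inside one of its coalesced chunks, only memcpy the
   data into the host-side staging buffer; it is flushed to the device in
   one larger transfer near the end of gomp_map_vars.  */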
272 static void
273 gomp_copy_host2dev (struct gomp_device_descr *devicep,
274 		    void *d, const void *h, size_t sz,
275 		    struct gomp_coalesce_buf *cbuf)
276 {
277   if (cbuf)
278     {
279       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
280       if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
281 	{
282 	  long first = 0;
283 	  long last = cbuf->chunk_cnt - 1;
284 	  while (first <= last)
285 	    {
286 	      long middle = (first + last) >> 1;
287 	      if (cbuf->chunks[middle].end <= doff)
288 		first = middle + 1;
289 	      else if (cbuf->chunks[middle].start <= doff)
290 		{
291 		  if (doff + sz > cbuf->chunks[middle].end)
292 		    gomp_fatal ("internal libgomp cbuf error");
293 		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
294 			  h, sz);
295 		  return;
296 		}
297 	      else
298 		last = middle - 1;
299 	    }
300 	}
301     }
302   gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
303 }
304 
305 static void
306 gomp_copy_dev2host (struct gomp_device_descr *devicep,
307 		    void *h, const void *d, size_t sz)
308 {
309   gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
310 }
311 
312 static void
313 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
314 {
315   if (!devicep->free_func (devicep->target_id, devptr))
316     {
317       gomp_mutex_unlock (&devicep->lock);
318       gomp_fatal ("error in freeing device memory block at %p", devptr);
319     }
320 }
321 
322 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
323    gomp_map_0len_lookup found oldn for newn.
324    Helper function of gomp_map_vars.  */
325 
326 static inline void
327 gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
328 			splay_tree_key newn, struct target_var_desc *tgt_var,
329 			unsigned char kind, struct gomp_coalesce_buf *cbuf)
330 {
331   tgt_var->key = oldn;
332   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
333   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
334   tgt_var->offset = newn->host_start - oldn->host_start;
335   tgt_var->length = newn->host_end - newn->host_start;
336 
337   if ((kind & GOMP_MAP_FLAG_FORCE)
338       || oldn->host_start > newn->host_start
339       || oldn->host_end < newn->host_end)
340     {
341       gomp_mutex_unlock (&devicep->lock);
342       gomp_fatal ("Trying to map into device [%p..%p) object when "
343 		  "[%p..%p) is already mapped",
344 		  (void *) newn->host_start, (void *) newn->host_end,
345 		  (void *) oldn->host_start, (void *) oldn->host_end);
346     }
347 
348   if (GOMP_MAP_ALWAYS_TO_P (kind))
349     gomp_copy_host2dev (devicep,
350 			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
351 				  + newn->host_start - oldn->host_start),
352 			(void *) newn->host_start,
353 			newn->host_end - newn->host_start, cbuf);
354 
355   if (oldn->refcount != REFCOUNT_INFINITY)
356     oldn->refcount++;
357 }
358 
359 static int
360 get_kind (bool short_mapkind, void *kinds, int idx)
361 {
362   return short_mapkind ? ((unsigned short *) kinds)[idx]
363 		       : ((unsigned char *) kinds)[idx];
364 }
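
#if 0
/* Illustrative sketch only (never compiled): a short map kind packs the map
   type into its low 8 bits and the log2 of the host object's alignment into
   the remaining bits; "kind & typemask" and "kind >> rshift" in the code
   below undo this.  The 8-bit (char) encoding is analogous, with a 0x7 mask
   and a shift of 3.  */
static void
example_decode_short_kind (unsigned short kind)
{
  int type = kind & 0xff;			/* E.g. GOMP_MAP_TOFROM.  */
  size_t align = (size_t) 1 << (kind >> 8);	/* E.g. 8 when kind >> 8 == 3.  */
  (void) type;
  (void) align;
}
#endif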
365 
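/* Write at device address TGT->tgt_start + TARGET_OFFSET the device address
   corresponding to the host pointer HOST_PTR: the pointer value plus BIAS is
   looked up in the address map, translated, and the BIAS subtracted again;
   a NULL host pointer is stored as NULL.  */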
366 static void
367 gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
368 		  uintptr_t target_offset, uintptr_t bias,
369 		  struct gomp_coalesce_buf *cbuf)
370 {
371   struct gomp_device_descr *devicep = tgt->device_descr;
372   struct splay_tree_s *mem_map = &devicep->mem_map;
373   struct splay_tree_key_s cur_node;
374 
375   cur_node.host_start = host_ptr;
376   if (cur_node.host_start == (uintptr_t) NULL)
377     {
378       cur_node.tgt_offset = (uintptr_t) NULL;
379       gomp_copy_host2dev (devicep,
380 			  (void *) (tgt->tgt_start + target_offset),
381 			  (void *) &cur_node.tgt_offset,
382 			  sizeof (void *), cbuf);
383       return;
384     }
385   /* Add bias to the pointer value.  */
386   cur_node.host_start += bias;
387   cur_node.host_end = cur_node.host_start;
388   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
389   if (n == NULL)
390     {
391       gomp_mutex_unlock (&devicep->lock);
392       gomp_fatal ("Pointer target of array section wasn't mapped");
393     }
394   cur_node.host_start -= n->host_start;
395   cur_node.tgt_offset
396     = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
397   /* At this point tgt_offset is the target address of the
398      array section.  Now subtract the bias to get what we want
399      to initialize the pointer with.  */
400   cur_node.tgt_offset -= bias;
401   gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
402 		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
403 }
404 
405 static void
406 gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
407 			  size_t first, size_t i, void **hostaddrs,
408 			  size_t *sizes, void *kinds,
409 			  struct gomp_coalesce_buf *cbuf)
410 {
411   struct gomp_device_descr *devicep = tgt->device_descr;
412   struct splay_tree_s *mem_map = &devicep->mem_map;
413   struct splay_tree_key_s cur_node;
414   int kind;
415   const bool short_mapkind = true;
416   const int typemask = short_mapkind ? 0xff : 0x7;
417 
418   cur_node.host_start = (uintptr_t) hostaddrs[i];
419   cur_node.host_end = cur_node.host_start + sizes[i];
420   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
421   kind = get_kind (short_mapkind, kinds, i);
422   if (n2
423       && n2->tgt == n->tgt
424       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
425     {
426       gomp_map_vars_existing (devicep, n2, &cur_node,
427 			      &tgt->list[i], kind & typemask, cbuf);
428       return;
429     }
430   if (sizes[i] == 0)
431     {
432       if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
433 	{
434 	  cur_node.host_start--;
435 	  n2 = splay_tree_lookup (mem_map, &cur_node);
436 	  cur_node.host_start++;
437 	  if (n2
438 	      && n2->tgt == n->tgt
439 	      && n2->host_start - n->host_start
440 		 == n2->tgt_offset - n->tgt_offset)
441 	    {
442 	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
443 				      kind & typemask, cbuf);
444 	      return;
445 	    }
446 	}
447       cur_node.host_end++;
448       n2 = splay_tree_lookup (mem_map, &cur_node);
449       cur_node.host_end--;
450       if (n2
451 	  && n2->tgt == n->tgt
452 	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
453 	{
454 	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
455 				  kind & typemask, cbuf);
456 	  return;
457 	}
458     }
459   gomp_mutex_unlock (&devicep->lock);
460   gomp_fatal ("Trying to map into device [%p..%p) structure element when "
461 	      "other mapped elements from the same structure weren't mapped "
462 	      "together with it", (void *) cur_node.host_start,
463 	      (void *) cur_node.host_end);
464 }
465 
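/* Return the device address that the I-th mapping resolves to.  The
   tgt->list[i].offset sentinel values used here are: ~0 - pass hostaddrs[i]
   through unchanged; ~1 - use a NULL/zero value; ~2 - compute the address
   relative to the mapping of the following list entry.  */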
466 static inline uintptr_t
467 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
468 {
469   if (tgt->list[i].key != NULL)
470     return tgt->list[i].key->tgt->tgt_start
471 	   + tgt->list[i].key->tgt_offset
472 	   + tgt->list[i].offset;
473   if (tgt->list[i].offset == ~(uintptr_t) 0)
474     return (uintptr_t) hostaddrs[i];
475   if (tgt->list[i].offset == ~(uintptr_t) 1)
476     return 0;
477   if (tgt->list[i].offset == ~(uintptr_t) 2)
478     return tgt->list[i + 1].key->tgt->tgt_start
479 	   + tgt->list[i + 1].key->tgt_offset
480 	   + tgt->list[i + 1].offset
481 	   + (uintptr_t) hostaddrs[i]
482 	   - (uintptr_t) hostaddrs[i + 1];
483   return tgt->tgt_start + tgt->list[i].offset;
484 }
485 
486 attribute_hidden struct target_mem_desc *
487 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
488 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
489 	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
490 {
491   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
492   bool has_firstprivate = false;
493   const int rshift = short_mapkind ? 8 : 3;
494   const int typemask = short_mapkind ? 0xff : 0x7;
495   struct splay_tree_s *mem_map = &devicep->mem_map;
496   struct splay_tree_key_s cur_node;
497   struct target_mem_desc *tgt
498     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
499   tgt->list_count = mapnum;
500   tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
501   tgt->device_descr = devicep;
502   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
503 
504   if (mapnum == 0)
505     {
506       tgt->tgt_start = 0;
507       tgt->tgt_end = 0;
508       return tgt;
509     }
510 
511   tgt_align = sizeof (void *);
512   tgt_size = 0;
513   cbuf.chunks = NULL;
514   cbuf.chunk_cnt = -1;
515   cbuf.use_cnt = 0;
516   cbuf.buf = NULL;
517   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
518     {
519       size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
520       cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
521       cbuf.chunk_cnt = 0;
522     }
523   if (pragma_kind == GOMP_MAP_VARS_TARGET)
524     {
525       size_t align = 4 * sizeof (void *);
526       tgt_align = align;
527       tgt_size = mapnum * sizeof (void *);
528       cbuf.chunk_cnt = 1;
529       cbuf.use_cnt = 1 + (mapnum > 1);
530       cbuf.chunks[0].start = 0;
531       cbuf.chunks[0].end = tgt_size;
532     }
533 
534   gomp_mutex_lock (&devicep->lock);
535   if (devicep->state == GOMP_DEVICE_FINALIZED)
536     {
537       gomp_mutex_unlock (&devicep->lock);
538       free (tgt);
539       return NULL;
540     }
541 
542   for (i = 0; i < mapnum; i++)
543     {
544       int kind = get_kind (short_mapkind, kinds, i);
545       if (hostaddrs[i] == NULL
546 	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
547 	{
548 	  tgt->list[i].key = NULL;
549 	  tgt->list[i].offset = ~(uintptr_t) 0;
550 	  continue;
551 	}
552       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
553 	{
554 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
555 	  cur_node.host_end = cur_node.host_start;
556 	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
557 	  if (n == NULL)
558 	    {
559 	      gomp_mutex_unlock (&devicep->lock);
560 	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
561 	    }
562 	  cur_node.host_start -= n->host_start;
563 	  hostaddrs[i]
564 	    = (void *) (n->tgt->tgt_start + n->tgt_offset
565 			+ cur_node.host_start);
566 	  tgt->list[i].key = NULL;
567 	  tgt->list[i].offset = ~(uintptr_t) 0;
568 	  continue;
569 	}
570       else if ((kind & typemask) == GOMP_MAP_STRUCT)
571 	{
572 	  size_t first = i + 1;
573 	  size_t last = i + sizes[i];
574 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
575 	  cur_node.host_end = (uintptr_t) hostaddrs[last]
576 			      + sizes[last];
577 	  tgt->list[i].key = NULL;
578 	  tgt->list[i].offset = ~(uintptr_t) 2;
579 	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
580 	  if (n == NULL)
581 	    {
582 	      size_t align = (size_t) 1 << (kind >> rshift);
583 	      if (tgt_align < align)
584 		tgt_align = align;
585 	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
586 	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
587 	      tgt_size += cur_node.host_end - cur_node.host_start;
588 	      not_found_cnt += last - i;
589 	      for (i = first; i <= last; i++)
590 		{
591 		  tgt->list[i].key = NULL;
592 		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
593 					     & typemask))
594 		    gomp_coalesce_buf_add (&cbuf,
595 					   tgt_size - cur_node.host_end
596 					   + (uintptr_t) hostaddrs[i],
597 					   sizes[i]);
598 		}
599 	      i--;
600 	      continue;
601 	    }
602 	  for (i = first; i <= last; i++)
603 	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
604 				      sizes, kinds, NULL);
605 	  i--;
606 	  continue;
607 	}
608       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
609 	{
610 	  tgt->list[i].key = NULL;
611 	  tgt->list[i].offset = ~(uintptr_t) 1;
612 	  has_firstprivate = true;
613 	  continue;
614 	}
615       cur_node.host_start = (uintptr_t) hostaddrs[i];
616       if (!GOMP_MAP_POINTER_P (kind & typemask))
617 	cur_node.host_end = cur_node.host_start + sizes[i];
618       else
619 	cur_node.host_end = cur_node.host_start + sizeof (void *);
620       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
621 	{
622 	  tgt->list[i].key = NULL;
623 
624 	  size_t align = (size_t) 1 << (kind >> rshift);
625 	  if (tgt_align < align)
626 	    tgt_align = align;
627 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
628 	  gomp_coalesce_buf_add (&cbuf, tgt_size,
629 				 cur_node.host_end - cur_node.host_start);
630 	  tgt_size += cur_node.host_end - cur_node.host_start;
631 	  has_firstprivate = true;
632 	  continue;
633 	}
634       splay_tree_key n;
635       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
636 	{
637 	  n = gomp_map_0len_lookup (mem_map, &cur_node);
638 	  if (!n)
639 	    {
640 	      tgt->list[i].key = NULL;
641 	      tgt->list[i].offset = ~(uintptr_t) 1;
642 	      continue;
643 	    }
644 	}
645       else
646 	n = splay_tree_lookup (mem_map, &cur_node);
647       if (n && n->refcount != REFCOUNT_LINK)
648 	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
649 				kind & typemask, NULL);
650       else
651 	{
652 	  tgt->list[i].key = NULL;
653 
654 	  size_t align = (size_t) 1 << (kind >> rshift);
655 	  not_found_cnt++;
656 	  if (tgt_align < align)
657 	    tgt_align = align;
658 	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
659 	  if (gomp_to_device_kind_p (kind & typemask))
660 	    gomp_coalesce_buf_add (&cbuf, tgt_size,
661 				   cur_node.host_end - cur_node.host_start);
662 	  tgt_size += cur_node.host_end - cur_node.host_start;
663 	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
664 	    {
665 	      size_t j;
666 	      for (j = i + 1; j < mapnum; j++)
667 		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
668 					 & typemask))
669 		  break;
670 		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
671 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
672 			     > cur_node.host_end))
673 		  break;
674 		else
675 		  {
676 		    tgt->list[j].key = NULL;
677 		    i++;
678 		  }
679 	    }
680 	}
681     }
682 
683   if (devaddrs)
684     {
685       if (mapnum != 1)
686 	{
687 	  gomp_mutex_unlock (&devicep->lock);
688 	  gomp_fatal ("unexpected aggregation");
689 	}
690       tgt->to_free = devaddrs[0];
691       tgt->tgt_start = (uintptr_t) tgt->to_free;
692       tgt->tgt_end = tgt->tgt_start + sizes[0];
693     }
694   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
695     {
696       /* Allocate a block of tgt_size bytes aligned to tgt_align.  */
697       /* FIXME: Perhaps change the interface to allocate properly aligned
698 	 memory.  */
699       tgt->to_free = devicep->alloc_func (devicep->target_id,
700 					  tgt_size + tgt_align - 1);
701       if (!tgt->to_free)
702 	{
703 	  gomp_mutex_unlock (&devicep->lock);
704 	  gomp_fatal ("device memory allocation fail");
705 	}
706 
707       tgt->tgt_start = (uintptr_t) tgt->to_free;
708       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
709       tgt->tgt_end = tgt->tgt_start + tgt_size;
710 
711       if (cbuf.use_cnt == 1)
712 	cbuf.chunk_cnt--;
713       if (cbuf.chunk_cnt > 0)
714 	{
715 	  cbuf.buf
716 	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
717 	  if (cbuf.buf)
718 	    {
719 	      cbuf.tgt = tgt;
720 	      cbufp = &cbuf;
721 	    }
722 	}
723     }
724   else
725     {
726       tgt->to_free = NULL;
727       tgt->tgt_start = 0;
728       tgt->tgt_end = 0;
729     }
730 
731   tgt_size = 0;
732   if (pragma_kind == GOMP_MAP_VARS_TARGET)
733     tgt_size = mapnum * sizeof (void *);
734 
735   tgt->array = NULL;
736   if (not_found_cnt || has_firstprivate)
737     {
738       if (not_found_cnt)
739 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
740       splay_tree_node array = tgt->array;
741       size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
742       uintptr_t field_tgt_base = 0;
743 
744       for (i = 0; i < mapnum; i++)
745 	if (tgt->list[i].key == NULL)
746 	  {
747 	    int kind = get_kind (short_mapkind, kinds, i);
748 	    if (hostaddrs[i] == NULL)
749 	      continue;
750 	    switch (kind & typemask)
751 	      {
752 		size_t align, len, first, last;
753 		splay_tree_key n;
754 	      case GOMP_MAP_FIRSTPRIVATE:
755 		align = (size_t) 1 << (kind >> rshift);
756 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
757 		tgt->list[i].offset = tgt_size;
758 		len = sizes[i];
759 		gomp_copy_host2dev (devicep,
760 				    (void *) (tgt->tgt_start + tgt_size),
761 				    (void *) hostaddrs[i], len, cbufp);
762 		tgt_size += len;
763 		continue;
764 	      case GOMP_MAP_FIRSTPRIVATE_INT:
765 	      case GOMP_MAP_USE_DEVICE_PTR:
766 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
767 		continue;
768 	      case GOMP_MAP_STRUCT:
769 		first = i + 1;
770 		last = i + sizes[i];
771 		cur_node.host_start = (uintptr_t) hostaddrs[i];
772 		cur_node.host_end = (uintptr_t) hostaddrs[last]
773 				    + sizes[last];
774 		if (tgt->list[first].key != NULL)
775 		  continue;
776 		n = splay_tree_lookup (mem_map, &cur_node);
777 		if (n == NULL)
778 		  {
779 		    size_t align = (size_t) 1 << (kind >> rshift);
780 		    tgt_size -= (uintptr_t) hostaddrs[first]
781 				- (uintptr_t) hostaddrs[i];
782 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
783 		    tgt_size += (uintptr_t) hostaddrs[first]
784 				- (uintptr_t) hostaddrs[i];
785 		    field_tgt_base = (uintptr_t) hostaddrs[first];
786 		    field_tgt_offset = tgt_size;
787 		    field_tgt_clear = last;
788 		    tgt_size += cur_node.host_end
789 				- (uintptr_t) hostaddrs[first];
790 		    continue;
791 		  }
792 		for (i = first; i <= last; i++)
793 		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
794 					    sizes, kinds, cbufp);
795 		i--;
796 		continue;
797 	      case GOMP_MAP_ALWAYS_POINTER:
798 		cur_node.host_start = (uintptr_t) hostaddrs[i];
799 		cur_node.host_end = cur_node.host_start + sizeof (void *);
800 		n = splay_tree_lookup (mem_map, &cur_node);
801 		if (n == NULL
802 		    || n->host_start > cur_node.host_start
803 		    || n->host_end < cur_node.host_end)
804 		  {
805 		    gomp_mutex_unlock (&devicep->lock);
806 		    gomp_fatal ("always pointer not mapped");
807 		  }
808 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
809 		    != GOMP_MAP_ALWAYS_POINTER)
810 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
811 		if (cur_node.tgt_offset)
812 		  cur_node.tgt_offset -= sizes[i];
813 		gomp_copy_host2dev (devicep,
814 				    (void *) (n->tgt->tgt_start
815 					      + n->tgt_offset
816 					      + cur_node.host_start
817 					      - n->host_start),
818 				    (void *) &cur_node.tgt_offset,
819 				    sizeof (void *), cbufp);
820 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
821 				      + cur_node.host_start - n->host_start;
822 		continue;
823 	      default:
824 		break;
825 	      }
826 	    splay_tree_key k = &array->key;
827 	    k->host_start = (uintptr_t) hostaddrs[i];
828 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
829 	      k->host_end = k->host_start + sizes[i];
830 	    else
831 	      k->host_end = k->host_start + sizeof (void *);
832 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
833 	    if (n && n->refcount != REFCOUNT_LINK)
834 	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
835 				      kind & typemask, cbufp);
836 	    else
837 	      {
838 		k->link_key = NULL;
839 		if (n && n->refcount == REFCOUNT_LINK)
840 		  {
841 		    /* Replace target address of the pointer with target address
842 		       of mapped object in the splay tree.  */
843 		    splay_tree_remove (mem_map, n);
844 		    k->link_key = n;
845 		  }
846 		size_t align = (size_t) 1 << (kind >> rshift);
847 		tgt->list[i].key = k;
848 		k->tgt = tgt;
849 		if (field_tgt_clear != ~(size_t) 0)
850 		  {
851 		    k->tgt_offset = k->host_start - field_tgt_base
852 				    + field_tgt_offset;
853 		    if (i == field_tgt_clear)
854 		      field_tgt_clear = ~(size_t) 0;
855 		  }
856 		else
857 		  {
858 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
859 		    k->tgt_offset = tgt_size;
860 		    tgt_size += k->host_end - k->host_start;
861 		  }
862 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
863 		tgt->list[i].always_copy_from
864 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
865 		tgt->list[i].offset = 0;
866 		tgt->list[i].length = k->host_end - k->host_start;
867 		k->refcount = 1;
868 		k->dynamic_refcount = 0;
869 		tgt->refcount++;
870 		array->left = NULL;
871 		array->right = NULL;
872 		splay_tree_insert (mem_map, array);
873 		switch (kind & typemask)
874 		  {
875 		  case GOMP_MAP_ALLOC:
876 		  case GOMP_MAP_FROM:
877 		  case GOMP_MAP_FORCE_ALLOC:
878 		  case GOMP_MAP_FORCE_FROM:
879 		  case GOMP_MAP_ALWAYS_FROM:
880 		    break;
881 		  case GOMP_MAP_TO:
882 		  case GOMP_MAP_TOFROM:
883 		  case GOMP_MAP_FORCE_TO:
884 		  case GOMP_MAP_FORCE_TOFROM:
885 		  case GOMP_MAP_ALWAYS_TO:
886 		  case GOMP_MAP_ALWAYS_TOFROM:
887 		    gomp_copy_host2dev (devicep,
888 					(void *) (tgt->tgt_start
889 						  + k->tgt_offset),
890 					(void *) k->host_start,
891 					k->host_end - k->host_start, cbufp);
892 		    break;
893 		  case GOMP_MAP_POINTER:
894 		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
895 				      k->tgt_offset, sizes[i], cbufp);
896 		    break;
897 		  case GOMP_MAP_TO_PSET:
898 		    gomp_copy_host2dev (devicep,
899 					(void *) (tgt->tgt_start
900 						  + k->tgt_offset),
901 					(void *) k->host_start,
902 					k->host_end - k->host_start, cbufp);
903 
904 		    for (j = i + 1; j < mapnum; j++)
905 		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
906 							 j)
907 					       & typemask))
908 			break;
909 		      else if ((uintptr_t) hostaddrs[j] < k->host_start
910 			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
911 				   > k->host_end))
912 			break;
913 		      else
914 			{
915 			  tgt->list[j].key = k;
916 			  tgt->list[j].copy_from = false;
917 			  tgt->list[j].always_copy_from = false;
918 			  if (k->refcount != REFCOUNT_INFINITY)
919 			    k->refcount++;
920 			  gomp_map_pointer (tgt,
921 					    (uintptr_t) *(void **) hostaddrs[j],
922 					    k->tgt_offset
923 					    + ((uintptr_t) hostaddrs[j]
924 					       - k->host_start),
925 					    sizes[j], cbufp);
926 			  i++;
927 			}
928 		    break;
929 		  case GOMP_MAP_FORCE_PRESENT:
930 		    {
931 		      /* We already looked up the memory region above and it
932 			 was missing.  */
933 		      size_t size = k->host_end - k->host_start;
934 		      gomp_mutex_unlock (&devicep->lock);
935 #ifdef HAVE_INTTYPES_H
936 		      gomp_fatal ("present clause: !acc_is_present (%p, "
937 				  "%"PRIu64" (0x%"PRIx64"))",
938 				  (void *) k->host_start,
939 				  (uint64_t) size, (uint64_t) size);
940 #else
941 		      gomp_fatal ("present clause: !acc_is_present (%p, "
942 				  "%lu (0x%lx))", (void *) k->host_start,
943 				  (unsigned long) size, (unsigned long) size);
944 #endif
945 		    }
946 		    break;
947 		  case GOMP_MAP_FORCE_DEVICEPTR:
948 		    assert (k->host_end - k->host_start == sizeof (void *));
949 		    gomp_copy_host2dev (devicep,
950 					(void *) (tgt->tgt_start
951 						  + k->tgt_offset),
952 					(void *) k->host_start,
953 					sizeof (void *), cbufp);
954 		    break;
955 		  default:
956 		    gomp_mutex_unlock (&devicep->lock);
957 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
958 				kind);
959 		  }
960 
961 		if (k->link_key)
962 		  {
963 		    /* Set link pointer on target to the device address of the
964 		       mapped object.  */
965 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
966 		    /* We intentionally do not use coalescing here, as it's not
967 		       data allocated by the current call to this function.  */
968 		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
969 					&tgt_addr, sizeof (void *), NULL);
970 		  }
971 		array++;
972 	      }
973 	  }
974     }
975 
976   if (pragma_kind == GOMP_MAP_VARS_TARGET)
977     {
978       for (i = 0; i < mapnum; i++)
979 	{
980 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
981 	  gomp_copy_host2dev (devicep,
982 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
983 			      (void *) &cur_node.tgt_offset, sizeof (void *),
984 			      cbufp);
985 	}
986     }
987 
988   if (cbufp)
989     {
990       long c = 0;
991       for (c = 0; c < cbuf.chunk_cnt; ++c)
992 	gomp_copy_host2dev (devicep,
993 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
994 			    (char *) cbuf.buf + (cbuf.chunks[c].start
995 						 - cbuf.chunks[0].start),
996 			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
997       free (cbuf.buf);
998       cbuf.buf = NULL;
999       cbufp = NULL;
1000     }
1001 
1002   /* If the variable from "omp target enter data" map-list was already mapped,
1003      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
1004      gomp_exit_data.  */
1005   if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
1006     {
1007       free (tgt);
1008       tgt = NULL;
1009     }
1010 
1011   gomp_mutex_unlock (&devicep->lock);
1012   return tgt;
1013 }
1014 
1015 static void
1016 gomp_unmap_tgt (struct target_mem_desc *tgt)
1017 {
1018   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1019   if (tgt->tgt_end)
1020     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1021 
1022   free (tgt->array);
1023   free (tgt);
1024 }
1025 
1026 attribute_hidden bool
1027 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1028 {
1029   bool is_tgt_unmapped = false;
1030   splay_tree_remove (&devicep->mem_map, k);
1031   if (k->link_key)
1032     splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
1033   if (k->tgt->refcount > 1)
1034     k->tgt->refcount--;
1035   else
1036     {
1037       is_tgt_unmapped = true;
1038       gomp_unmap_tgt (k->tgt);
1039     }
1040   return is_tgt_unmapped;
1041 }
1042 
1043 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1044    variables back from device to host; if it is false, it is assumed that this
1045    has been done already.  */
1046 
1047 attribute_hidden void
1048 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
1049 {
1050   struct gomp_device_descr *devicep = tgt->device_descr;
1051 
1052   if (tgt->list_count == 0)
1053     {
1054       free (tgt);
1055       return;
1056     }
1057 
1058   gomp_mutex_lock (&devicep->lock);
1059   if (devicep->state == GOMP_DEVICE_FINALIZED)
1060     {
1061       gomp_mutex_unlock (&devicep->lock);
1062       free (tgt->array);
1063       free (tgt);
1064       return;
1065     }
1066 
1067   size_t i;
1068   for (i = 0; i < tgt->list_count; i++)
1069     {
1070       splay_tree_key k = tgt->list[i].key;
1071       if (k == NULL)
1072 	continue;
1073 
1074       bool do_unmap = false;
1075       if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
1076 	k->refcount--;
1077       else if (k->refcount == 1)
1078 	{
1079 	  k->refcount--;
1080 	  do_unmap = true;
1081 	}
1082 
1083       if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
1084 	  || tgt->list[i].always_copy_from)
1085 	gomp_copy_dev2host (devicep,
1086 			    (void *) (k->host_start + tgt->list[i].offset),
1087 			    (void *) (k->tgt->tgt_start + k->tgt_offset
1088 				      + tgt->list[i].offset),
1089 			    tgt->list[i].length);
1090       if (do_unmap)
1091 	gomp_remove_var (devicep, k);
1092     }
1093 
1094   if (tgt->refcount > 1)
1095     tgt->refcount--;
1096   else
1097     gomp_unmap_tgt (tgt);
1098 
1099   gomp_mutex_unlock (&devicep->lock);
1100 }
1101 
1102 static void
1103 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1104 	     size_t *sizes, void *kinds, bool short_mapkind)
1105 {
1106   size_t i;
1107   struct splay_tree_key_s cur_node;
1108   const int typemask = short_mapkind ? 0xff : 0x7;
1109 
1110   if (!devicep)
1111     return;
1112 
1113   if (mapnum == 0)
1114     return;
1115 
1116   gomp_mutex_lock (&devicep->lock);
1117   if (devicep->state == GOMP_DEVICE_FINALIZED)
1118     {
1119       gomp_mutex_unlock (&devicep->lock);
1120       return;
1121     }
1122 
1123   for (i = 0; i < mapnum; i++)
1124     if (sizes[i])
1125       {
1126 	cur_node.host_start = (uintptr_t) hostaddrs[i];
1127 	cur_node.host_end = cur_node.host_start + sizes[i];
1128 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1129 	if (n)
1130 	  {
1131 	    int kind = get_kind (short_mapkind, kinds, i);
1132 	    if (n->host_start > cur_node.host_start
1133 		|| n->host_end < cur_node.host_end)
1134 	      {
1135 		gomp_mutex_unlock (&devicep->lock);
1136 		gomp_fatal ("Trying to update [%p..%p) object when "
1137 			    "only [%p..%p) is mapped",
1138 			    (void *) cur_node.host_start,
1139 			    (void *) cur_node.host_end,
1140 			    (void *) n->host_start,
1141 			    (void *) n->host_end);
1142 	      }
1143 
1144 
1145 	    void *hostaddr = (void *) cur_node.host_start;
1146 	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1147 				      + cur_node.host_start - n->host_start);
1148 	    size_t size = cur_node.host_end - cur_node.host_start;
1149 
1150 	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
1151 	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
1152 	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1153 	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
1154 	  }
1155       }
1156   gomp_mutex_unlock (&devicep->lock);
1157 }
1158 
1159 /* Load the image pointed to by TARGET_DATA to the device specified by
1160    DEVICEP, and insert into the splay tree the mapping between addresses from
1161    HOST_TABLE and from the loaded target image.  We rely on the host and
1162    device compilers emitting variables and functions in the same order.  */
1163 
1164 static void
1165 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
1166 			   const void *host_table, const void *target_data,
1167 			   bool is_register_lock)
1168 {
1169   void **host_func_table = ((void ***) host_table)[0];
1170   void **host_funcs_end  = ((void ***) host_table)[1];
1171   void **host_var_table  = ((void ***) host_table)[2];
1172   void **host_vars_end   = ((void ***) host_table)[3];
1173 
1174   /* The func table contains only addresses, the var table contains addresses
1175      and corresponding sizes.  */
1176   int num_funcs = host_funcs_end - host_func_table;
1177   int num_vars  = (host_vars_end - host_var_table) / 2;
1178 
1179   /* Load image to device and get target addresses for the image.  */
1180   struct addr_pair *target_table = NULL;
1181   int i, num_target_entries;
1182 
1183   num_target_entries
1184     = devicep->load_image_func (devicep->target_id, version,
1185 				target_data, &target_table);
1186 
1187   if (num_target_entries != num_funcs + num_vars)
1188     {
1189       gomp_mutex_unlock (&devicep->lock);
1190       if (is_register_lock)
1191 	gomp_mutex_unlock (&register_lock);
1192       gomp_fatal ("Cannot map target functions or variables"
1193 		  " (expected %u, have %u)", num_funcs + num_vars,
1194 		  num_target_entries);
1195     }
1196 
1197   /* Insert host-target address mapping into splay tree.  */
1198   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
1199   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
1200   tgt->refcount = REFCOUNT_INFINITY;
1201   tgt->tgt_start = 0;
1202   tgt->tgt_end = 0;
1203   tgt->to_free = NULL;
1204   tgt->prev = NULL;
1205   tgt->list_count = 0;
1206   tgt->device_descr = devicep;
1207   splay_tree_node array = tgt->array;
1208 
1209   for (i = 0; i < num_funcs; i++)
1210     {
1211       splay_tree_key k = &array->key;
1212       k->host_start = (uintptr_t) host_func_table[i];
1213       k->host_end = k->host_start + 1;
1214       k->tgt = tgt;
1215       k->tgt_offset = target_table[i].start;
1216       k->refcount = REFCOUNT_INFINITY;
1217       k->dynamic_refcount = 0;
1218       k->link_key = NULL;
1219       array->left = NULL;
1220       array->right = NULL;
1221       splay_tree_insert (&devicep->mem_map, array);
1222       array++;
1223     }
1224 
1225   /* Most significant bit of the size in host and target tables marks
1226      "omp declare target link" variables.  */
1227   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1228   const uintptr_t size_mask = ~link_bit;
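  /* For example, on a 64-bit host link_bit is 0x8000000000000000, so a table
     entry of 0x8000000000000010 describes a 16-byte "declare target link"
     variable.  */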
1229 
1230   for (i = 0; i < num_vars; i++)
1231     {
1232       struct addr_pair *target_var = &target_table[num_funcs + i];
1233       uintptr_t target_size = target_var->end - target_var->start;
1234 
1235       if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
1236 	{
1237 	  gomp_mutex_unlock (&devicep->lock);
1238 	  if (is_register_lock)
1239 	    gomp_mutex_unlock (&register_lock);
1240 	  gomp_fatal ("Cannot map target variables (size mismatch)");
1241 	}
1242 
1243       splay_tree_key k = &array->key;
1244       k->host_start = (uintptr_t) host_var_table[i * 2];
1245       k->host_end
1246 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1247       k->tgt = tgt;
1248       k->tgt_offset = target_var->start;
1249       k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
1250       k->dynamic_refcount = 0;
1251       k->link_key = NULL;
1252       array->left = NULL;
1253       array->right = NULL;
1254       splay_tree_insert (&devicep->mem_map, array);
1255       array++;
1256     }
1257 
1258   free (target_table);
1259 }
1260 
1261 /* Unload the mappings described by TARGET_DATA from the device DEVICEP.
1262    The device must be locked.  */
1263 
1264 static void
1265 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
1266 			       unsigned version,
1267 			       const void *host_table, const void *target_data)
1268 {
1269   void **host_func_table = ((void ***) host_table)[0];
1270   void **host_funcs_end  = ((void ***) host_table)[1];
1271   void **host_var_table  = ((void ***) host_table)[2];
1272   void **host_vars_end   = ((void ***) host_table)[3];
1273 
1274   /* The func table contains only addresses, the var table contains addresses
1275      and corresponding sizes.  */
1276   int num_funcs = host_funcs_end - host_func_table;
1277   int num_vars  = (host_vars_end - host_var_table) / 2;
1278 
1279   struct splay_tree_key_s k;
1280   splay_tree_key node = NULL;
1281 
1282   /* Find the mapping at the start of the node array.  */
1283   if (num_funcs || num_vars)
1284     {
1285       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
1286 		      : (uintptr_t) host_var_table[0]);
1287       k.host_end = k.host_start + 1;
1288       node = splay_tree_lookup (&devicep->mem_map, &k);
1289     }
1290 
1291   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
1292     {
1293       gomp_mutex_unlock (&devicep->lock);
1294       gomp_fatal ("image unload fail");
1295     }
1296 
1297   /* Remove mappings from splay tree.  */
1298   int i;
1299   for (i = 0; i < num_funcs; i++)
1300     {
1301       k.host_start = (uintptr_t) host_func_table[i];
1302       k.host_end = k.host_start + 1;
1303       splay_tree_remove (&devicep->mem_map, &k);
1304     }
1305 
1306   /* Most significant bit of the size in host and target tables marks
1307      "omp declare target link" variables.  */
1308   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
1309   const uintptr_t size_mask = ~link_bit;
1310   bool is_tgt_unmapped = false;
1311 
1312   for (i = 0; i < num_vars; i++)
1313     {
1314       k.host_start = (uintptr_t) host_var_table[i * 2];
1315       k.host_end
1316 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
1317 
1318       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
1319 	splay_tree_remove (&devicep->mem_map, &k);
1320       else
1321 	{
1322 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
1323 	  is_tgt_unmapped = gomp_remove_var (devicep, n);
1324 	}
1325     }
1326 
1327   if (node && !is_tgt_unmapped)
1328     {
1329       free (node->tgt);
1330       free (node);
1331     }
1332 }
1333 
1334 /* This function should be called from every offload image while loading.
1335    It gets the descriptor of the host func and var tables HOST_TABLE, the
1336    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1337 
1338 void
1339 GOMP_offload_register_ver (unsigned version, const void *host_table,
1340 			   int target_type, const void *target_data)
1341 {
1342   int i;
1343 
1344   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1345     gomp_fatal ("Library too old for offload (version %u < %u)",
1346 		GOMP_VERSION, GOMP_VERSION_LIB (version));
1347 
1348   gomp_mutex_lock (&register_lock);
1349 
1350   /* Load image to all initialized devices.  */
1351   for (i = 0; i < num_devices; i++)
1352     {
1353       struct gomp_device_descr *devicep = &devices[i];
1354       gomp_mutex_lock (&devicep->lock);
1355       if (devicep->type == target_type
1356 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1357 	gomp_load_image_to_device (devicep, version,
1358 				   host_table, target_data, true);
1359       gomp_mutex_unlock (&devicep->lock);
1360     }
1361 
1362   /* Insert image to array of pending images.  */
1363   offload_images
1364     = gomp_realloc_unlock (offload_images,
1365 			   (num_offload_images + 1)
1366 			   * sizeof (struct offload_image_descr));
1367   offload_images[num_offload_images].version = version;
1368   offload_images[num_offload_images].type = target_type;
1369   offload_images[num_offload_images].host_table = host_table;
1370   offload_images[num_offload_images].target_data = target_data;
1371 
1372   num_offload_images++;
1373   gomp_mutex_unlock (&register_lock);
1374 }
1375 
1376 void
1377 GOMP_offload_register (const void *host_table, int target_type,
1378 		       const void *target_data)
1379 {
1380   GOMP_offload_register_ver (0, host_table, target_type, target_data);
1381 }
1382 
1383 /* This function should be called from every offload image while unloading.
1384    It gets the descriptor of the host func and var tables HOST_TABLE, the
1385    TARGET_TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
1386 
1387 void
1388 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1389 			     int target_type, const void *target_data)
1390 {
1391   int i;
1392 
1393   gomp_mutex_lock (&register_lock);
1394 
1395   /* Unload image from all initialized devices.  */
1396   for (i = 0; i < num_devices; i++)
1397     {
1398       struct gomp_device_descr *devicep = &devices[i];
1399       gomp_mutex_lock (&devicep->lock);
1400       if (devicep->type == target_type
1401 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
1402 	gomp_unload_image_from_device (devicep, version,
1403 				       host_table, target_data);
1404       gomp_mutex_unlock (&devicep->lock);
1405     }
1406 
1407   /* Remove image from array of pending images.  */
1408   for (i = 0; i < num_offload_images; i++)
1409     if (offload_images[i].target_data == target_data)
1410       {
1411 	offload_images[i] = offload_images[--num_offload_images];
1412 	break;
1413       }
1414 
1415   gomp_mutex_unlock (&register_lock);
1416 }
1417 
1418 void
1419 GOMP_offload_unregister (const void *host_table, int target_type,
1420 			 const void *target_data)
1421 {
1422   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
1423 }
1424 
1425 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
1426    must be locked on entry, and remains locked on return.  */
1427 
1428 attribute_hidden void
1429 gomp_init_device (struct gomp_device_descr *devicep)
1430 {
1431   int i;
1432   if (!devicep->init_device_func (devicep->target_id))
1433     {
1434       gomp_mutex_unlock (&devicep->lock);
1435       gomp_fatal ("device initialization failed");
1436     }
1437 
1438   /* Load to the device all images registered so far.  */
1439   for (i = 0; i < num_offload_images; i++)
1440     {
1441       struct offload_image_descr *image = &offload_images[i];
1442       if (image->type == devicep->type)
1443 	gomp_load_image_to_device (devicep, image->version,
1444 				   image->host_table, image->target_data,
1445 				   false);
1446     }
1447 
1448   devicep->state = GOMP_DEVICE_INITIALIZED;
1449 }
1450 
1451 attribute_hidden void
1452 gomp_unload_device (struct gomp_device_descr *devicep)
1453 {
1454   if (devicep->state == GOMP_DEVICE_INITIALIZED)
1455     {
1456       unsigned i;
1457 
1458       /* Unload from the device all images registered so far.  */
1459       for (i = 0; i < num_offload_images; i++)
1460 	{
1461 	  struct offload_image_descr *image = &offload_images[i];
1462 	  if (image->type == devicep->type)
1463 	    gomp_unload_image_from_device (devicep, image->version,
1464 					   image->host_table,
1465 					   image->target_data);
1466 	}
1467     }
1468 }
1469 
1470 /* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
1471    locked on return.  */
1472 
1473 attribute_hidden void
1474 gomp_free_memmap (struct splay_tree_s *mem_map)
1475 {
1476   while (mem_map->root)
1477     {
1478       struct target_mem_desc *tgt = mem_map->root->key.tgt;
1479 
1480       splay_tree_remove (mem_map, &mem_map->root->key);
1481       free (tgt->array);
1482       free (tgt);
1483     }
1484 }
1485 
1486 /* Host fallback for GOMP_target{,_ext} routines.  */
1487 
1488 static void
1489 gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1490 {
1491   struct gomp_thread old_thr, *thr = gomp_thread ();
1492   old_thr = *thr;
1493   memset (thr, '\0', sizeof (*thr));
1494   if (gomp_places_list)
1495     {
1496       thr->place = old_thr.place;
1497       thr->ts.place_partition_len = gomp_places_list_len;
1498     }
1499   fn (hostaddrs);
1500   gomp_free_thread (thr);
1501   *thr = old_thr;
1502 }
1503 
1504 /* Calculate alignment and size requirements of a private copy of data shared
1505    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
1506 
1507 static inline void
1508 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1509 				     unsigned short *kinds, size_t *tgt_align,
1510 				     size_t *tgt_size)
1511 {
1512   size_t i;
1513   for (i = 0; i < mapnum; i++)
1514     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1515       {
1516 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1517 	if (*tgt_align < align)
1518 	  *tgt_align = align;
1519 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1520 	*tgt_size += sizes[i];
1521       }
1522 }
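
#if 0
/* Illustrative sketch only (never compiled): the round-up idiom used above
   and throughout this file.  ALIGN must be a power of two; e.g. for
   size == 13 and align == 8, (13 + 7) & ~7 == 16, the next multiple of 8.  */
static size_t
example_round_up (size_t size, size_t align)
{
  return (size + align - 1) & ~(align - 1);
}
#endif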
1523 
1524 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
1525 
1526 static inline void
1527 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1528 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
1529 			size_t tgt_size)
1530 {
1531   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1532   if (al)
1533     tgt += tgt_align - al;
1534   tgt_size = 0;
1535   size_t i;
1536   for (i = 0; i < mapnum; i++)
1537     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1538       {
1539 	size_t align = (size_t) 1 << (kinds[i] >> 8);
1540 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
1541 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1542 	hostaddrs[i] = tgt + tgt_size;
1543 	tgt_size = tgt_size + sizes[i];
1544       }
1545 }
1546 
1547 /* Helper function of GOMP_target{,_ext} routines.  */
1548 
1549 static void *
1550 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1551 			 void (*host_fn) (void *))
1552 {
1553   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1554     return (void *) host_fn;
1555   else
1556     {
1557       gomp_mutex_lock (&devicep->lock);
1558       if (devicep->state == GOMP_DEVICE_FINALIZED)
1559 	{
1560 	  gomp_mutex_unlock (&devicep->lock);
1561 	  return NULL;
1562 	}
1563 
1564       struct splay_tree_key_s k;
1565       k.host_start = (uintptr_t) host_fn;
1566       k.host_end = k.host_start + 1;
1567       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1568       gomp_mutex_unlock (&devicep->lock);
1569       if (tgt_fn == NULL)
1570 	return NULL;
1571 
1572       return (void *) tgt_fn->tgt_offset;
1573     }
1574 }
1575 
1576 /* Called when encountering a target directive.  If DEVICE is
1577    GOMP_DEVICE_ICV, use the default-device-var ICV.  If it is
1578    GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
1579    hw device), use the host fallback.  FN is the address of the host code,
1580    UNUSED is part of the current ABI, but we're not actually using it.
1581    HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, holding the
1582    addresses of the host objects, the sizes of the host objects (resp., for
1583    pointer kinds, the pointer bias with an assumed sizeof (void *) size) and
1584    the kinds.  */
1585 
1586 void
1587 GOMP_target (int device, void (*fn) (void *), const void *unused,
1588 	     size_t mapnum, void **hostaddrs, size_t *sizes,
1589 	     unsigned char *kinds)
1590 {
1591   struct gomp_device_descr *devicep = resolve_device (device);
1592 
1593   void *fn_addr;
1594   if (devicep == NULL
1595       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1596       /* All shared memory devices should use the GOMP_target_ext function.  */
1597       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1598       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1599     return gomp_target_fallback (fn, hostaddrs);
1600 
1601   struct target_mem_desc *tgt_vars
1602     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1603 		     GOMP_MAP_VARS_TARGET);
1604   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1605 		     NULL);
1606   gomp_unmap_vars (tgt_vars, true);
1607 }
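
#if 0
/* Illustrative sketch only (never compiled; real callers are emitted by the
   compiler for "#pragma omp target", so names and values here are
   hypothetical): offloading one 8-element int array tofrom the default
   device through the legacy entry point above.  Each 8-bit kind packs the
   map type into its low 3 bits and the log2 of the alignment above them.  */
static void
example_target_call (void (*host_fn) (void *))
{
  static int a[8];
  void *hostaddrs[1] = { a };
  size_t sizes[1] = { sizeof a };
  unsigned char kinds[1] = { (2 << 3) | GOMP_MAP_TOFROM };  /* 4-byte align.  */
  GOMP_target (GOMP_DEVICE_ICV, host_fn, NULL, 1, hostaddrs, sizes, kinds);
}
#endif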
1608 
1609 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
1610    and several arguments have been added:
1611    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
1612    DEPEND is array of dependencies, see GOMP_task for details.
1613 
1614    ARGS is a pointer to an array consisting of a variable number of both
1615    device-independent and device-specific arguments, each of which can take
1616    one or two elements, where the first element specifies for which device
1617    it is intended, the type and optionally also the value.  If the value is
1618    not present in the first element, the whole second element is the actual
1619    value.  The last element of the array is a single NULL.  Among the
1620    device-independent arguments are, for example, NUM_TEAMS and THREAD_LIMIT.
1621 
1622    NUM_TEAMS is positive if GOMP_teams will be called in the body with
1623    that value, or 1 if the teams construct is not present, or 0 if the
1624    teams construct does not have a num_teams clause (so the choice is
1625    implementation defined), and -1 if it can't be determined on the host
1626    what value GOMP_teams will have on the device.
1627    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
1628    body with that value, or 0 if the teams construct does not have a
1629    thread_limit clause or is not present, or -1 if it can't be determined
1630    on the host what value GOMP_teams will have on the device.  */
1631 
1632 void
1633 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
1634 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
1635 		 unsigned int flags, void **depend, void **args)
1636 {
1637   struct gomp_device_descr *devicep = resolve_device (device);
1638   size_t tgt_align = 0, tgt_size = 0;
1639   bool fpc_done = false;
1640 
1641   if (flags & GOMP_TARGET_FLAG_NOWAIT)
1642     {
1643       struct gomp_thread *thr = gomp_thread ();
1644       /* Create a team if we don't have any around, as nowait
1645 	 target tasks make sense to run asynchronously even when
1646 	 outside of any parallel region.  */
1647       if (__builtin_expect (thr->ts.team == NULL, 0))
1648 	{
1649 	  struct gomp_team *team = gomp_new_team (1);
1650 	  struct gomp_task *task = thr->task;
1651 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
1652 	  team->prev_ts = thr->ts;
1653 	  thr->ts.team = team;
1654 	  thr->ts.team_id = 0;
1655 	  thr->ts.work_share = &team->work_shares[0];
1656 	  thr->ts.last_work_share = NULL;
1657 #ifdef HAVE_SYNC_BUILTINS
1658 	  thr->ts.single_count = 0;
1659 #endif
1660 	  thr->ts.static_trip = 0;
1661 	  thr->task = &team->implicit_task[0];
1662 	  gomp_init_task (thr->task, NULL, icv);
1663 	  if (task)
1664 	    {
1665 	      thr->task = task;
1666 	      gomp_end_task ();
1667 	      free (task);
1668 	      thr->task = &team->implicit_task[0];
1669 	    }
1670 	  else
1671 	    pthread_setspecific (gomp_thread_destructor, thr);
1672 	}
1673       if (thr->ts.team
1674 	  && !thr->task->final_task)
1675 	{
1676 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
1677 				   sizes, kinds, flags, depend, args,
1678 				   GOMP_TARGET_TASK_BEFORE_MAP);
1679 	  return;
1680 	}
1681     }
1682 
1683   /* If there are depend clauses, but nowait is not present
1684      (or we are in a final task), block the parent task until the
1685      dependencies are resolved and then just continue with the rest
1686      of the function as if it were a merged task.  */
1687   if (depend != NULL)
1688     {
1689       struct gomp_thread *thr = gomp_thread ();
1690       if (thr->task && thr->task->depend_hash)
1691 	{
1692 	  /* If we might need to wait, copy firstprivate now.  */
1693 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1694 					       &tgt_align, &tgt_size);
1695 	  if (tgt_align)
1696 	    {
1697 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1698 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1699 				      tgt_align, tgt_size);
1700 	    }
1701 	  fpc_done = true;
1702 	  gomp_task_maybe_wait_for_dependencies (depend);
1703 	}
1704     }
1705 
1706   void *fn_addr;
1707   if (devicep == NULL
1708       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1709       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
1710       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
1711     {
1712       if (!fpc_done)
1713 	{
1714 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1715 					       &tgt_align, &tgt_size);
1716 	  if (tgt_align)
1717 	    {
1718 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1719 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1720 				      tgt_align, tgt_size);
1721 	    }
1722 	}
1723       gomp_target_fallback (fn, hostaddrs);
1724       return;
1725     }
1726 
1727   struct target_mem_desc *tgt_vars;
1728   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1729     {
1730       if (!fpc_done)
1731 	{
1732 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
1733 					       &tgt_align, &tgt_size);
1734 	  if (tgt_align)
1735 	    {
1736 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
1737 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
1738 				      tgt_align, tgt_size);
1739 	    }
1740 	}
1741       tgt_vars = NULL;
1742     }
1743   else
1744     tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
1745 			      true, GOMP_MAP_VARS_TARGET);
1746   devicep->run_func (devicep->target_id, fn_addr,
1747 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
1748 		     args);
1749   if (tgt_vars)
1750     gomp_unmap_vars (tgt_vars, true);
1751 }
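/* Illustrative sketch of the ARGS encoding described above (an example,
   not code emitted by this file): passing NUM_TEAMS == 4 and
   THREAD_LIMIT == 8 as device-independent one-element arguments could
   look like

     uintptr_t arg0 = GOMP_TARGET_ARG_DEVICE_ALL
		      | GOMP_TARGET_ARG_NUM_TEAMS
		      | (4 << GOMP_TARGET_ARG_VALUE_SHIFT);
     uintptr_t arg1 = GOMP_TARGET_ARG_DEVICE_ALL
		      | GOMP_TARGET_ARG_THREAD_LIMIT
		      | (8 << GOMP_TARGET_ARG_VALUE_SHIFT);
     void *args[] = { (void *) arg0, (void *) arg1, NULL };

   Values too wide to fit above GOMP_TARGET_ARG_VALUE_SHIFT instead set
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM and occupy the following element.  */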
1752 
1753 /* Host fallback for GOMP_target_data{,_ext} routines.  */
1754 
1755 static void
1756 gomp_target_data_fallback (void)
1757 {
1758   struct gomp_task_icv *icv = gomp_icv (false);
1759   if (icv->target_data)
1760     {
1761       /* Even when doing a host fallback, if there are any active
1762          #pragma omp target data constructs, we need to remember the
1763          new #pragma omp target data, otherwise GOMP_target_end_data
1764          would get out of sync.  */
1765       struct target_mem_desc *tgt
1766 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1767 			 GOMP_MAP_VARS_DATA);
1768       tgt->prev = icv->target_data;
1769       icv->target_data = tgt;
1770     }
1771 }
1772 
1773 void
1774 GOMP_target_data (int device, const void *unused, size_t mapnum,
1775 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
1776 {
1777   struct gomp_device_descr *devicep = resolve_device (device);
1778 
1779   if (devicep == NULL
1780       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1781       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1782     return gomp_target_data_fallback ();
1783 
1784   struct target_mem_desc *tgt
1785     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1786 		     GOMP_MAP_VARS_DATA);
1787   struct gomp_task_icv *icv = gomp_icv (true);
1788   tgt->prev = icv->target_data;
1789   icv->target_data = tgt;
1790 }
1791 
1792 void
1793 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1794 		      size_t *sizes, unsigned short *kinds)
1795 {
1796   struct gomp_device_descr *devicep = resolve_device (device);
1797 
1798   if (devicep == NULL
1799       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1800       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1801     return gomp_target_data_fallback ();
1802 
1803   struct target_mem_desc *tgt
1804     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1805 		     GOMP_MAP_VARS_DATA);
1806   struct gomp_task_icv *icv = gomp_icv (true);
1807   tgt->prev = icv->target_data;
1808   icv->target_data = tgt;
1809 }
1810 
1811 void
1812 GOMP_target_end_data (void)
1813 {
1814   struct gomp_task_icv *icv = gomp_icv (false);
1815   if (icv->target_data)
1816     {
1817       struct target_mem_desc *tgt = icv->target_data;
1818       icv->target_data = tgt->prev;
1819       gomp_unmap_vars (tgt, true);
1820     }
1821 }
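/* Illustrative pairing of the entry points above (hypothetical codegen
   sketch): a structured region such as

     #pragma omp target data map(tofrom: a[0:n])
     { ... }

   brackets its body roughly as

     void *hostaddrs[1] = { a };
     size_t sizes[1] = { n * sizeof (*a) };
     unsigned short kinds[1] = { GOMP_MAP_TOFROM };
     GOMP_target_data_ext (device, 1, hostaddrs, sizes, kinds);
     ... body ...
     GOMP_target_end_data ();

   which is why the descriptors form the PREV-chained stack hanging off
   the target_data ICV.  */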
1822 
1823 void
1824 GOMP_target_update (int device, const void *unused, size_t mapnum,
1825 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
1826 {
1827   struct gomp_device_descr *devicep = resolve_device (device);
1828 
1829   if (devicep == NULL
1830       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1831       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1832     return;
1833 
1834   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1835 }
1836 
1837 void
1838 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
1839 			size_t *sizes, unsigned short *kinds,
1840 			unsigned int flags, void **depend)
1841 {
1842   struct gomp_device_descr *devicep = resolve_device (device);
1843 
1844   /* If there are depend clauses, but nowait is not present,
1845      block the parent task until the dependencies are resolved
1846      and then just continue with the rest of the function as if it
1847      were a merged task.  Until we are able to schedule tasks during
1848      variable mapping or unmapping, ignore nowait if depend clauses
1849      are not present.  */
1850   if (depend != NULL)
1851     {
1852       struct gomp_thread *thr = gomp_thread ();
1853       if (thr->task && thr->task->depend_hash)
1854 	{
1855 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
1856 	      && thr->ts.team
1857 	      && !thr->task->final_task)
1858 	    {
1859 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
1860 					   mapnum, hostaddrs, sizes, kinds,
1861 					   flags | GOMP_TARGET_FLAG_UPDATE,
1862 					   depend, NULL, GOMP_TARGET_TASK_DATA))
1863 		return;
1864 	    }
1865 	  else
1866 	    {
1867 	      struct gomp_team *team = thr->ts.team;
1868 	      /* If parallel or taskgroup has been cancelled, don't start new
1869 		 tasks.  */
1870 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
1871 		{
1872 		  if (gomp_team_barrier_cancelled (&team->barrier))
1873 		    return;
1874 		  if (thr->task->taskgroup)
1875 		    {
1876 		      if (thr->task->taskgroup->cancelled)
1877 			return;
1878 		      if (thr->task->taskgroup->workshare
1879 			  && thr->task->taskgroup->prev
1880 			  && thr->task->taskgroup->prev->cancelled)
1881 			return;
1882 		    }
1883 		}
1884 
1885 	      gomp_task_maybe_wait_for_dependencies (depend);
1886 	    }
1887 	}
1888     }
1889 
1890   if (devicep == NULL
1891       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1892       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1893     return;
1894 
1895   struct gomp_thread *thr = gomp_thread ();
1896   struct gomp_team *team = thr->ts.team;
1897   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
1898   if (__builtin_expect (gomp_cancel_var, 0) && team)
1899     {
1900       if (gomp_team_barrier_cancelled (&team->barrier))
1901 	return;
1902       if (thr->task->taskgroup)
1903 	{
1904 	  if (thr->task->taskgroup->cancelled)
1905 	    return;
1906 	  if (thr->task->taskgroup->workshare
1907 	      && thr->task->taskgroup->prev
1908 	      && thr->task->taskgroup->prev->cancelled)
1909 	    return;
1910 	}
1911     }
1912 
1913   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
1914 }
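/* Illustrative sketch (hypothetical codegen): a directive such as

     #pragma omp target update from(x) depend(inout: x) nowait

   reaches this entry point with GOMP_TARGET_FLAG_NOWAIT set in FLAGS and
   a non-NULL DEPEND array, so the update above can be deferred to a
   GOMP_TARGET_TASK_DATA target task instead of blocking the encountering
   thread.  */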
1915 
1916 static void
1917 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
1918 		void **hostaddrs, size_t *sizes, unsigned short *kinds)
1919 {
1920   const int typemask = 0xff;
1921   size_t i;
1922   gomp_mutex_lock (&devicep->lock);
1923   if (devicep->state == GOMP_DEVICE_FINALIZED)
1924     {
1925       gomp_mutex_unlock (&devicep->lock);
1926       return;
1927     }
1928 
1929   for (i = 0; i < mapnum; i++)
1930     {
1931       struct splay_tree_key_s cur_node;
1932       unsigned char kind = kinds[i] & typemask;
1933       switch (kind)
1934 	{
1935 	case GOMP_MAP_FROM:
1936 	case GOMP_MAP_ALWAYS_FROM:
1937 	case GOMP_MAP_DELETE:
1938 	case GOMP_MAP_RELEASE:
1939 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1940 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
1941 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
1942 	  cur_node.host_end = cur_node.host_start + sizes[i];
1943 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
1944 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1945 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
1946 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
1947 	  if (!k)
1948 	    continue;
1949 
1950 	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
1951 	    k->refcount--;
1952 	  if ((kind == GOMP_MAP_DELETE
1953 	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
1954 	      && k->refcount != REFCOUNT_INFINITY)
1955 	    k->refcount = 0;
1956 
1957 	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
1958 	      || kind == GOMP_MAP_ALWAYS_FROM)
1959 	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
1960 				(void *) (k->tgt->tgt_start + k->tgt_offset
1961 					  + cur_node.host_start
1962 					  - k->host_start),
1963 				cur_node.host_end - cur_node.host_start);
1964 	  if (k->refcount == 0)
1965 	    {
1966 	      splay_tree_remove (&devicep->mem_map, k);
1967 	      if (k->link_key)
1968 		splay_tree_insert (&devicep->mem_map,
1969 				   (splay_tree_node) k->link_key);
1970 	      if (k->tgt->refcount > 1)
1971 		k->tgt->refcount--;
1972 	      else
1973 		gomp_unmap_tgt (k->tgt);
1974 	    }
1975 
1976 	  break;
1977 	default:
1978 	  gomp_mutex_unlock (&devicep->lock);
1979 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
1980 		      kind);
1981 	}
1982     }
1983 
1984   gomp_mutex_unlock (&devicep->lock);
1985 }
1986 
1987 void
1988 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
1989 			     size_t *sizes, unsigned short *kinds,
1990 			     unsigned int flags, void **depend)
1991 {
1992   struct gomp_device_descr *devicep = resolve_device (device);
1993 
1994   /* If there are depend clauses, but nowait is not present,
1995      block the parent task until the dependencies are resolved
1996      and then just continue with the rest of the function as if it
1997      were a merged task.  Until we are able to schedule tasks during
1998      variable mapping or unmapping, ignore nowait if depend clauses
1999      are not present.  */
2000   if (depend != NULL)
2001     {
2002       struct gomp_thread *thr = gomp_thread ();
2003       if (thr->task && thr->task->depend_hash)
2004 	{
2005 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2006 	      && thr->ts.team
2007 	      && !thr->task->final_task)
2008 	    {
2009 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2010 					   mapnum, hostaddrs, sizes, kinds,
2011 					   flags, depend, NULL,
2012 					   GOMP_TARGET_TASK_DATA))
2013 		return;
2014 	    }
2015 	  else
2016 	    {
2017 	      struct gomp_team *team = thr->ts.team;
2018 	      /* If parallel or taskgroup has been cancelled, don't start new
2019 		 tasks.  */
2020 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2021 		{
2022 		  if (gomp_team_barrier_cancelled (&team->barrier))
2023 		    return;
2024 		  if (thr->task->taskgroup)
2025 		    {
2026 		      if (thr->task->taskgroup->cancelled)
2027 			return;
2028 		      if (thr->task->taskgroup->workshare
2029 			  && thr->task->taskgroup->prev
2030 			  && thr->task->taskgroup->prev->cancelled)
2031 			return;
2032 		    }
2033 		}
2034 
2035 	      gomp_task_maybe_wait_for_dependencies (depend);
2036 	    }
2037 	}
2038     }
2039 
2040   if (devicep == NULL
2041       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2042       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2043     return;
2044 
2045   struct gomp_thread *thr = gomp_thread ();
2046   struct gomp_team *team = thr->ts.team;
2047   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2048   if (__builtin_expect (gomp_cancel_var, 0) && team)
2049     {
2050       if (gomp_team_barrier_cancelled (&team->barrier))
2051 	return;
2052       if (thr->task->taskgroup)
2053 	{
2054 	  if (thr->task->taskgroup->cancelled)
2055 	    return;
2056 	  if (thr->task->taskgroup->workshare
2057 	      && thr->task->taskgroup->prev
2058 	      && thr->task->taskgroup->prev->cancelled)
2059 	    return;
2060 	}
2061     }
2062 
2063   size_t i;
2064   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2065     for (i = 0; i < mapnum; i++)
2066       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2067 	{
2068 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
2069 			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2070 	  i += sizes[i];
2071 	}
2072       else
2073 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
2074 		       true, GOMP_MAP_VARS_ENTER_DATA);
2075   else
2076     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
2077 }
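/* Illustrative sketch (hypothetical codegen):

     #pragma omp target enter data map(to: a[0:n])

   arrives here without GOMP_TARGET_FLAG_EXIT_DATA and maps each clause
   via gomp_map_vars, whereas

     #pragma omp target exit data map(from: a[0:n])

   arrives with that flag set and goes through gomp_exit_data.  For a
   GOMP_MAP_STRUCT entry, SIZES[i] is the number of following entries
   describing the struct's fields, hence the 'i += sizes[i]' skip.  */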
2078 
2079 bool
2080 gomp_target_task_fn (void *data)
2081 {
2082   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
2083   struct gomp_device_descr *devicep = ttask->devicep;
2084 
2085   if (ttask->fn != NULL)
2086     {
2087       void *fn_addr;
2088       if (devicep == NULL
2089 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2090 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
2091 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2092 	{
2093 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
2094 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
2095 	  return false;
2096 	}
2097 
2098       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
2099 	{
2100 	  if (ttask->tgt)
2101 	    gomp_unmap_vars (ttask->tgt, true);
2102 	  return false;
2103 	}
2104 
2105       void *actual_arguments;
2106       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2107 	{
2108 	  ttask->tgt = NULL;
2109 	  actual_arguments = ttask->hostaddrs;
2110 	}
2111       else
2112 	{
2113 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
2114 				      NULL, ttask->sizes, ttask->kinds, true,
2115 				      GOMP_MAP_VARS_TARGET);
2116 	  actual_arguments = (void *) ttask->tgt->tgt_start;
2117 	}
2118       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
2119 
2120       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
2121 			       ttask->args, (void *) ttask);
2122       return true;
2123     }
2124   else if (devicep == NULL
2125 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2126 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2127     return false;
2128 
2129   size_t i;
2130   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
2131     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2132 		 ttask->kinds, true);
2133   else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
2134     for (i = 0; i < ttask->mapnum; i++)
2135       if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
2136 	{
2137 	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
2138 			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
2139 			 GOMP_MAP_VARS_ENTER_DATA);
2140 	  i += ttask->sizes[i];
2141 	}
2142       else
2143 	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
2144 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
2145   else
2146     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
2147 		    ttask->kinds);
2148   return false;
2149 }
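/* Note on the true return above: the plugin's async_run_func is handed
   TTASK as an opaque completion token and is expected to signal through
   GOMP_PLUGIN_target_task_completion when the kernel finishes, after
   which this function runs again in GOMP_TARGET_TASK_FINISHED state to
   unmap TGT.  */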
2150 
2151 void
2152 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2153 {
2154   if (thread_limit)
2155     {
2156       struct gomp_task_icv *icv = gomp_icv (true);
2157       icv->thread_limit_var
2158 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2159     }
2160   (void) num_teams;
2161 }
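/* Illustrative sketch (hypothetical codegen): for

     #pragma omp teams num_teams(4) thread_limit(8)

   inside a target region, the outlined body starts with roughly
   'GOMP_teams (4, 8)'.  In this host implementation only the
   thread_limit ICV is recorded (saturated to UINT_MAX when above
   INT_MAX); NUM_TEAMS is ignored since the fallback runs a single
   team.  */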
2162 
2163 void *
2164 omp_target_alloc (size_t size, int device_num)
2165 {
2166   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2167     return malloc (size);
2168 
2169   if (device_num < 0)
2170     return NULL;
2171 
2172   struct gomp_device_descr *devicep = resolve_device (device_num);
2173   if (devicep == NULL)
2174     return NULL;
2175 
2176   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2177       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2178     return malloc (size);
2179 
2180   gomp_mutex_lock (&devicep->lock);
2181   void *ret = devicep->alloc_func (devicep->target_id, size);
2182   gomp_mutex_unlock (&devicep->lock);
2183   return ret;
2184 }
2185 
2186 void
2187 omp_target_free (void *device_ptr, int device_num)
2188 {
2189   if (device_ptr == NULL)
2190     return;
2191 
2192   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2193     {
2194       free (device_ptr);
2195       return;
2196     }
2197 
2198   if (device_num < 0)
2199     return;
2200 
2201   struct gomp_device_descr *devicep = resolve_device (device_num);
2202   if (devicep == NULL)
2203     return;
2204 
2205   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2206       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2207     {
2208       free (device_ptr);
2209       return;
2210     }
2211 
2212   gomp_mutex_lock (&devicep->lock);
2213   gomp_free_device_memory (devicep, device_ptr);
2214   gomp_mutex_unlock (&devicep->lock);
2215 }
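/* Example usage of the two allocation routines above (user-code sketch;
   device 0 is assumed to be an offload device):

     #include <omp.h>

     void *p = omp_target_alloc (1024, 0);
     if (p != NULL)
       {
	 ... use p, e.g. via an is_device_ptr clause ...
	 omp_target_free (p, 0);
       }

   For the host device, or a shared-memory device, both routines degrade
   to plain malloc/free, as the code above shows.  */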
2216 
2217 int
2218 omp_target_is_present (const void *ptr, int device_num)
2219 {
2220   if (ptr == NULL)
2221     return 1;
2222 
2223   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2224     return 1;
2225 
2226   if (device_num < 0)
2227     return 0;
2228 
2229   struct gomp_device_descr *devicep = resolve_device (device_num);
2230   if (devicep == NULL)
2231     return 0;
2232 
2233   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2234       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2235     return 1;
2236 
2237   gomp_mutex_lock (&devicep->lock);
2238   struct splay_tree_s *mem_map = &devicep->mem_map;
2239   struct splay_tree_key_s cur_node;
2240 
2241   cur_node.host_start = (uintptr_t) ptr;
2242   cur_node.host_end = cur_node.host_start;
2243   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2244   int ret = n != NULL;
2245   gomp_mutex_unlock (&devicep->lock);
2246   return ret;
2247 }
2248 
2249 int
2250 omp_target_memcpy (void *dst, const void *src, size_t length,
2251 		   size_t dst_offset, size_t src_offset, int dst_device_num,
2252 		   int src_device_num)
2253 {
2254   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2255   bool ret;
2256 
2257   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2258     {
2259       if (dst_device_num < 0)
2260 	return EINVAL;
2261 
2262       dst_devicep = resolve_device (dst_device_num);
2263       if (dst_devicep == NULL)
2264 	return EINVAL;
2265 
2266       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2267 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2268 	dst_devicep = NULL;
2269     }
2270   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2271     {
2272       if (src_device_num < 0)
2273 	return EINVAL;
2274 
2275       src_devicep = resolve_device (src_device_num);
2276       if (src_devicep == NULL)
2277 	return EINVAL;
2278 
2279       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2280 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2281 	src_devicep = NULL;
2282     }
2283   if (src_devicep == NULL && dst_devicep == NULL)
2284     {
2285       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2286       return 0;
2287     }
2288   if (src_devicep == NULL)
2289     {
2290       gomp_mutex_lock (&dst_devicep->lock);
2291       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2292 					(char *) dst + dst_offset,
2293 					(char *) src + src_offset, length);
2294       gomp_mutex_unlock (&dst_devicep->lock);
2295       return (ret ? 0 : EINVAL);
2296     }
2297   if (dst_devicep == NULL)
2298     {
2299       gomp_mutex_lock (&src_devicep->lock);
2300       ret = src_devicep->dev2host_func (src_devicep->target_id,
2301 					(char *) dst + dst_offset,
2302 					(char *) src + src_offset, length);
2303       gomp_mutex_unlock (&src_devicep->lock);
2304       return (ret ? 0 : EINVAL);
2305     }
2306   if (src_devicep == dst_devicep)
2307     {
2308       gomp_mutex_lock (&src_devicep->lock);
2309       ret = src_devicep->dev2dev_func (src_devicep->target_id,
2310 				       (char *) dst + dst_offset,
2311 				       (char *) src + src_offset, length);
2312       gomp_mutex_unlock (&src_devicep->lock);
2313       return (ret ? 0 : EINVAL);
2314     }
2315   return EINVAL;
2316 }
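/* Example usage (user-code sketch): copy a host buffer into freshly
   allocated memory on device 0, with omp_get_initial_device () naming
   the host as the source device:

     double host_buf[256] = { 0 };
     void *dev_buf = omp_target_alloc (sizeof host_buf, 0);
     int err = omp_target_memcpy (dev_buf, host_buf, sizeof host_buf,
				  0, 0, 0, omp_get_initial_device ());

   (the two zeros after the length are the dst/src offsets, then the
   destination device number).  A zero return means success; EINVAL
   reports an invalid device or a failed transfer.  */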
2317 
2318 static int
2319 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
2320 			       int num_dims, const size_t *volume,
2321 			       const size_t *dst_offsets,
2322 			       const size_t *src_offsets,
2323 			       const size_t *dst_dimensions,
2324 			       const size_t *src_dimensions,
2325 			       struct gomp_device_descr *dst_devicep,
2326 			       struct gomp_device_descr *src_devicep)
2327 {
2328   size_t dst_slice = element_size;
2329   size_t src_slice = element_size;
2330   size_t j, dst_off, src_off, length;
2331   int i, ret;
2332 
2333   if (num_dims == 1)
2334     {
2335       if (__builtin_mul_overflow (element_size, volume[0], &length)
2336 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
2337 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
2338 	return EINVAL;
2339       if (dst_devicep == NULL && src_devicep == NULL)
2340 	{
2341 	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
2342 		  length);
2343 	  ret = 1;
2344 	}
2345       else if (src_devicep == NULL)
2346 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2347 					  (char *) dst + dst_off,
2348 					  (const char *) src + src_off,
2349 					  length);
2350       else if (dst_devicep == NULL)
2351 	ret = src_devicep->dev2host_func (src_devicep->target_id,
2352 					  (char *) dst + dst_off,
2353 					  (const char *) src + src_off,
2354 					  length);
2355       else if (src_devicep == dst_devicep)
2356 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
2357 					 (char *) dst + dst_off,
2358 					 (const char *) src + src_off,
2359 					 length);
2360       else
2361 	ret = 0;
2362       return ret ? 0 : EINVAL;
2363     }
2364 
2365   /* FIXME: it would be nice to have some plugin function to handle
2366      num_dims == 2 and num_dims == 3 more efficiently.  Larger counts can
2367      be handled by the generic recursion below, and for host-to-host
2368      copies the recursion should be used even for num_dims >= 2.  */
2369 
2370   for (i = 1; i < num_dims; i++)
2371     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
2372 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
2373       return EINVAL;
2374   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
2375       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
2376     return EINVAL;
2377   for (j = 0; j < volume[0]; j++)
2378     {
2379       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
2380 					   (const char *) src + src_off,
2381 					   element_size, num_dims - 1,
2382 					   volume + 1, dst_offsets + 1,
2383 					   src_offsets + 1, dst_dimensions + 1,
2384 					   src_dimensions + 1, dst_devicep,
2385 					   src_devicep);
2386       if (ret)
2387 	return ret;
2388       dst_off += dst_slice;
2389       src_off += src_slice;
2390     }
2391   return 0;
2392 }
2393 
2394 int
2395 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
2396 			int num_dims, const size_t *volume,
2397 			const size_t *dst_offsets,
2398 			const size_t *src_offsets,
2399 			const size_t *dst_dimensions,
2400 			const size_t *src_dimensions,
2401 			int dst_device_num, int src_device_num)
2402 {
2403   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2404 
2405   if (!dst && !src)
2406     return INT_MAX;  /* Querying the max number of dimensions supported.  */
2407 
2408   if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2409     {
2410       if (dst_device_num < 0)
2411 	return EINVAL;
2412 
2413       dst_devicep = resolve_device (dst_device_num);
2414       if (dst_devicep == NULL)
2415 	return EINVAL;
2416 
2417       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2418 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2419 	dst_devicep = NULL;
2420     }
2421   if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2422     {
2423       if (src_device_num < 0)
2424 	return EINVAL;
2425 
2426       src_devicep = resolve_device (src_device_num);
2427       if (src_devicep == NULL)
2428 	return EINVAL;
2429 
2430       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2431 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2432 	src_devicep = NULL;
2433     }
2434 
2435   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2436     return EINVAL;
2437 
2438   if (src_devicep)
2439     gomp_mutex_lock (&src_devicep->lock);
2440   else if (dst_devicep)
2441     gomp_mutex_lock (&dst_devicep->lock);
2442   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2443 					   volume, dst_offsets, src_offsets,
2444 					   dst_dimensions, src_dimensions,
2445 					   dst_devicep, src_devicep);
2446   if (src_devicep)
2447     gomp_mutex_unlock (&src_devicep->lock);
2448   else if (dst_devicep)
2449     gomp_mutex_unlock (&dst_devicep->lock);
2450   return ret;
2451 }
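/* Example usage (user-code sketch): copy a 2x3 sub-block of a 4x5 host
   matrix of doubles into the top-left corner of a same-shaped device
   allocation, with the host named by omp_get_initial_device ():

     size_t volume[2]      = { 2, 3 };
     size_t dst_offsets[2] = { 0, 0 };
     size_t src_offsets[2] = { 1, 1 };
     size_t dims[2]        = { 4, 5 };
     int err = omp_target_memcpy_rect (dev_mat, host_mat, sizeof (double),
				       2, volume, dst_offsets, src_offsets,
				       dims, dims, 0,
				       omp_get_initial_device ());

   dev_mat and host_mat are assumed to be existing 4x5 allocations on
   device 0 and the host respectively.  */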
2452 
2453 int
2454 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
2455 			  size_t size, size_t device_offset, int device_num)
2456 {
2457   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2458     return EINVAL;
2459 
2460   if (device_num < 0)
2461     return EINVAL;
2462 
2463   struct gomp_device_descr *devicep = resolve_device (device_num);
2464   if (devicep == NULL)
2465     return EINVAL;
2466 
2467   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2468       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2469     return EINVAL;
2470 
2471   gomp_mutex_lock (&devicep->lock);
2472 
2473   struct splay_tree_s *mem_map = &devicep->mem_map;
2474   struct splay_tree_key_s cur_node;
2475   int ret = EINVAL;
2476 
2477   cur_node.host_start = (uintptr_t) host_ptr;
2478   cur_node.host_end = cur_node.host_start + size;
2479   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2480   if (n)
2481     {
2482       if (n->tgt->tgt_start + n->tgt_offset
2483 	  == (uintptr_t) device_ptr + device_offset
2484 	  && n->host_start <= cur_node.host_start
2485 	  && n->host_end >= cur_node.host_end)
2486 	ret = 0;
2487     }
2488   else
2489     {
2490       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2491       tgt->array = gomp_malloc (sizeof (*tgt->array));
2492       tgt->refcount = 1;
2493       tgt->tgt_start = 0;
2494       tgt->tgt_end = 0;
2495       tgt->to_free = NULL;
2496       tgt->prev = NULL;
2497       tgt->list_count = 0;
2498       tgt->device_descr = devicep;
2499       splay_tree_node array = tgt->array;
2500       splay_tree_key k = &array->key;
2501       k->host_start = cur_node.host_start;
2502       k->host_end = cur_node.host_end;
2503       k->tgt = tgt;
2504       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
2505       k->refcount = REFCOUNT_INFINITY;
2506       k->dynamic_refcount = 0;
2507       array->left = NULL;
2508       array->right = NULL;
2509       splay_tree_insert (&devicep->mem_map, array);
2510       ret = 0;
2511     }
2512   gomp_mutex_unlock (&devicep->lock);
2513   return ret;
2514 }
2515 
2516 int
2517 omp_target_disassociate_ptr (const void *ptr, int device_num)
2518 {
2519   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2520     return EINVAL;
2521 
2522   if (device_num < 0)
2523     return EINVAL;
2524 
2525   struct gomp_device_descr *devicep = resolve_device (device_num);
2526   if (devicep == NULL)
2527     return EINVAL;
2528 
2529   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2530     return EINVAL;
2531 
2532   gomp_mutex_lock (&devicep->lock);
2533 
2534   struct splay_tree_s *mem_map = &devicep->mem_map;
2535   struct splay_tree_key_s cur_node;
2536   int ret = EINVAL;
2537 
2538   cur_node.host_start = (uintptr_t) ptr;
2539   cur_node.host_end = cur_node.host_start;
2540   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
2541   if (n
2542       && n->host_start == cur_node.host_start
2543       && n->refcount == REFCOUNT_INFINITY
2544       && n->tgt->tgt_start == 0
2545       && n->tgt->to_free == NULL
2546       && n->tgt->refcount == 1
2547       && n->tgt->list_count == 0)
2548     {
2549       splay_tree_remove (&devicep->mem_map, n);
2550       gomp_unmap_tgt (n->tgt);
2551       ret = 0;
2552     }
2553 
2554   gomp_mutex_unlock (&devicep->lock);
2555   return ret;
2556 }
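/* Example usage of the association API above (user-code sketch; device 0
   is assumed to be a non-shared-memory offload device):

     int host_buf[64];
     void *dev_mem = omp_target_alloc (sizeof host_buf, 0);
     omp_target_associate_ptr (host_buf, dev_mem, sizeof host_buf, 0, 0);
     ... target regions referring to host_buf now reuse dev_mem instead
	 of mapping the buffer afresh; the REFCOUNT_INFINITY entry set up
	 above keeps it from being unmapped ...
     omp_target_disassociate_ptr (host_buf, 0);
     omp_target_free (dev_mem, 0);
  */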
2557 
2558 int
2559 omp_pause_resource (omp_pause_resource_t kind, int device_num)
2560 {
2561   (void) kind;
2562   if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2563     return gomp_pause_host ();
2564   if (device_num < 0 || device_num >= gomp_get_num_devices ())
2565     return -1;
2566   /* Do nothing for target devices for now.  */
2567   return 0;
2568 }
2569 
2570 int
2571 omp_pause_resource_all (omp_pause_resource_t kind)
2572 {
2573   (void) kind;
2574   if (gomp_pause_host ())
2575     return -1;
2576   /* Do nothing for target devices for now.  */
2577   return 0;
2578 }
2579 
2580 ialias (omp_pause_resource)
2581 ialias (omp_pause_resource_all)
2582 
2583 #ifdef PLUGIN_SUPPORT
2584 
2585 /* This function tries to load a plugin for DEVICE.  The name of the
2586    plugin is passed in PLUGIN_NAME.
2587    The handles of the found functions are stored in the corresponding fields
2588    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
2589 
2590 static bool
2591 gomp_load_plugin_for_device (struct gomp_device_descr *device,
2592 			     const char *plugin_name)
2593 {
2594   const char *err = NULL, *last_missing = NULL;
2595 
2596   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2597   if (!plugin_handle)
2598     goto dl_fail;
2599 
2600   /* Check if all required functions are available in the plugin and store
2601      their handles.  None of the symbols can legitimately be NULL,
2602      so we don't need to check dlerror all the time.  */
2603 #define DLSYM(f)							\
2604   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
2605     goto dl_fail
2606   /* Similar, but missing functions are not an error.  Return false if
2607      failed, true otherwise.  */
2608 #define DLSYM_OPT(f, n)							\
2609   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
2610    || (last_missing = #n, 0))
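  /* For instance, DLSYM (alloc) expands to

       if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
	 goto dl_fail;

     so a plugin must export each entry point under the GOMP_OFFLOAD_
     prefix.  */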
2611 
2612   DLSYM (version);
2613   if (device->version_func () != GOMP_VERSION)
2614     {
2615       err = "plugin version mismatch";
2616       goto fail;
2617     }
2618 
2619   DLSYM (get_name);
2620   DLSYM (get_caps);
2621   DLSYM (get_type);
2622   DLSYM (get_num_devices);
2623   DLSYM (init_device);
2624   DLSYM (fini_device);
2625   DLSYM (load_image);
2626   DLSYM (unload_image);
2627   DLSYM (alloc);
2628   DLSYM (free);
2629   DLSYM (dev2host);
2630   DLSYM (host2dev);
2631   device->capabilities = device->get_caps_func ();
2632   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2633     {
2634       DLSYM (run);
2635       DLSYM (async_run);
2636       DLSYM_OPT (can_run, can_run);
2637       DLSYM (dev2dev);
2638     }
2639   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2640     {
2641       if (!DLSYM_OPT (openacc.exec, openacc_exec)
2642 	  || !DLSYM_OPT (openacc.register_async_cleanup,
2643 			 openacc_register_async_cleanup)
2644 	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2645 	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2646 	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2647 	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2648 	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2649 	  || !DLSYM_OPT (openacc.async_wait_all_async,
2650 			 openacc_async_wait_all_async)
2651 	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2652 	  || !DLSYM_OPT (openacc.create_thread_data,
2653 			 openacc_create_thread_data)
2654 	  || !DLSYM_OPT (openacc.destroy_thread_data,
2655 			 openacc_destroy_thread_data))
2656 	{
2657 	  /* Require all the OpenACC handlers if we have
2658 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
2659 	  err = "plugin missing OpenACC handler function";
2660 	  goto fail;
2661 	}
2662 
2663       unsigned cuda = 0;
2664       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2665 			 openacc_cuda_get_current_device);
2666       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2667 			 openacc_cuda_get_current_context);
2668       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2669       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2670       if (cuda && cuda != 4)
2671 	{
2672 	  /* Make sure all the CUDA functions are there if any of them are.  */
2673 	  err = "plugin missing OpenACC CUDA handler function";
2674 	  goto fail;
2675 	}
2676     }
2677 #undef DLSYM
2678 #undef DLSYM_OPT
2679 
2680   return 1;
2681 
2682  dl_fail:
2683   err = dlerror ();
2684  fail:
2685   gomp_error ("while loading %s: %s", plugin_name, err);
2686   if (last_missing)
2687     gomp_error ("missing function was %s", last_missing);
2688   if (plugin_handle)
2689     dlclose (plugin_handle);
2690 
2691   return 0;
2692 }
2693 
2694 /* This function finalizes all initialized devices.  */
2695 
2696 static void
2697 gomp_target_fini (void)
2698 {
2699   int i;
2700   for (i = 0; i < num_devices; i++)
2701     {
2702       bool ret = true;
2703       struct gomp_device_descr *devicep = &devices[i];
2704       gomp_mutex_lock (&devicep->lock);
2705       if (devicep->state == GOMP_DEVICE_INITIALIZED)
2706 	{
2707 	  ret = devicep->fini_device_func (devicep->target_id);
2708 	  devicep->state = GOMP_DEVICE_FINALIZED;
2709 	}
2710       gomp_mutex_unlock (&devicep->lock);
2711       if (!ret)
2712 	gomp_fatal ("device finalization failed");
2713     }
2714 }
2715 
2716 /* This function initializes the runtime for offloading.
2717    It parses the list of offload plugins and tries to load them.
2718    On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2719    will be set, and the array DEVICES initialized, containing
2720    descriptors for the corresponding devices, first the
2721    GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed by the others.  */
2722 
2723 static void
2724 gomp_target_init (void)
2725 {
2726   const char *prefix = "libgomp-plugin-";
2727   const char *suffix = SONAME_SUFFIX (1);
2728   const char *cur, *next;
2729   char *plugin_name;
2730   int i, new_num_devices;
2731 
2732   num_devices = 0;
2733   devices = NULL;
2734 
2735   cur = OFFLOAD_PLUGINS;
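  /* For example, if OFFLOAD_PLUGINS was configured as "nvptx,hsa", the
     loop below attempts to dlopen "libgomp-plugin-nvptx.so.1" and
     "libgomp-plugin-hsa.so.1" (assuming SONAME_SUFFIX (1) expands to
     ".so.1" on this host).  */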
2736   if (*cur)
2737     do
2738       {
2739 	struct gomp_device_descr current_device;
2740 	size_t prefix_len, suffix_len, cur_len;
2741 
2742 	next = strchr (cur, ',');
2743 
2744 	prefix_len = strlen (prefix);
2745 	cur_len = next ? next - cur : strlen (cur);
2746 	suffix_len = strlen (suffix);
2747 
2748 	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2749 	if (!plugin_name)
2750 	  {
2751 	    num_devices = 0;
2752 	    break;
2753 	  }
2754 
2755 	memcpy (plugin_name, prefix, prefix_len);
2756 	memcpy (plugin_name + prefix_len, cur, cur_len);
2757 	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2758 
2759 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
2760 	  {
2761 	    new_num_devices = current_device.get_num_devices_func ();
2762 	    if (new_num_devices >= 1)
2763 	      {
2764 		/* Augment DEVICES and NUM_DEVICES.  */
2765 
2766 		devices = realloc (devices, (num_devices + new_num_devices)
2767 				   * sizeof (struct gomp_device_descr));
2768 		if (!devices)
2769 		  {
2770 		    num_devices = 0;
2771 		    free (plugin_name);
2772 		    break;
2773 		  }
2774 
2775 		current_device.name = current_device.get_name_func ();
2776 		/* current_device.capabilities has already been set.  */
2777 		current_device.type = current_device.get_type_func ();
2778 		current_device.mem_map.root = NULL;
2779 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
2780 		current_device.openacc.data_environ = NULL;
2781 		for (i = 0; i < new_num_devices; i++)
2782 		  {
2783 		    current_device.target_id = i;
2784 		    devices[num_devices] = current_device;
2785 		    gomp_mutex_init (&devices[num_devices].lock);
2786 		    num_devices++;
2787 		  }
2788 	      }
2789 	  }
2790 
2791 	free (plugin_name);
2792 	cur = next + 1;
2793       }
2794     while (next);
2795 
2796   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2797      NUM_DEVICES_OPENMP.  */
2798   struct gomp_device_descr *devices_s
2799     = malloc (num_devices * sizeof (struct gomp_device_descr));
2800   if (!devices_s)
2801     {
2802       num_devices = 0;
2803       free (devices);
2804       devices = NULL;
2805     }
2806   num_devices_openmp = 0;
2807   for (i = 0; i < num_devices; i++)
2808     if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2809       devices_s[num_devices_openmp++] = devices[i];
2810   int num_devices_after_openmp = num_devices_openmp;
2811   for (i = 0; i < num_devices; i++)
2812     if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2813       devices_s[num_devices_after_openmp++] = devices[i];
2814   free (devices);
2815   devices = devices_s;
2816 
2817   for (i = 0; i < num_devices; i++)
2818     {
2819       /* The 'devices' array can be moved (by the realloc call) until we have
2820 	 found all the plugins, so registering with the OpenACC runtime (which
2821 	 takes a copy of the pointer argument) must be delayed until now.  */
2822       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2823 	goacc_register (&devices[i]);
2824     }
2825 
2826   if (atexit (gomp_target_fini) != 0)
2827     gomp_fatal ("atexit failed");
2828 }
2829 
2830 #else /* PLUGIN_SUPPORT */
2831 /* If dlfcn.h is unavailable we always fall back to host execution.
2832    GOMP_target* routines are just stubs for this case.  */
2833 static void
2834 gomp_target_init (void)
2835 {
2836 }
2837 #endif /* PLUGIN_SUPPORT */
2838