/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

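/* Instantiate hashtab.h as a set keyed by the addresses ('uintptr_t *') of
   reference counters; gomp_increment_refcount and gomp_decrement_refcount
   below use such a set to make sure each reference count is adjusted at
   most once per construct.  */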
typedef uintptr_t *hash_entry_type;
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
static inline void htab_free (void *ptr) { free (ptr); }
#include "hashtab.h"

static inline hashval_t
htab_hash (hash_entry_type element)
{
  return hash_pointer ((void *) element);
}

static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
  return x == y;
}

#define FIELD_TGT_EMPTY (~(size_t) 0)

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor,
   and pointer to target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

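/* Translate DEVICE_ID (or the default-device ICV, for GOMP_DEVICE_ICV) into
   a device descriptor, initializing the device on first use.  Return NULL,
   signalling host fallback, if the id is out of range or the device has
   already been finalized; under OMP_TARGET_OFFLOAD=MANDATORY most of these
   cases are fatal errors instead.  */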
static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    {
      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
	  && device_id != GOMP_DEVICE_HOST_FALLBACK
	  && device_id != num_devices_openmp)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device not found");

      return NULL;
    }

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);

      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
		    "but device is finalized");

      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


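/* Look up KEY in MEM_MAP.  For a zero-length KEY (host_start == host_end),
   additionally try to find a mapping that contains the address, then one
   that ends exactly at it, so zero-length array sections at either edge of
   a mapped region are still found.  */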
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

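/* Like gomp_map_lookup, but for a zero-length KEY only widen the lookup
   forward by one byte, i.e. only match mappings that actually contain the
   address.  */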
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}

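/* Copy SIZE bytes from SRCADDR to DSTADDR via the plugin callback COPY_FUNC;
   on failure, release DEVICEP's lock and terminate.  DST and SRC only name
   the two sides ("dev"/"host") for the error message.  */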
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

static inline void
goacc_device_copy_async (struct gomp_device_descr *devicep,
			 bool (*copy_func) (int, void *, const void *, size_t,
					    struct goacc_asyncqueue *),
			 const char *dst, void *dstaddr,
			 const char *src, const void *srcaddr,
			 const void *srcaddr_orig,
			 size_t size, struct goacc_asyncqueue *aq)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
    {
      gomp_mutex_unlock (&devicep->lock);
      if (srcaddr_orig && srcaddr_orig != srcaddr)
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " via buffer %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr_orig, srcaddr_orig + size,
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
      else
	gomp_fatal ("Copying of %s object [%p..%p)"
		    " to %s object [%p..%p) failed",
		    src, srcaddr, srcaddr + size,
		    dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[i].start is the starting offset and
     chunks[i].end ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

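/* For example (illustrative numbers only): two to-be-copied regions at
   device offsets [0, 256) and [1024, 1280) are 768 bytes apart, which is
   below MAX_COALESCE_BUF_GAP, so gomp_coalesce_buf_add below merges them
   into a single chunk [0, 1280) transferred with one host-to-device copy;
   a third region starting at offset 64 * 1024 would open a new chunk.  */
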
/* Add region with device tgt_start relative offset and length to CBUF.

   This must not be used for asynchronous copies, because the host data might
   not be computed yet (by an earlier asynchronous compute region, for
   example).
   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
   is it more performant to use libgomp CBUF buffering or individual device
   asynchronous copying?)  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}

/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_FORCE_FROM:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
   non-NULL), when the source data is stack or may otherwise be deallocated
   before the asynchronous copy takes place, EPHEMERAL must be passed as
   TRUE.  */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals
   that associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.  */
    new_encountered_refcount = true;

  if (delete_p)
    *refcount_ptr = 0;
  else if (*refcount_ptr > 0)
    *refcount_ptr -= 1;

 end:
  if (*refcount_ptr == 0)
    {
      if (orig_refcount > 0)
	set_to_zero = true;

      is_zero = true;
    }

  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
  *do_remove = (new_encountered_refcount && set_to_zero);
}

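/* For instance (an illustrative walk-through): on a construct with two map
   clauses referring to fields of one structure, both clauses share a single
   structelem refcount.  The first clause processed decrements it and, on
   reaching zero, reports both *DO_COPY and *DO_REMOVE; the second clause
   finds the refcount already in REFCOUNT_SET and only reports *DO_COPY, so
   each field can be copied back while the key is removed just once.  */
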
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, bool always_to_flag, bool implicit,
			struct gomp_coalesce_buf *cbuf,
			htab_t *refcount_set)
{
  assert (kind != GOMP_MAP_ATTACH
	  && kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);

  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->is_attach = false;
  tgt_var->offset = newn->host_start - oldn->host_start;

  /* For implicit maps, old contained in new is valid.  */
  bool implicit_subset = (implicit
			  && newn->host_start <= oldn->host_start
			  && oldn->host_end <= newn->host_end);
  if (implicit_subset)
    tgt_var->length = oldn->host_end - oldn->host_start;
  else
    tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      /* For implicit maps, old contained in new is valid.  */
      || !(implicit_subset
	   /* Otherwise, new contained inside old is considered valid.  */
	   || (oldn->host_start <= newn->host_start
	       && newn->host_end <= oldn->host_end)))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
    {
      /* Implicit + always should not happen.  If it does occur, the
	 address/length adjustment below is a TODO.  */
      assert (!implicit_subset);

      if (oldn->aux && oldn->aux->attach_count)
	{
	  /* We have to be careful not to overwrite still attached pointers
	     during the copyback to host.  */
	  uintptr_t addr = newn->host_start;
	  while (addr < newn->host_end)
	    {
	      size_t i = (addr - oldn->host_start) / sizeof (void *);
	      if (oldn->aux->attach_count[i] == 0)
		gomp_copy_host2dev (devicep, aq,
				    (void *) (oldn->tgt->tgt_start
					      + oldn->tgt_offset
					      + addr - oldn->host_start),
				    (void *) addr,
				    sizeof (void *), false, cbuf);
	      addr += sizeof (void *);
	    }
	}
      else
	gomp_copy_host2dev (devicep, aq,
			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				      + newn->host_start - oldn->host_start),
			    (void *) newn->host_start,
			    newn->host_end - newn->host_start, false, cbuf);
    }

  gomp_increment_refcount (oldn, refcount_set);
}

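/* Extract the map kind at index IDX.  With SHORT_MAPKIND, kinds are 16-bit
   values whose low byte is the GOMP_MAP_* kind (with the implicit flag
   masked off here) and whose high byte encodes log2 of the alignment;
   otherwise they are 8-bit values with the kind in the low 3 bits.  */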
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return ((unsigned char *) kinds)[idx];

  int val = ((unsigned short *) kinds)[idx];
  if (GOMP_MAP_IMPLICIT_P (val))
    val &= ~GOMP_MAP_IMPLICIT;
  return val;
}


static bool
get_implicit (bool short_mapkind, void *kinds, int idx)
{
  if (!short_mapkind)
    return false;

  int val = ((unsigned short *) kinds)[idx];
  return GOMP_MAP_IMPLICIT_P (val);
}

static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf,
		  bool allow_zero_length_array_sections)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep, aq,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset, sizeof (void *),
			  true, cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      if (allow_zero_length_array_sections)
	cur_node.tgt_offset = 0;
      else
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Pointer target of array section wasn't mapped");
	}
    }
  else
    {
      cur_node.host_start -= n->host_start;
      cur_node.tgt_offset
	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
      /* At this point tgt_offset is target address of the
	 array section.  Now subtract bias to get what we want
	 to initialize the pointer with.  */
      cur_node.tgt_offset -= bias;
    }
  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *),
		      true, cbuf);
}

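/* Map the I-th element of a GOMP_MAP_STRUCT clause sequence whose enclosing
   structure mapping N already exists: the element (or, for a zero-sized
   entry, an adjacent byte) must be found in N's target block at the
   matching offset, otherwise mapping it is a fatal error.  */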
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
			  struct goacc_asyncqueue *aq, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  bool implicit;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  implicit = get_implicit (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
			      kind & typemask, false, implicit, cbuf,
			      refcount_set);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				      kind & typemask, false, implicit, cbuf,
				      refcount_set);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

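/* Return the device address to use for mapping I of TGT: either derived
   from the splay-tree key recorded for it, or, for entries without a key,
   from the special OFFSET_* encodings (inlined host value, null pointer,
   struct base, or an offset into the newly allocated device block).  */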
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

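/* Map MAPNUM host address ranges given by HOSTADDRS/SIZES/KINDS onto DEVICEP
   for the construct described by PRAGMA_KIND.  This runs two passes over the
   clauses: the first looks each region up in the device's splay tree and
   totals the size and alignment of device memory needed for regions not yet
   mapped; the second allocates one device block for all the new mappings,
   inserts their splay-tree keys and performs the host-to-device copies,
   batching small copies through a gomp_coalesce_buf where possible.  */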
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
  tgt->device_descr = devicep;
  tgt->prev = NULL;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      bool implicit = get_implicit (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_INLINED;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
	{
	  tgt->list[i].key = NULL;
	  if (!not_found_cnt)
	    {
	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
		 on a separate construct prior to using use_device_{addr,ptr}.
		 In OpenMP 5.0, map directives need to be ordered by the
		 middle-end before the use_device_* clauses.  If
		 !not_found_cnt, all mappings requested (if any) are already
		 mapped, so use_device_{addr,ptr} can be resolved right away.
		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
		 now but would succeed after performing the mappings in the
		 following loop.  We can't defer this always to the second
		 loop, because it is not even invoked when !not_found_cnt
		 after the first loop.  */
	      cur_node.host_start = (uintptr_t) hostaddrs[i];
	      cur_node.host_end = cur_node.host_start;
	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	      if (n != NULL)
		{
		  cur_node.host_start -= n->host_start;
		  hostaddrs[i]
		    = (void *) (n->tgt->tgt_start + n->tgt_offset
				+ cur_node.host_start);
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
		{
		  gomp_mutex_unlock (&devicep->lock);
		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
		}
	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
		/* If not present, continue using the host address.  */
		;
	      else
		__builtin_unreachable ();
	      tgt->list[i].offset = OFFSET_INLINED;
	    }
	  else
	    tgt->list[i].offset = 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_STRUCT;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (!aq
		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
						& typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
				      sizes, kinds, NULL, refcount_set);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = OFFSET_POINTER;
	  has_firstprivate = true;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ATTACH
	       || ((kind & typemask)
		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
	{
	  tgt->list[i].key = NULL;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq)
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = OFFSET_POINTER;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	{
	  int always_to_cnt = 0;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      bool has_nullptr = false;
	      size_t j;
	      for (j = 0; j < n->tgt->list_count; j++)
		if (n->tgt->list[j].key == n)
		  {
		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
		    break;
		  }
	      if (n->tgt->list_count == 0)
		{
		  /* 'declare target'; assume has_nullptr; it could also be
		     a statically assigned pointer, but then it should point
		     to the equivalent variable on the host.  */
		  assert (n->refcount == REFCOUNT_INFINITY);
		  has_nullptr = true;
		}
	      else
		assert (j < n->tgt->list_count);
	      /* Re-map the data if there is an 'always' modifier, or if a
		 null pointer was there and now a nonnull one has been found;
		 that permits transparent re-mapping for Fortran array
		 descriptors which were previously mapped unallocated.  */
	      for (j = i + 1; j < mapnum; j++)
		{
		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
		      && (!has_nullptr
			  || !GOMP_MAP_POINTER_P (ptr_kind)
			  || *(void **) hostaddrs[j] == NULL))
		    break;
		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			       > cur_node.host_end))
		    break;
		  else
		    {
		      has_always_ptrset = true;
		      ++always_to_cnt;
		    }
		}
	    }
	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
				  kind & typemask, always_to_cnt > 0, implicit,
				  NULL, refcount_set);
	  i += always_to_cnt;
	}
      else
	{
	  tgt->list[i].key = NULL;

	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
	    {
	      /* Not present, hence skip the entry, including its MAP_POINTER
		 if it exists.  */
	      tgt->list[i].offset = OFFSET_POINTER;
	      if (i + 1 < mapnum
		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
		      == GOMP_MAP_POINTER))
		{
		  ++i;
		  tgt->list[i].key = NULL;
		  tgt->list[i].offset = 0;
		}
	      continue;
	    }
	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (!aq
	      && gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      int kind;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
						  kinds, j)) & typemask))
		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation failed");
1244 	}
1245 
1246       tgt->tgt_start = (uintptr_t) tgt->to_free;
1247       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1248       tgt->tgt_end = tgt->tgt_start + tgt_size;
1249 
1250       if (cbuf.use_cnt == 1)
1251 	cbuf.chunk_cnt--;
1252       if (cbuf.chunk_cnt > 0)
1253 	{
1254 	  cbuf.buf
1255 	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1256 	  if (cbuf.buf)
1257 	    {
1258 	      cbuf.tgt = tgt;
1259 	      cbufp = &cbuf;
1260 	    }
1261 	}
1262     }
1263   else
1264     {
1265       tgt->to_free = NULL;
1266       tgt->tgt_start = 0;
1267       tgt->tgt_end = 0;
1268     }
1269 
1270   tgt_size = 0;
1271   if (pragma_kind == GOMP_MAP_VARS_TARGET)
1272     tgt_size = mapnum * sizeof (void *);
1273 
1274   tgt->array = NULL;
1275   if (not_found_cnt || has_firstprivate || has_always_ptrset)
1276     {
1277       if (not_found_cnt)
1278 	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1279       splay_tree_node array = tgt->array;
1280       size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1281       uintptr_t field_tgt_base = 0;
1282       splay_tree_key field_tgt_structelem_first = NULL;
1283 
1284       for (i = 0; i < mapnum; i++)
1285 	if (has_always_ptrset
1286 	    && tgt->list[i].key
1287 	    && (get_kind (short_mapkind, kinds, i) & typemask)
1288 	       == GOMP_MAP_TO_PSET)
1289 	  {
1290 	    splay_tree_key k = tgt->list[i].key;
1291 	    bool has_nullptr = false;
1292 	    size_t j;
1293 	    for (j = 0; j < k->tgt->list_count; j++)
1294 	      if (k->tgt->list[j].key == k)
1295 		{
1296 		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1297 		  break;
1298 		}
1299 	    if (k->tgt->list_count == 0)
1300 	      has_nullptr = true;
1301 	    else
1302 	      assert (j < k->tgt->list_count);
1303 
1304 	    tgt->list[i].has_null_ptr_assoc = false;
1305 	    for (j = i + 1; j < mapnum; j++)
1306 	      {
1307 		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1308 		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1309 		    && (!has_nullptr
1310 			|| !GOMP_MAP_POINTER_P (ptr_kind)
1311 			|| *(void **) hostaddrs[j] == NULL))
1312 		  break;
1313 		else if ((uintptr_t) hostaddrs[j] < k->host_start
1314 			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1315 			     > k->host_end))
1316 		  break;
1317 		else
1318 		  {
1319 		    if (*(void **) hostaddrs[j] == NULL)
1320 		      tgt->list[i].has_null_ptr_assoc = true;
1321 		    tgt->list[j].key = k;
1322 		    tgt->list[j].copy_from = false;
1323 		    tgt->list[j].always_copy_from = false;
1324 		    tgt->list[j].is_attach = false;
1325 		    gomp_increment_refcount (k, refcount_set);
1326 		    gomp_map_pointer (k->tgt, aq,
1327 				      (uintptr_t) *(void **) hostaddrs[j],
1328 				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
1329 						       - k->host_start),
1330 				      sizes[j], cbufp, false);
1331 		  }
1332 	      }
1333 	    i = j - 1;
1334 	  }
1335 	else if (tgt->list[i].key == NULL)
1336 	  {
1337 	    int kind = get_kind (short_mapkind, kinds, i);
1338 	    bool implicit = get_implicit (short_mapkind, kinds, i);
1339 	    if (hostaddrs[i] == NULL)
1340 	      continue;
1341 	    switch (kind & typemask)
1342 	      {
1343 		size_t align, len, first, last;
1344 		splay_tree_key n;
1345 	      case GOMP_MAP_FIRSTPRIVATE:
1346 		align = (size_t) 1 << (kind >> rshift);
1347 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
1348 		tgt->list[i].offset = tgt_size;
1349 		len = sizes[i];
1350 		gomp_copy_host2dev (devicep, aq,
1351 				    (void *) (tgt->tgt_start + tgt_size),
1352 				    (void *) hostaddrs[i], len, false, cbufp);
1353 		tgt_size += len;
1354 		continue;
1355 	      case GOMP_MAP_FIRSTPRIVATE_INT:
1356 	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1357 		continue;
1358 	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1359 		/* The OpenACC 'host_data' construct only allows 'use_device'
1360 		   "mapping" clauses, so in the first loop, 'not_found_cnt'
1361 		   must always have been zero, so all OpenACC 'use_device'
1362 		   clauses have already been handled.  (We can only easily test
1363 		   'use_device' with 'if_present' clause here.)  */
1364 		assert (tgt->list[i].offset == OFFSET_INLINED);
1365 		/* Nevertheless, FALLTHRU to the normal handling, to keep the
1366 		   code conceptually simple, similar to the first loop.  */
1367 	      case GOMP_MAP_USE_DEVICE_PTR:
1368 		if (tgt->list[i].offset == 0)
1369 		  {
1370 		    cur_node.host_start = (uintptr_t) hostaddrs[i];
1371 		    cur_node.host_end = cur_node.host_start;
1372 		    n = gomp_map_lookup (mem_map, &cur_node);
1373 		    if (n != NULL)
1374 		      {
1375 			cur_node.host_start -= n->host_start;
1376 			hostaddrs[i]
1377 			  = (void *) (n->tgt->tgt_start + n->tgt_offset
1378 				      + cur_node.host_start);
1379 		      }
1380 		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1381 		      {
1382 			gomp_mutex_unlock (&devicep->lock);
1383 			gomp_fatal ("use_device_ptr pointer wasn't mapped");
1384 		      }
1385 		    else if ((kind & typemask)
1386 			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1387 		      /* If not present, continue using the host address.  */
1388 		      ;
1389 		    else
1390 		      __builtin_unreachable ();
1391 		    tgt->list[i].offset = OFFSET_INLINED;
1392 		  }
1393 		continue;
1394 	      case GOMP_MAP_STRUCT:
1395 		first = i + 1;
1396 		last = i + sizes[i];
1397 		cur_node.host_start = (uintptr_t) hostaddrs[i];
1398 		cur_node.host_end = (uintptr_t) hostaddrs[last]
1399 				    + sizes[last];
1400 		if (tgt->list[first].key != NULL)
1401 		  continue;
1402 		n = splay_tree_lookup (mem_map, &cur_node);
1403 		if (n == NULL)
1404 		  {
1405 		    size_t align = (size_t) 1 << (kind >> rshift);
1406 		    tgt_size -= (uintptr_t) hostaddrs[first]
1407 				- (uintptr_t) hostaddrs[i];
1408 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1409 		    tgt_size += (uintptr_t) hostaddrs[first]
1410 				- (uintptr_t) hostaddrs[i];
1411 		    field_tgt_base = (uintptr_t) hostaddrs[first];
1412 		    field_tgt_offset = tgt_size;
1413 		    field_tgt_clear = last;
1414 		    field_tgt_structelem_first = NULL;
1415 		    tgt_size += cur_node.host_end
1416 				- (uintptr_t) hostaddrs[first];
1417 		    continue;
1418 		  }
1419 		for (i = first; i <= last; i++)
1420 		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1421 					    sizes, kinds, cbufp, refcount_set);
1422 		i--;
1423 		continue;
1424 	      case GOMP_MAP_ALWAYS_POINTER:
1425 		cur_node.host_start = (uintptr_t) hostaddrs[i];
1426 		cur_node.host_end = cur_node.host_start + sizeof (void *);
1427 		n = splay_tree_lookup (mem_map, &cur_node);
1428 		if (n == NULL
1429 		    || n->host_start > cur_node.host_start
1430 		    || n->host_end < cur_node.host_end)
1431 		  {
1432 		    gomp_mutex_unlock (&devicep->lock);
1433 		    gomp_fatal ("always pointer not mapped");
1434 		  }
1435 		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1436 		    != GOMP_MAP_ALWAYS_POINTER)
1437 		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1438 		if (cur_node.tgt_offset)
1439 		  cur_node.tgt_offset -= sizes[i];
1440 		gomp_copy_host2dev (devicep, aq,
1441 				    (void *) (n->tgt->tgt_start
1442 					      + n->tgt_offset
1443 					      + cur_node.host_start
1444 					      - n->host_start),
1445 				    (void *) &cur_node.tgt_offset,
1446 				    sizeof (void *), true, cbufp);
1447 		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1448 				      + cur_node.host_start - n->host_start;
1449 		continue;
1450 	      case GOMP_MAP_IF_PRESENT:
1451 		/* Not present - otherwise handled above. Skip over its
1452 		   MAP_POINTER as well.  */
1453 		if (i + 1 < mapnum
1454 		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1455 			== GOMP_MAP_POINTER))
1456 		  ++i;
1457 		continue;
1458 	      case GOMP_MAP_ATTACH:
1459 	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1460 		{
1461 		  cur_node.host_start = (uintptr_t) hostaddrs[i];
1462 		  cur_node.host_end = cur_node.host_start + sizeof (void *);
1463 		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1464 		  if (n != NULL)
1465 		    {
1466 		      tgt->list[i].key = n;
1467 		      tgt->list[i].offset = cur_node.host_start - n->host_start;
1468 		      tgt->list[i].length = n->host_end - n->host_start;
1469 		      tgt->list[i].copy_from = false;
1470 		      tgt->list[i].always_copy_from = false;
1471 		      tgt->list[i].is_attach = true;
1472 		      /* OpenACC 'attach'/'detach' doesn't affect
1473 			 structured/dynamic reference counts ('n->refcount',
1474 			 'n->dynamic_refcount').  */
1475 
1476 		      bool zlas
1477 			= ((kind & typemask)
1478 			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1479 		      gomp_attach_pointer (devicep, aq, mem_map, n,
1480 					   (uintptr_t) hostaddrs[i], sizes[i],
1481 					   cbufp, zlas);
1482 		    }
1483 		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1484 		    {
1485 		      gomp_mutex_unlock (&devicep->lock);
1486 		      gomp_fatal ("outer struct not mapped for attach");
1487 		    }
1488 		  continue;
1489 		}
1490 	      default:
1491 		break;
1492 	      }
1493 	    splay_tree_key k = &array->key;
1494 	    k->host_start = (uintptr_t) hostaddrs[i];
1495 	    if (!GOMP_MAP_POINTER_P (kind & typemask))
1496 	      k->host_end = k->host_start + sizes[i];
1497 	    else
1498 	      k->host_end = k->host_start + sizeof (void *);
1499 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
1500 	    if (n && n->refcount != REFCOUNT_LINK)
1501 	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1502 				      kind & typemask, false, implicit, cbufp,
1503 				      refcount_set);
1504 	    else
1505 	      {
1506 		k->aux = NULL;
1507 		if (n && n->refcount == REFCOUNT_LINK)
1508 		  {
1509 		    /* Replace target address of the pointer with target address
1510 		       of mapped object in the splay tree.  */
1511 		    splay_tree_remove (mem_map, n);
1512 		    k->aux
1513 		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1514 		    k->aux->link_key = n;
1515 		  }
1516 		size_t align = (size_t) 1 << (kind >> rshift);
1517 		tgt->list[i].key = k;
1518 		k->tgt = tgt;
1519 		k->refcount = 0;
1520 		k->dynamic_refcount = 0;
1521 		if (field_tgt_clear != FIELD_TGT_EMPTY)
1522 		  {
1523 		    k->tgt_offset = k->host_start - field_tgt_base
1524 				    + field_tgt_offset;
1525 		    if (openmp_p)
1526 		      {
1527 			k->refcount = REFCOUNT_STRUCTELEM;
1528 			if (field_tgt_structelem_first == NULL)
1529 			  {
1530 			    /* Set to first structure element of sequence.  */
1531 			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1532 			    field_tgt_structelem_first = k;
1533 			  }
1534 			else
1535 			  /* Point to refcount of leading element, but do not
1536 			     increment again.  */
1537 			  k->structelem_refcount_ptr
1538 			    = &field_tgt_structelem_first->structelem_refcount;
1539 
1540 			if (i == field_tgt_clear)
1541 			  {
1542 			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1543 			    field_tgt_structelem_first = NULL;
1544 			  }
1545 		      }
1546 		    if (i == field_tgt_clear)
1547 		      field_tgt_clear = FIELD_TGT_EMPTY;
1548 		  }
1549 		else
1550 		  {
1551 		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1552 		    k->tgt_offset = tgt_size;
1553 		    tgt_size += k->host_end - k->host_start;
1554 		  }
1555 		/* First increment, from 0 to 1. gomp_increment_refcount
1556 		   encapsulates the different increment cases, so use this
1557 		   instead of directly setting 1 during initialization.  */
1558 		gomp_increment_refcount (k, refcount_set);
1559 
1560 		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1561 		tgt->list[i].always_copy_from
1562 		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1563 		tgt->list[i].is_attach = false;
1564 		tgt->list[i].offset = 0;
1565 		tgt->list[i].length = k->host_end - k->host_start;
1566 		tgt->refcount++;
1567 		array->left = NULL;
1568 		array->right = NULL;
1569 		splay_tree_insert (mem_map, array);
1570 		switch (kind & typemask)
1571 		  {
1572 		  case GOMP_MAP_ALLOC:
1573 		  case GOMP_MAP_FROM:
1574 		  case GOMP_MAP_FORCE_ALLOC:
1575 		  case GOMP_MAP_FORCE_FROM:
1576 		  case GOMP_MAP_ALWAYS_FROM:
1577 		    break;
1578 		  case GOMP_MAP_TO:
1579 		  case GOMP_MAP_TOFROM:
1580 		  case GOMP_MAP_FORCE_TO:
1581 		  case GOMP_MAP_FORCE_TOFROM:
1582 		  case GOMP_MAP_ALWAYS_TO:
1583 		  case GOMP_MAP_ALWAYS_TOFROM:
1584 		    gomp_copy_host2dev (devicep, aq,
1585 					(void *) (tgt->tgt_start
1586 						  + k->tgt_offset),
1587 					(void *) k->host_start,
1588 					k->host_end - k->host_start,
1589 					false, cbufp);
1590 		    break;
1591 		  case GOMP_MAP_POINTER:
1592 		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1593 		    gomp_map_pointer
1594 		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
1595 		       k->tgt_offset, sizes[i], cbufp,
1596 		       ((kind & typemask)
1597 			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1598 		    break;
1599 		  case GOMP_MAP_TO_PSET:
1600 		    gomp_copy_host2dev (devicep, aq,
1601 					(void *) (tgt->tgt_start
1602 						  + k->tgt_offset),
1603 					(void *) k->host_start,
1604 					k->host_end - k->host_start,
1605 					false, cbufp);
1606 		    tgt->list[i].has_null_ptr_assoc = false;
1607 
1608 		    for (j = i + 1; j < mapnum; j++)
1609 		      {
1610 			int ptr_kind = (get_kind (short_mapkind, kinds, j)
1611 					& typemask);
1612 			if (!GOMP_MAP_POINTER_P (ptr_kind)
1613 			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1614 			  break;
1615 			else if ((uintptr_t) hostaddrs[j] < k->host_start
1616 				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1617 				     > k->host_end))
1618 			  break;
1619 			else
1620 			  {
1621 			    tgt->list[j].key = k;
1622 			    tgt->list[j].copy_from = false;
1623 			    tgt->list[j].always_copy_from = false;
1624 			    tgt->list[j].is_attach = false;
1625 			    tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1626 			    /* For OpenMP, the use of refcount_sets causes
1627 			       errors if we set k->refcount = 1 above but also
1628 			       increment it again here, because decrementing will
1629 			       not properly match, since we decrement only once
1630 			       for each key's refcount. Therefore avoid this
1631 			       increment for OpenMP constructs.  */
1632 			    if (!openmp_p)
1633 			      gomp_increment_refcount (k, refcount_set);
1634 			    gomp_map_pointer (tgt, aq,
1635 					      (uintptr_t) *(void **) hostaddrs[j],
1636 					      k->tgt_offset
1637 					      + ((uintptr_t) hostaddrs[j]
1638 						 - k->host_start),
1639 					      sizes[j], cbufp, false);
1640 			  }
1641 			}
1642 		    i = j - 1;
1643 		    break;
1644 		  case GOMP_MAP_FORCE_PRESENT:
1645 		    {
1646 		      /* We already looked up the memory region above and it
1647 			 was missing.  */
1648 		      size_t size = k->host_end - k->host_start;
1649 		      gomp_mutex_unlock (&devicep->lock);
1650 #ifdef HAVE_INTTYPES_H
1651 		      gomp_fatal ("present clause: !acc_is_present (%p, "
1652 				  "%"PRIu64" (0x%"PRIx64"))",
1653 				  (void *) k->host_start,
1654 				  (uint64_t) size, (uint64_t) size);
1655 #else
1656 		      gomp_fatal ("present clause: !acc_is_present (%p, "
1657 				  "%lu (0x%lx))", (void *) k->host_start,
1658 				  (unsigned long) size, (unsigned long) size);
1659 #endif
1660 		    }
1661 		    break;
1662 		  case GOMP_MAP_FORCE_DEVICEPTR:
1663 		    assert (k->host_end - k->host_start == sizeof (void *));
1664 		    gomp_copy_host2dev (devicep, aq,
1665 					(void *) (tgt->tgt_start
1666 						  + k->tgt_offset),
1667 					(void *) k->host_start,
1668 					sizeof (void *), false, cbufp);
1669 		    break;
1670 		  default:
1671 		    gomp_mutex_unlock (&devicep->lock);
1672 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1673 				kind);
1674 		  }
1675 
1676 		if (k->aux && k->aux->link_key)
1677 		  {
1678 		    /* Set link pointer on target to the device address of the
1679 		       mapped object.  */
1680 		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1681 		    /* We intentionally do not use coalescing here, as it's not
1682 		       data allocated by the current call to this function.  */
1683 		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1684 					&tgt_addr, sizeof (void *), true, NULL);
1685 		  }
1686 		array++;
1687 	      }
1688 	  }
1689     }
1690 
1691   if (pragma_kind == GOMP_MAP_VARS_TARGET)
1692     {
1693       for (i = 0; i < mapnum; i++)
1694 	{
1695 	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1696 	  gomp_copy_host2dev (devicep, aq,
1697 			      (void *) (tgt->tgt_start + i * sizeof (void *)),
1698 			      (void *) &cur_node.tgt_offset, sizeof (void *),
1699 			      true, cbufp);
1700 	}
1701     }
1702 
1703   if (cbufp)
1704     {
1705       /* See 'gomp_coalesce_buf_add'.  */
1706       assert (!aq);
1707 
1708       long c = 0;
1709       for (c = 0; c < cbuf.chunk_cnt; ++c)
1710 	gomp_copy_host2dev (devicep, aq,
1711 			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1712 			    (char *) cbuf.buf + (cbuf.chunks[c].start
1713 						 - cbuf.chunks[0].start),
1714 			    cbuf.chunks[c].end - cbuf.chunks[c].start,
1715 			    true, NULL);
1716       free (cbuf.buf);
1717       cbuf.buf = NULL;
1718       cbufp = NULL;
1719     }
1720 
1721   /* If the variable from "omp target enter data" map-list was already mapped,
1722      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
1723      gomp_exit_data.  */
1724   if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1725     {
1726       free (tgt);
1727       tgt = NULL;
1728     }
1729 
1730   gomp_mutex_unlock (&devicep->lock);
1731   return tgt;
1732 }
1733 
1734 static struct target_mem_desc *
1735 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1736 	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1737 	       bool short_mapkind, htab_t *refcount_set,
1738 	       enum gomp_map_vars_kind pragma_kind)
1739 {
1740   /* This management of a local refcount_set is for convenience of callers
1741      who do not share a refcount_set over multiple map/unmap uses.  */
1742   htab_t local_refcount_set = NULL;
1743   if (refcount_set == NULL)
1744     {
1745       local_refcount_set = htab_create (mapnum);
1746       refcount_set = &local_refcount_set;
1747     }
1748 
1749   struct target_mem_desc *tgt;
1750   tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1751 				sizes, kinds, short_mapkind, refcount_set,
1752 				pragma_kind);
1753   if (local_refcount_set)
1754     htab_free (local_refcount_set);
1755 
1756   return tgt;
1757 }
1758 
1759 attribute_hidden struct target_mem_desc *
1760 goacc_map_vars (struct gomp_device_descr *devicep,
1761 		struct goacc_asyncqueue *aq, size_t mapnum,
1762 		void **hostaddrs, void **devaddrs, size_t *sizes,
1763 		void *kinds, bool short_mapkind,
1764 		enum gomp_map_vars_kind pragma_kind)
1765 {
1766   return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1767 				 sizes, kinds, short_mapkind, NULL,
1768 				 GOMP_MAP_VARS_OPENACC | pragma_kind);
1769 }
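
/* Editorial example (not part of libgomp): a minimal sketch of user-level
   OpenMP C whose 'map' clauses the compiler lowers into the
   hostaddrs/sizes/kinds arrays consumed by gomp_map_vars above.  The
   function and variable names are hypothetical.  */
#if 0
#include <stddef.h>

void
example_saxpy (float a, float *x, float *y, size_t n)
{
  /* 'map (to: ...)' becomes a GOMP_MAP_TO entry and 'map (tofrom: ...)' a
     GOMP_MAP_TOFROM entry; gomp_map_vars_internal looks each up in the
     device's splay tree and allocates/copies as needed.  */
  #pragma omp target map(to: x[0:n]) map(tofrom: y[0:n])
  for (size_t i = 0; i < n; i++)
    y[i] = a * x[i] + y[i];
}
#endif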
1770 
1771 static void
1772 gomp_unmap_tgt (struct target_mem_desc *tgt)
1773 {
1774   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1775   if (tgt->tgt_end)
1776     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1777 
1778   free (tgt->array);
1779   free (tgt);
1780 }
1781 
1782 static bool
1783 gomp_unref_tgt (void *ptr)
1784 {
1785   bool is_tgt_unmapped = false;
1786 
1787   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1788 
1789   if (tgt->refcount > 1)
1790     tgt->refcount--;
1791   else
1792     {
1793       gomp_unmap_tgt (tgt);
1794       is_tgt_unmapped = true;
1795     }
1796 
1797   return is_tgt_unmapped;
1798 }
1799 
1800 static void
1801 gomp_unref_tgt_void (void *ptr)
1802 {
1803   (void) gomp_unref_tgt (ptr);
1804 }
1805 
1806 static void
1807 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1808 {
1809   splay_tree_remove (sp, k);
1810   if (k->aux)
1811     {
1812       if (k->aux->link_key)
1813 	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1814       if (k->aux->attach_count)
1815 	free (k->aux->attach_count);
1816       free (k->aux);
1817       k->aux = NULL;
1818     }
1819 }
1820 
1821 static inline __attribute__((always_inline)) bool
1822 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1823 			  struct goacc_asyncqueue *aq)
1824 {
1825   bool is_tgt_unmapped = false;
1826 
1827   if (REFCOUNT_STRUCTELEM_P (k->refcount))
1828     {
1829       if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1830 	/* Infer the splay_tree_key of the first structelem key using the
1831 	   pointer to the first structelem_refcount.  */
1832 	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1833 			      - offsetof (struct splay_tree_key_s,
1834 					  structelem_refcount));
1835       assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1836 
1837       /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1838 	 with the splay_tree_keys embedded inside.  */
1839       splay_tree_node node =
1840 	(splay_tree_node) ((char *) k
1841 			   - offsetof (struct splay_tree_node_s, key));
1842       while (true)
1843 	{
1844 	  /* Start from the _FIRST key, and continue for all following
1845 	     sibling keys.  */
1846 	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
1847 	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1848 	    break;
1849 	  else
1850 	    k = &(++node)->key;
1851 	}
1852     }
1853   else
1854     gomp_remove_splay_tree_key (&devicep->mem_map, k);
1855 
1856   if (aq)
1857     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1858 						(void *) k->tgt);
1859   else
1860     is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1861   return is_tgt_unmapped;
1862 }
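
/* Editorial note: the offsetof arithmetic above is the common "container
   of" idiom, recovering the enclosing object from a pointer to one of its
   members.  A self-contained sketch with hypothetical types: */
#if 0
#include <assert.h>
#include <stddef.h>

struct widget { int id; unsigned long refcount; };

/* Step back from the member's address to the start of its container.  */
static struct widget *
widget_of_refcount (unsigned long *rc)
{
  return (struct widget *) ((char *) rc - offsetof (struct widget, refcount));
}

int
main (void)
{
  struct widget w = { 42, 1 };
  assert (widget_of_refcount (&w.refcount) == &w);
  return 0;
}
#endif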
1863 
1864 attribute_hidden bool
1865 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1866 {
1867   return gomp_remove_var_internal (devicep, k, NULL);
1868 }
1869 
1870 /* Remove a variable asynchronously.  This actually removes the variable
1871    mapping immediately, but retains the linked target_mem_desc until the
1872    asynchronous operation has completed (as it may still refer to target
1873    memory).  The device lock must be held before entry, and remains locked on
1874    exit.  */
1875 
1876 attribute_hidden void
1877 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1878 		       struct goacc_asyncqueue *aq)
1879 {
1880   (void) gomp_remove_var_internal (devicep, k, aq);
1881 }
1882 
1883 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1884    variables back from device to host: if it is false, it is assumed that this
1885    has been done already.  */
1886 
1887 static inline __attribute__((always_inline)) void
1888 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1889 			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
1890 {
1891   struct gomp_device_descr *devicep = tgt->device_descr;
1892 
1893   if (tgt->list_count == 0)
1894     {
1895       free (tgt);
1896       return;
1897     }
1898 
1899   gomp_mutex_lock (&devicep->lock);
1900   if (devicep->state == GOMP_DEVICE_FINALIZED)
1901     {
1902       gomp_mutex_unlock (&devicep->lock);
1903       free (tgt->array);
1904       free (tgt);
1905       return;
1906     }
1907 
1908   size_t i;
1909 
1910   /* We must perform detachments before any copies back to the host.  */
1911   for (i = 0; i < tgt->list_count; i++)
1912     {
1913       splay_tree_key k = tgt->list[i].key;
1914 
1915       if (k != NULL && tgt->list[i].is_attach)
1916 	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1917 					     + tgt->list[i].offset,
1918 			     false, NULL);
1919     }
1920 
1921   for (i = 0; i < tgt->list_count; i++)
1922     {
1923       splay_tree_key k = tgt->list[i].key;
1924       if (k == NULL)
1925 	continue;
1926 
1927       /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1928 	 counts ('n->refcount', 'n->dynamic_refcount').  */
1929       if (tgt->list[i].is_attach)
1930 	continue;
1931 
1932       bool do_copy, do_remove;
1933       gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1934 
1935       if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1936 	  || tgt->list[i].always_copy_from)
1937 	gomp_copy_dev2host (devicep, aq,
1938 			    (void *) (k->host_start + tgt->list[i].offset),
1939 			    (void *) (k->tgt->tgt_start + k->tgt_offset
1940 				      + tgt->list[i].offset),
1941 			    tgt->list[i].length);
1942       if (do_remove)
1943 	{
1944 	  struct target_mem_desc *k_tgt = k->tgt;
1945 	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1946 	  /* It would be bad if TGT got unmapped while we're still iterating
1947 	     over its LIST_COUNT, and we also expect to use it in the following
1948 	     code.  */
1949 	  assert (!is_tgt_unmapped
1950 		  || k_tgt != tgt);
1951 	}
1952     }
1953 
1954   if (aq)
1955     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1956 						(void *) tgt);
1957   else
1958     gomp_unref_tgt ((void *) tgt);
1959 
1960   gomp_mutex_unlock (&devicep->lock);
1961 }
1962 
1963 static void
1964 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1965 		 htab_t *refcount_set)
1966 {
1967   /* This management of a local refcount_set is for convenience of callers
1968      who do not share a refcount_set over multiple map/unmap uses.  */
1969   htab_t local_refcount_set = NULL;
1970   if (refcount_set == NULL)
1971     {
1972       local_refcount_set = htab_create (tgt->list_count);
1973       refcount_set = &local_refcount_set;
1974     }
1975 
1976   gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1977 
1978   if (local_refcount_set)
1979     htab_free (local_refcount_set);
1980 }
1981 
1982 attribute_hidden void
1983 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1984 		  struct goacc_asyncqueue *aq)
1985 {
1986   gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1987 }
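
/* Editorial example (hypothetical user code): leaving a structured 'target
   data' region ends up in gomp_unmap_vars above, which decrements reference
   counts and copies 'from'/'tofrom' entries back to the host.  */
#if 0
#include <stddef.h>

void
example_data_region (double *buf, size_t n)
{
  #pragma omp target data map(tofrom: buf[0:n])
  {
    #pragma omp target
    for (size_t i = 0; i < n; i++)
      buf[i] *= 2.0;
  } /* Region end: refcount drops; buf[0:n] is copied device -> host.  */
}
#endif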
1988 
1989 static void
1990 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1991 	     size_t *sizes, void *kinds, bool short_mapkind)
1992 {
1993   size_t i;
1994   struct splay_tree_key_s cur_node;
1995   const int typemask = short_mapkind ? 0xff : 0x7;
1996 
1997   if (!devicep)
1998     return;
1999 
2000   if (mapnum == 0)
2001     return;
2002 
2003   gomp_mutex_lock (&devicep->lock);
2004   if (devicep->state == GOMP_DEVICE_FINALIZED)
2005     {
2006       gomp_mutex_unlock (&devicep->lock);
2007       return;
2008     }
2009 
2010   for (i = 0; i < mapnum; i++)
2011     if (sizes[i])
2012       {
2013 	cur_node.host_start = (uintptr_t) hostaddrs[i];
2014 	cur_node.host_end = cur_node.host_start + sizes[i];
2015 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2016 	if (n)
2017 	  {
2018 	    int kind = get_kind (short_mapkind, kinds, i);
2019 	    if (n->host_start > cur_node.host_start
2020 		|| n->host_end < cur_node.host_end)
2021 	      {
2022 		gomp_mutex_unlock (&devicep->lock);
2023 		gomp_fatal ("Trying to update [%p..%p) object when "
2024 			    "only [%p..%p) is mapped",
2025 			    (void *) cur_node.host_start,
2026 			    (void *) cur_node.host_end,
2027 			    (void *) n->host_start,
2028 			    (void *) n->host_end);
2029 	      }
2030 
2031 	    if (n->aux && n->aux->attach_count)
2032 	      {
2033 		uintptr_t addr = cur_node.host_start;
2034 		while (addr < cur_node.host_end)
2035 		  {
2036 		    /* We have to be careful not to overwrite still attached
2037 		       pointers during host<->device updates.  */
2038 		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
2039 		    if (n->aux->attach_count[i] == 0)
2040 		      {
2041 			void *devaddr = (void *) (n->tgt->tgt_start
2042 						  + n->tgt_offset
2043 						  + addr - n->host_start);
2044 			if (GOMP_MAP_COPY_TO_P (kind & typemask))
2045 			  gomp_copy_host2dev (devicep, NULL,
2046 					      devaddr, (void *) addr,
2047 					      sizeof (void *), false, NULL);
2048 			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2049 			  gomp_copy_dev2host (devicep, NULL,
2050 					      (void *) addr, devaddr,
2051 					      sizeof (void *));
2052 		      }
2053 		    addr += sizeof (void *);
2054 		  }
2055 	      }
2056 	    else
2057 	      {
2058 		void *hostaddr = (void *) cur_node.host_start;
2059 		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2060 					  + cur_node.host_start
2061 					  - n->host_start);
2062 		size_t size = cur_node.host_end - cur_node.host_start;
2063 
2064 		if (GOMP_MAP_COPY_TO_P (kind & typemask))
2065 		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2066 				      false, NULL);
2067 		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2068 		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2069 	      }
2070 	  }
2071       }
2072   gomp_mutex_unlock (&devicep->lock);
2073 }
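
/* Editorial example (hypothetical user code): 'target update' motion
   clauses correspond to the GOMP_MAP_COPY_TO_P/GOMP_MAP_COPY_FROM_P checks
   in gomp_update above.  */
#if 0
#include <stddef.h>

void
example_update (int *state, size_t n)
{
  #pragma omp target data map(to: state[0:n])
  {
    state[0] = 1;                              /* Change on the host.  */
    #pragma omp target update to(state[0:n])   /* Host -> device copy.  */
    #pragma omp target
    state[0] += 1;                             /* Change on the device.  */
    #pragma omp target update from(state[0:n]) /* Device -> host copy.  */
  }
}
#endif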
2074 
2075 /* Load the image pointed to by TARGET_DATA to the device specified by
2076    DEVICEP, and insert into the splay tree the mapping between addresses
2077    from HOST_TABLE and from the loaded target image.  We rely on the host
2078    and device compilers emitting variables and functions in the same order.  */
2079 
2080 static void
2081 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2082 			   const void *host_table, const void *target_data,
2083 			   bool is_register_lock)
2084 {
2085   void **host_func_table = ((void ***) host_table)[0];
2086   void **host_funcs_end  = ((void ***) host_table)[1];
2087   void **host_var_table  = ((void ***) host_table)[2];
2088   void **host_vars_end   = ((void ***) host_table)[3];
2089 
2090   /* The func table contains only addresses, the var table contains addresses
2091      and corresponding sizes.  */
2092   int num_funcs = host_funcs_end - host_func_table;
2093   int num_vars  = (host_vars_end - host_var_table) / 2;
2094 
2095   /* The only "other" entry currently is 'device_num'.  */
2096   int num_others = 1;
2097 
2098   /* Load image to device and get target addresses for the image.  */
2099   struct addr_pair *target_table = NULL;
2100   int i, num_target_entries;
2101 
2102   num_target_entries
2103     = devicep->load_image_func (devicep->target_id, version,
2104 				target_data, &target_table);
2105 
2106   if (num_target_entries != num_funcs + num_vars
2107       /* Others (device_num) are included as trailing entries in pair list.  */
2108       && num_target_entries != num_funcs + num_vars + num_others)
2109     {
2110       gomp_mutex_unlock (&devicep->lock);
2111       if (is_register_lock)
2112 	gomp_mutex_unlock (&register_lock);
2113       gomp_fatal ("Cannot map target functions or variables"
2114 		  " (expected %u, have %u)", num_funcs + num_vars,
2115 		  num_target_entries);
2116     }
2117 
2118   /* Insert host-target address mapping into splay tree.  */
2119   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2120   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2121   tgt->refcount = REFCOUNT_INFINITY;
2122   tgt->tgt_start = 0;
2123   tgt->tgt_end = 0;
2124   tgt->to_free = NULL;
2125   tgt->prev = NULL;
2126   tgt->list_count = 0;
2127   tgt->device_descr = devicep;
2128   splay_tree_node array = tgt->array;
2129 
2130   for (i = 0; i < num_funcs; i++)
2131     {
2132       splay_tree_key k = &array->key;
2133       k->host_start = (uintptr_t) host_func_table[i];
2134       k->host_end = k->host_start + 1;
2135       k->tgt = tgt;
2136       k->tgt_offset = target_table[i].start;
2137       k->refcount = REFCOUNT_INFINITY;
2138       k->dynamic_refcount = 0;
2139       k->aux = NULL;
2140       array->left = NULL;
2141       array->right = NULL;
2142       splay_tree_insert (&devicep->mem_map, array);
2143       array++;
2144     }
2145 
2146   /* Most significant bit of the size in host and target tables marks
2147      "omp declare target link" variables.  */
2148   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2149   const uintptr_t size_mask = ~link_bit;
2150 
2151   for (i = 0; i < num_vars; i++)
2152     {
2153       struct addr_pair *target_var = &target_table[num_funcs + i];
2154       uintptr_t target_size = target_var->end - target_var->start;
2155       bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2156 
2157       if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2158 	{
2159 	  gomp_mutex_unlock (&devicep->lock);
2160 	  if (is_register_lock)
2161 	    gomp_mutex_unlock (&register_lock);
2162 	  gomp_fatal ("Cannot map target variables (size mismatch)");
2163 	}
2164 
2165       splay_tree_key k = &array->key;
2166       k->host_start = (uintptr_t) host_var_table[i * 2];
2167       k->host_end
2168 	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2169       k->tgt = tgt;
2170       k->tgt_offset = target_var->start;
2171       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2172       k->dynamic_refcount = 0;
2173       k->aux = NULL;
2174       array->left = NULL;
2175       array->right = NULL;
2176       splay_tree_insert (&devicep->mem_map, array);
2177       array++;
2178     }
2179 
2180   /* The last entry is for the on-device 'device_num' variable.  Tolerate the
2181      case where the plugin does not return this entry.  */
2182   if (num_funcs + num_vars < num_target_entries)
2183     {
2184       struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2185       /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2186 	 was found in this image.  */
2187       if (device_num_var->start != 0)
2188 	{
2189 	  /* The index of the devicep within devices[] is regarded as its
2190 	     'device number', which is different from the per-device type
2191 	     devicep->target_id.  */
2192 	  int device_num_val = (int) (devicep - &devices[0]);
2193 	  if (device_num_var->end - device_num_var->start != sizeof (int))
2194 	    {
2195 	      gomp_mutex_unlock (&devicep->lock);
2196 	      if (is_register_lock)
2197 		gomp_mutex_unlock (&register_lock);
2198 	      gomp_fatal ("offload plugin managed 'device_num' not of expected "
2199 			  "format");
2200 	    }
2201 
2202 	  /* Copy the device_num value to its place in device memory, thereby
2203 	     putting the designated device number into effect.  */
2204 	  gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2205 			      &device_num_val, sizeof (int), false, NULL);
2206 	}
2207     }
2208 
2209   free (target_table);
2210 }
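
/* Editorial sketch of the HOST_TABLE layout decoded above.  In reality the
   compiler emits these tables; the data below is hypothetical and only
   illustrates the shape: four pointers delimiting a function-address table
   and an address/size pair table.  (The function-pointer-to-void* casts
   are a common extension, not strict ISO C.)  */
#if 0
#include <stdint.h>

static void host_fn_a (void) {}
static void host_fn_b (void) {}
static int host_var;

static void *example_funcs[] = { (void *) host_fn_a, (void *) host_fn_b };
/* Address/size pairs; the size's most significant bit would mark an
   "omp declare target link" variable.  */
static void *example_vars[] = { &host_var,
				(void *) (uintptr_t) sizeof (host_var) };

static const void *example_host_table[4] = {
  &example_funcs[0], &example_funcs[2],	/* host_func_table, host_funcs_end */
  &example_vars[0], &example_vars[2],	/* host_var_table, host_vars_end */
};
#endif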
2211 
2212 /* Unload the mappings described by TARGET_DATA from device DEVICEP.
2213    The device must be locked.  */
2214 
2215 static void
2216 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2217 			       unsigned version,
2218 			       const void *host_table, const void *target_data)
2219 {
2220   void **host_func_table = ((void ***) host_table)[0];
2221   void **host_funcs_end  = ((void ***) host_table)[1];
2222   void **host_var_table  = ((void ***) host_table)[2];
2223   void **host_vars_end   = ((void ***) host_table)[3];
2224 
2225   /* The func table contains only addresses, the var table contains addresses
2226      and corresponding sizes.  */
2227   int num_funcs = host_funcs_end - host_func_table;
2228   int num_vars  = (host_vars_end - host_var_table) / 2;
2229 
2230   struct splay_tree_key_s k;
2231   splay_tree_key node = NULL;
2232 
2233   /* Find the mapping at the start of the node array.  */
2234   if (num_funcs || num_vars)
2235     {
2236       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2237 		      : (uintptr_t) host_var_table[0]);
2238       k.host_end = k.host_start + 1;
2239       node = splay_tree_lookup (&devicep->mem_map, &k);
2240     }
2241 
2242   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2243     {
2244       gomp_mutex_unlock (&devicep->lock);
2245       gomp_fatal ("image unload fail");
2246     }
2247 
2248   /* Remove mappings from splay tree.  */
2249   int i;
2250   for (i = 0; i < num_funcs; i++)
2251     {
2252       k.host_start = (uintptr_t) host_func_table[i];
2253       k.host_end = k.host_start + 1;
2254       splay_tree_remove (&devicep->mem_map, &k);
2255     }
2256 
2257   /* Most significant bit of the size in host and target tables marks
2258      "omp declare target link" variables.  */
2259   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2260   const uintptr_t size_mask = ~link_bit;
2261   bool is_tgt_unmapped = false;
2262 
2263   for (i = 0; i < num_vars; i++)
2264     {
2265       k.host_start = (uintptr_t) host_var_table[i * 2];
2266       k.host_end
2267 	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2268 
2269       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2270 	splay_tree_remove (&devicep->mem_map, &k);
2271       else
2272 	{
2273 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2274 	  is_tgt_unmapped = gomp_remove_var (devicep, n);
2275 	}
2276     }
2277 
2278   if (node && !is_tgt_unmapped)
2279     {
2280       free (node->tgt);
2281       free (node);
2282     }
2283 }
2284 
2285 /* This function should be called from every offload image while it is being
2286    loaded.  It gets the descriptor of the host func and var tables HOST_TABLE,
2287    the TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
2288 
2289 void
2290 GOMP_offload_register_ver (unsigned version, const void *host_table,
2291 			   int target_type, const void *target_data)
2292 {
2293   int i;
2294 
2295   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2296     gomp_fatal ("Library too old for offload (version %u < %u)",
2297 		GOMP_VERSION, GOMP_VERSION_LIB (version));
2298 
2299   gomp_mutex_lock (&register_lock);
2300 
2301   /* Load image to all initialized devices.  */
2302   for (i = 0; i < num_devices; i++)
2303     {
2304       struct gomp_device_descr *devicep = &devices[i];
2305       gomp_mutex_lock (&devicep->lock);
2306       if (devicep->type == target_type
2307 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
2308 	gomp_load_image_to_device (devicep, version,
2309 				   host_table, target_data, true);
2310       gomp_mutex_unlock (&devicep->lock);
2311     }
2312 
2313   /* Insert the image into the array of pending images.  */
2314   offload_images
2315     = gomp_realloc_unlock (offload_images,
2316 			   (num_offload_images + 1)
2317 			   * sizeof (struct offload_image_descr));
2318   offload_images[num_offload_images].version = version;
2319   offload_images[num_offload_images].type = target_type;
2320   offload_images[num_offload_images].host_table = host_table;
2321   offload_images[num_offload_images].target_data = target_data;
2322 
2323   num_offload_images++;
2324   gomp_mutex_unlock (&register_lock);
2325 }
2326 
2327 void
2328 GOMP_offload_register (const void *host_table, int target_type,
2329 		       const void *target_data)
2330 {
2331   GOMP_offload_register_ver (0, host_table, target_type, target_data);
2332 }
2333 
2334 /* This function should be called from every offload image while it is being
2335    unloaded.  It gets the descriptor of the host func and var tables HOST_TABLE,
2336    the TYPE of the target, and the TARGET_DATA needed by the target plugin.  */
2337 
2338 void
2339 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2340 			     int target_type, const void *target_data)
2341 {
2342   int i;
2343 
2344   gomp_mutex_lock (&register_lock);
2345 
2346   /* Unload image from all initialized devices.  */
2347   for (i = 0; i < num_devices; i++)
2348     {
2349       struct gomp_device_descr *devicep = &devices[i];
2350       gomp_mutex_lock (&devicep->lock);
2351       if (devicep->type == target_type
2352 	  && devicep->state == GOMP_DEVICE_INITIALIZED)
2353 	gomp_unload_image_from_device (devicep, version,
2354 				       host_table, target_data);
2355       gomp_mutex_unlock (&devicep->lock);
2356     }
2357 
2358   /* Remove image from array of pending images.  */
2359   for (i = 0; i < num_offload_images; i++)
2360     if (offload_images[i].target_data == target_data)
2361       {
2362 	offload_images[i] = offload_images[--num_offload_images];
2363 	break;
2364       }
2365 
2366   gomp_mutex_unlock (&register_lock);
2367 }
2368 
2369 void
2370 GOMP_offload_unregister (const void *host_table, int target_type,
2371 			 const void *target_data)
2372 {
2373   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2374 }
2375 
2376 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
2377    must be locked on entry, and remains locked on return.  */
2378 
2379 attribute_hidden void
2380 gomp_init_device (struct gomp_device_descr *devicep)
2381 {
2382   int i;
2383   if (!devicep->init_device_func (devicep->target_id))
2384     {
2385       gomp_mutex_unlock (&devicep->lock);
2386       gomp_fatal ("device initialization failed");
2387     }
2388 
2389   /* Load to the device all images registered so far.  */
2390   for (i = 0; i < num_offload_images; i++)
2391     {
2392       struct offload_image_descr *image = &offload_images[i];
2393       if (image->type == devicep->type)
2394 	gomp_load_image_to_device (devicep, image->version,
2395 				   image->host_table, image->target_data,
2396 				   false);
2397     }
2398 
2399   /* Initialize OpenACC asynchronous queues.  */
2400   goacc_init_asyncqueues (devicep);
2401 
2402   devicep->state = GOMP_DEVICE_INITIALIZED;
2403 }
2404 
2405 /* This function finalizes the target device, specified by DEVICEP.  DEVICEP
2406    must be locked on entry, and remains locked on return.  */
2407 
2408 attribute_hidden bool
2409 gomp_fini_device (struct gomp_device_descr *devicep)
2410 {
2411   bool ret = goacc_fini_asyncqueues (devicep);
2412   ret &= devicep->fini_device_func (devicep->target_id);
2413   devicep->state = GOMP_DEVICE_FINALIZED;
2414   return ret;
2415 }
2416 
2417 attribute_hidden void
2418 gomp_unload_device (struct gomp_device_descr *devicep)
2419 {
2420   if (devicep->state == GOMP_DEVICE_INITIALIZED)
2421     {
2422       unsigned i;
2423 
2424       /* Unload from the device all currently registered images.  */
2425       for (i = 0; i < num_offload_images; i++)
2426 	{
2427 	  struct offload_image_descr *image = &offload_images[i];
2428 	  if (image->type == devicep->type)
2429 	    gomp_unload_image_from_device (devicep, image->version,
2430 					   image->host_table,
2431 					   image->target_data);
2432 	}
2433     }
2434 }
2435 
2436 /* Host fallback for GOMP_target{,_ext} routines.  */
2437 
2438 static void
2439 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2440 		      struct gomp_device_descr *devicep, void **args)
2441 {
2442   struct gomp_thread old_thr, *thr = gomp_thread ();
2443 
2444   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2445       && devicep != NULL)
2446     gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2447 		"be used for offloading");
2448 
2449   old_thr = *thr;
2450   memset (thr, '\0', sizeof (*thr));
2451   if (gomp_places_list)
2452     {
2453       thr->place = old_thr.place;
2454       thr->ts.place_partition_len = gomp_places_list_len;
2455     }
2456   if (args)
2457     while (*args)
2458       {
2459 	intptr_t id = (intptr_t) *args++, val;
2460 	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2461 	  val = (intptr_t) *args++;
2462 	else
2463 	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2464 	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2465 	  continue;
2466 	id &= GOMP_TARGET_ARG_ID_MASK;
2467 	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2468 	  continue;
2469 	val = val > INT_MAX ? INT_MAX : val;
2470 	if (val)
2471 	  gomp_icv (true)->thread_limit_var = val;
2472 	break;
2473       }
2474 
2475   fn (hostaddrs);
2476   gomp_free_thread (thr);
2477   *thr = old_thr;
2478 }
2479 
2480 /* Calculate alignment and size requirements of a private copy of data shared
2481    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
2482 
2483 static inline void
2484 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2485 				     unsigned short *kinds, size_t *tgt_align,
2486 				     size_t *tgt_size)
2487 {
2488   size_t i;
2489   for (i = 0; i < mapnum; i++)
2490     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2491       {
2492 	size_t align = (size_t) 1 << (kinds[i] >> 8);
2493 	if (*tgt_align < align)
2494 	  *tgt_align = align;
2495 	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2496 	*tgt_size += sizes[i];
2497       }
2498 }
2499 
2500 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
2501 
2502 static inline void
2503 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2504 			size_t *sizes, unsigned short *kinds, size_t tgt_align,
2505 			size_t tgt_size)
2506 {
2507   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2508   if (al)
2509     tgt += tgt_align - al;
2510   tgt_size = 0;
2511   size_t i;
2512   for (i = 0; i < mapnum; i++)
2513     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2514       {
2515 	size_t align = (size_t) 1 << (kinds[i] >> 8);
2516 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
2517 	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2518 	hostaddrs[i] = tgt + tgt_size;
2519 	tgt_size = tgt_size + sizes[i];
2520       }
2521 }
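
/* Editorial note: both helpers above round sizes up with
   (size + align - 1) & ~(align - 1), which aligns upward when ALIGN is a
   power of two.  A tiny self-contained check (hypothetical helper name): */
#if 0
#include <assert.h>
#include <stddef.h>

static size_t
align_up (size_t size, size_t align)	/* ALIGN must be a power of two.  */
{
  return (size + align - 1) & ~(align - 1);
}

int
main (void)
{
  assert (align_up (0, 8) == 0);
  assert (align_up (1, 8) == 8);
  assert (align_up (13, 4) == 16);
  assert (align_up (16, 16) == 16);
  return 0;
}
#endif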
2522 
2523 /* Helper function of GOMP_target{,_ext} routines.  */
2524 
2525 static void *
2526 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2527 			 void (*host_fn) (void *))
2528 {
2529   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2530     return (void *) host_fn;
2531   else
2532     {
2533       gomp_mutex_lock (&devicep->lock);
2534       if (devicep->state == GOMP_DEVICE_FINALIZED)
2535 	{
2536 	  gomp_mutex_unlock (&devicep->lock);
2537 	  return NULL;
2538 	}
2539 
2540       struct splay_tree_key_s k;
2541       k.host_start = (uintptr_t) host_fn;
2542       k.host_end = k.host_start + 1;
2543       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2544       gomp_mutex_unlock (&devicep->lock);
2545       if (tgt_fn == NULL)
2546 	return NULL;
2547 
2548       return (void *) tgt_fn->tgt_offset;
2549     }
2550 }
2551 
2552 /* Called when encountering a target directive.  If DEVICE
2553    is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
2554    GOMP_DEVICE_HOST_FALLBACK (or any value
2555    larger than last available hw device), use host fallback.
2556    FN is the address of the host code, UNUSED is part of the current ABI, but
2557    we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
2558    with MAPNUM entries, with addresses of the host objects,
2559    sizes of the host objects (resp. for pointer kind pointer bias
2560    and assumed sizeof (void *) size) and kinds.  */
2561 
2562 void
2563 GOMP_target (int device, void (*fn) (void *), const void *unused,
2564 	     size_t mapnum, void **hostaddrs, size_t *sizes,
2565 	     unsigned char *kinds)
2566 {
2567   struct gomp_device_descr *devicep = resolve_device (device);
2568 
2569   void *fn_addr;
2570   if (devicep == NULL
2571       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2572       /* All shared memory devices should use the GOMP_target_ext function.  */
2573       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2574       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2575     return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2576 
2577   htab_t refcount_set = htab_create (mapnum);
2578   struct target_mem_desc *tgt_vars
2579     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2580 		     &refcount_set, GOMP_MAP_VARS_TARGET);
2581   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2582 		     NULL);
2583   htab_clear (refcount_set);
2584   gomp_unmap_vars (tgt_vars, true, &refcount_set);
2585   htab_free (refcount_set);
2586 }
2587 
2588 static inline unsigned int
2589 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2590 {
2591   /* If we cannot run asynchronously, simply ignore nowait.  */
2592   if (devicep != NULL && devicep->async_run_func == NULL)
2593     flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2594 
2595   return flags;
2596 }
2597 
2598 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2599    and several arguments have been added:
2600    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2601    DEPEND is array of dependencies, see GOMP_task for details.
2602 
2603    ARGS is a pointer to an array consisting of a variable number of both
2604    device-independent and device-specific arguments, each of which can take
2605    one or two elements where the first specifies for which device it is
2606    intended, the type and optionally also the value.  If the value is not
2607    present in the first element, the whole second element is the actual
2608    value.  The last element of the array is a single NULL.  Among the
2609    device-independent arguments are for example NUM_TEAMS and THREAD_LIMIT.
2610 
2611    NUM_TEAMS is positive if GOMP_teams will be called in the body with
2612    that value, or 1 if the teams construct is not present, or 0 if the
2613    teams construct does not have a num_teams clause and so the choice is
2614    implementation defined, and -1 if it can't be determined on the host
2615    what value GOMP_teams will have on the device.
2616    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2617    body with that value, or 0 if the teams construct does not have a
2618    thread_limit clause or the teams construct is not present, or -1 if it
2619    can't be determined on the host what value GOMP_teams will have on the device.  */
2620 
2621 void
2622 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2623 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2624 		 unsigned int flags, void **depend, void **args)
2625 {
2626   struct gomp_device_descr *devicep = resolve_device (device);
2627   size_t tgt_align = 0, tgt_size = 0;
2628   bool fpc_done = false;
2629 
2630   flags = clear_unsupported_flags (devicep, flags);
2631 
2632   if (flags & GOMP_TARGET_FLAG_NOWAIT)
2633     {
2634       struct gomp_thread *thr = gomp_thread ();
2635       /* Create a team if we don't have any around, as nowait
2636 	 target tasks make sense to run asynchronously even when
2637 	 outside of any parallel.  */
2638       if (__builtin_expect (thr->ts.team == NULL, 0))
2639 	{
2640 	  struct gomp_team *team = gomp_new_team (1);
2641 	  struct gomp_task *task = thr->task;
2642 	  struct gomp_task **implicit_task = &task;
2643 	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2644 	  team->prev_ts = thr->ts;
2645 	  thr->ts.team = team;
2646 	  thr->ts.team_id = 0;
2647 	  thr->ts.work_share = &team->work_shares[0];
2648 	  thr->ts.last_work_share = NULL;
2649 #ifdef HAVE_SYNC_BUILTINS
2650 	  thr->ts.single_count = 0;
2651 #endif
2652 	  thr->ts.static_trip = 0;
2653 	  thr->task = &team->implicit_task[0];
2654 	  gomp_init_task (thr->task, NULL, icv);
2655 	  while (*implicit_task
2656 		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2657 	    implicit_task = &(*implicit_task)->parent;
2658 	  if (*implicit_task)
2659 	    {
2660 	      thr->task = *implicit_task;
2661 	      gomp_end_task ();
2662 	      free (*implicit_task);
2663 	      thr->task = &team->implicit_task[0];
2664 	    }
2665 	  else
2666 	    pthread_setspecific (gomp_thread_destructor, thr);
2667 	  if (implicit_task != &task)
2668 	    {
2669 	      *implicit_task = thr->task;
2670 	      thr->task = task;
2671 	    }
2672 	}
2673       if (thr->ts.team
2674 	  && !thr->task->final_task)
2675 	{
2676 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2677 				   sizes, kinds, flags, depend, args,
2678 				   GOMP_TARGET_TASK_BEFORE_MAP);
2679 	  return;
2680 	}
2681     }
2682 
2683   /* If there are depend clauses, but nowait is not present
2684      (or we are in a final task), block the parent task until the
2685      dependencies are resolved and then just continue with the rest
2686      of the function as if it is a merged task.  */
2687   if (depend != NULL)
2688     {
2689       struct gomp_thread *thr = gomp_thread ();
2690       if (thr->task && thr->task->depend_hash)
2691 	{
2692 	  /* If we might need to wait, copy firstprivate now.  */
2693 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2694 					       &tgt_align, &tgt_size);
2695 	  if (tgt_align)
2696 	    {
2697 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2698 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2699 				      tgt_align, tgt_size);
2700 	    }
2701 	  fpc_done = true;
2702 	  gomp_task_maybe_wait_for_dependencies (depend);
2703 	}
2704     }
2705 
2706   void *fn_addr;
2707   if (devicep == NULL
2708       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2709       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2710       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2711     {
2712       if (!fpc_done)
2713 	{
2714 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2715 					       &tgt_align, &tgt_size);
2716 	  if (tgt_align)
2717 	    {
2718 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2719 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2720 				      tgt_align, tgt_size);
2721 	    }
2722 	}
2723       gomp_target_fallback (fn, hostaddrs, devicep, args);
2724       return;
2725     }
2726 
2727   struct target_mem_desc *tgt_vars;
2728   htab_t refcount_set = NULL;
2729 
2730   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2731     {
2732       if (!fpc_done)
2733 	{
2734 	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2735 					       &tgt_align, &tgt_size);
2736 	  if (tgt_align)
2737 	    {
2738 	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2739 	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2740 				      tgt_align, tgt_size);
2741 	    }
2742 	}
2743       tgt_vars = NULL;
2744     }
2745   else
2746     {
2747       refcount_set = htab_create (mapnum);
2748       tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2749 				true, &refcount_set, GOMP_MAP_VARS_TARGET);
2750     }
2751   devicep->run_func (devicep->target_id, fn_addr,
2752 		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2753 		     args);
2754   if (tgt_vars)
2755     {
2756       htab_clear (refcount_set);
2757       gomp_unmap_vars (tgt_vars, true, &refcount_set);
2758     }
2759   if (refcount_set)
2760     htab_free (refcount_set);
2761 }
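
/* Editorial example (hypothetical user code): a 'nowait' target region
   takes the GOMP_TARGET_FLAG_NOWAIT path above and may be run as a deferred
   target task; 'taskwait' then waits for its completion.  */
#if 0
#include <stddef.h>

void
example_nowait (int *a, size_t n)
{
  #pragma omp target map(tofrom: a[0:n]) nowait
  for (size_t i = 0; i < n; i++)
    a[i]++;
  #pragma omp taskwait	/* Wait for the asynchronous target task.  */
}
#endif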
2762 
2763 /* Host fallback for GOMP_target_data{,_ext} routines.  */
2764 
2765 static void
2766 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2767 {
2768   struct gomp_task_icv *icv = gomp_icv (false);
2769 
2770   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2771       && devicep != NULL)
2772     gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2773 		"be used for offloading");
2774 
2775   if (icv->target_data)
2776     {
2777       /* Even when doing a host fallback, if there are any active
2778          #pragma omp target data constructs, we need to remember the
2779          new #pragma omp target data, otherwise GOMP_target_end_data
2780          would get out of sync.  */
2781       struct target_mem_desc *tgt
2782 	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2783 			 NULL, GOMP_MAP_VARS_DATA);
2784       tgt->prev = icv->target_data;
2785       icv->target_data = tgt;
2786     }
2787 }
2788 
2789 void
2790 GOMP_target_data (int device, const void *unused, size_t mapnum,
2791 		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
2792 {
2793   struct gomp_device_descr *devicep = resolve_device (device);
2794 
2795   if (devicep == NULL
2796       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2797       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2798     return gomp_target_data_fallback (devicep);
2799 
2800   struct target_mem_desc *tgt
2801     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2802 		     NULL, GOMP_MAP_VARS_DATA);
2803   struct gomp_task_icv *icv = gomp_icv (true);
2804   tgt->prev = icv->target_data;
2805   icv->target_data = tgt;
2806 }
2807 
2808 void
2809 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2810 		      size_t *sizes, unsigned short *kinds)
2811 {
2812   struct gomp_device_descr *devicep = resolve_device (device);
2813 
2814   if (devicep == NULL
2815       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2817     return gomp_target_data_fallback (devicep);
2818 
2819   struct target_mem_desc *tgt
2820     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2821 		     NULL, GOMP_MAP_VARS_DATA);
2822   struct gomp_task_icv *icv = gomp_icv (true);
2823   tgt->prev = icv->target_data;
2824   icv->target_data = tgt;
2825 }
2826 
2827 void
2828 GOMP_target_end_data (void)
2829 {
2830   struct gomp_task_icv *icv = gomp_icv (false);
2831   if (icv->target_data)
2832     {
2833       struct target_mem_desc *tgt = icv->target_data;
2834       icv->target_data = tgt->prev;
2835       gomp_unmap_vars (tgt, true, NULL);
2836     }
2837 }
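
/* Editorial example (hypothetical user code): 'target data' regions nest
   as a stack linked through tgt->prev (see the icv->target_data updates
   above); GOMP_target_end_data pops one level per region end.  */
#if 0
#include <stddef.h>

void
example_nested_data (float *p, size_t n)
{
  #pragma omp target data map(alloc: p[0:n])	/* Push level 1.  */
  {
    #pragma omp target data map(to: p[0:n])	/* Push level 2.  */
    {
    }						/* Pop level 2.  */
  }						/* Pop level 1.  */
}
#endif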
2838 
2839 void
2840 GOMP_target_update (int device, const void *unused, size_t mapnum,
2841 		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
2842 {
2843   struct gomp_device_descr *devicep = resolve_device (device);
2844 
2845   if (devicep == NULL
2846       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2847       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2848     return;
2849 
2850   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2851 }
2852 
2853 void
2854 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2855 			size_t *sizes, unsigned short *kinds,
2856 			unsigned int flags, void **depend)
2857 {
2858   struct gomp_device_descr *devicep = resolve_device (device);
2859 
2860   /* If there are depend clauses, but nowait is not present,
2861      block the parent task until the dependencies are resolved
2862      and then just continue with the rest of the function as if it
2863    is a merged task.  Until we are able to schedule tasks during
2864      variable mapping or unmapping, ignore nowait if depend clauses
2865      are not present.  */
2866   if (depend != NULL)
2867     {
2868       struct gomp_thread *thr = gomp_thread ();
2869       if (thr->task && thr->task->depend_hash)
2870 	{
2871 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2872 	      && thr->ts.team
2873 	      && !thr->task->final_task)
2874 	    {
2875 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2876 					   mapnum, hostaddrs, sizes, kinds,
2877 					   flags | GOMP_TARGET_FLAG_UPDATE,
2878 					   depend, NULL, GOMP_TARGET_TASK_DATA))
2879 		return;
2880 	    }
2881 	  else
2882 	    {
2883 	      struct gomp_team *team = thr->ts.team;
2884 	      /* If parallel or taskgroup has been cancelled, don't start new
2885 		 tasks.  */
2886 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2887 		{
2888 		  if (gomp_team_barrier_cancelled (&team->barrier))
2889 		    return;
2890 		  if (thr->task->taskgroup)
2891 		    {
2892 		      if (thr->task->taskgroup->cancelled)
2893 			return;
2894 		      if (thr->task->taskgroup->workshare
2895 			  && thr->task->taskgroup->prev
2896 			  && thr->task->taskgroup->prev->cancelled)
2897 			return;
2898 		    }
2899 		}
2900 
2901 	      gomp_task_maybe_wait_for_dependencies (depend);
2902 	    }
2903 	}
2904     }
2905 
2906   if (devicep == NULL
2907       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2908       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2909     return;
2910 
2911   struct gomp_thread *thr = gomp_thread ();
2912   struct gomp_team *team = thr->ts.team;
2913   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2914   if (__builtin_expect (gomp_cancel_var, 0) && team)
2915     {
2916       if (gomp_team_barrier_cancelled (&team->barrier))
2917 	return;
2918       if (thr->task->taskgroup)
2919 	{
2920 	  if (thr->task->taskgroup->cancelled)
2921 	    return;
2922 	  if (thr->task->taskgroup->workshare
2923 	      && thr->task->taskgroup->prev
2924 	      && thr->task->taskgroup->prev->cancelled)
2925 	    return;
2926 	}
2927     }
2928 
2929   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2930 }
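
/* Editorial example (hypothetical user code): 'target update' with
   'depend' either waits via gomp_task_maybe_wait_for_dependencies above or,
   with 'nowait' inside a team, is deferred through gomp_create_target_task
   (GOMP_TARGET_TASK_DATA).  */
#if 0
void
example_update_depend (int *v)
{
  #pragma omp task depend(out: v[0])
  v[0] = 1;
  /* Ordered after the task above by the 'depend' clause.  */
  #pragma omp target update to(v[0:1]) depend(in: v[0])
  #pragma omp taskwait
}
#endif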
2931 
2932 static void
2933 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2934 		void **hostaddrs, size_t *sizes, unsigned short *kinds,
2935 		htab_t *refcount_set)
2936 {
2937   const int typemask = 0xff;
2938   size_t i;
2939   gomp_mutex_lock (&devicep->lock);
2940   if (devicep->state == GOMP_DEVICE_FINALIZED)
2941     {
2942       gomp_mutex_unlock (&devicep->lock);
2943       return;
2944     }
2945 
2946   for (i = 0; i < mapnum; i++)
2947     if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2948       {
2949 	struct splay_tree_key_s cur_node;
2950 	cur_node.host_start = (uintptr_t) hostaddrs[i];
2951 	cur_node.host_end = cur_node.host_start + sizeof (void *);
2952 	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2953 
2954 	if (n)
2955 	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2956 			       false, NULL);
2957       }
2958 
2959   int nrmvars = 0;
2960   splay_tree_key remove_vars[mapnum];
2961 
2962   for (i = 0; i < mapnum; i++)
2963     {
2964       struct splay_tree_key_s cur_node;
2965       unsigned char kind = kinds[i] & typemask;
2966       switch (kind)
2967 	{
2968 	case GOMP_MAP_FROM:
2969 	case GOMP_MAP_ALWAYS_FROM:
2970 	case GOMP_MAP_DELETE:
2971 	case GOMP_MAP_RELEASE:
2972 	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2973 	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2974 	  cur_node.host_start = (uintptr_t) hostaddrs[i];
2975 	  cur_node.host_end = cur_node.host_start + sizes[i];
2976 	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2977 			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2978 	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2979 	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
2980 	  if (!k)
2981 	    continue;
2982 
2983 	  bool delete_p = (kind == GOMP_MAP_DELETE
2984 			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2985 	  bool do_copy, do_remove;
2986 	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2987 				   &do_remove);
2988 
2989 	  if ((kind == GOMP_MAP_FROM && do_copy)
2990 	      || kind == GOMP_MAP_ALWAYS_FROM)
2991 	    {
2992 	      if (k->aux && k->aux->attach_count)
2993 		{
2994 		  /* We have to be careful not to overwrite still attached
2995 		     pointers during the copyback to host.  */
2996 		  uintptr_t addr = k->host_start;
2997 		  while (addr < k->host_end)
2998 		    {
2999 		      size_t i = (addr - k->host_start) / sizeof (void *);
3000 		      if (k->aux->attach_count[i] == 0)
3001 			gomp_copy_dev2host (devicep, NULL, (void *) addr,
3002 					    (void *) (k->tgt->tgt_start
3003 						      + k->tgt_offset
3004 						      + addr - k->host_start),
3005 					    sizeof (void *));
3006 		      addr += sizeof (void *);
3007 		    }
3008 		}
3009 	      else
3010 		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3011 				    (void *) (k->tgt->tgt_start + k->tgt_offset
3012 					      + cur_node.host_start
3013 					      - k->host_start),
3014 				    cur_node.host_end - cur_node.host_start);
3015 	    }
3016 
3017 	  /* Structure element lists are removed altogether at once, which
3018 	     may cause immediate deallocation of the target_mem_desc and hence
3019 	     errors if we still have subsequent element siblings to copy back.
3020 	     While we're at it, it also seems more disciplined to simply
3021 	     queue all removals together for processing below.
3022 
3023 	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3024 	     not have this problem, since it maintains an additional
3025 	     tgt->refcount = 1 reference to the target_mem_desc to start with.
3026 	  */
3027 	  if (do_remove)
3028 	    remove_vars[nrmvars++] = k;
3029 	  break;
3030 
3031 	case GOMP_MAP_DETACH:
3032 	  break;
3033 	default:
3034 	  gomp_mutex_unlock (&devicep->lock);
3035 	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3036 		      kind);
3037 	}
3038     }
3039 
3040   for (int i = 0; i < nrmvars; i++)
3041     gomp_remove_var (devicep, remove_vars[i]);
3042 
3043   gomp_mutex_unlock (&devicep->lock);
3044 }
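
/* Editorial example (not part of the original source): a minimal sketch of
   how the "release" and "delete" map kinds, which arrive in gomp_exit_data
   as GOMP_MAP_RELEASE and GOMP_MAP_DELETE, differ at the user level.  Kept
   under #if 0 so this file still compiles; extract it into a test program
   built with -fopenmp to run it.  */
#if 0
#include <stddef.h>

void
example_release_vs_delete (double *a, size_t n)
{
  #pragma omp target enter data map(to: a[0:n])
  #pragma omp target enter data map(to: a[0:n])	/* Refcount is now 2.  */

  /* "release" only decrements the reference count; A stays mapped.  */
  #pragma omp target exit data map(release: a[0:n])

  /* "delete" forces the refcount to zero and unmaps A regardless.  */
  #pragma omp target exit data map(delete: a[0:n])
}
#endif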
3045 
3046 void
3047 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3048 			     size_t *sizes, unsigned short *kinds,
3049 			     unsigned int flags, void **depend)
3050 {
3051   struct gomp_device_descr *devicep = resolve_device (device);
3052 
3053   /* If there are depend clauses, but nowait is not present,
3054      block the parent task until the dependencies are resolved
3055      and then just continue with the rest of the function as if it
3056      were a merged task.  Until we are able to schedule tasks during
3057      variable mapping or unmapping, ignore nowait if depend clauses
3058      are not present.  */
3059   if (depend != NULL)
3060     {
3061       struct gomp_thread *thr = gomp_thread ();
3062       if (thr->task && thr->task->depend_hash)
3063 	{
3064 	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3065 	      && thr->ts.team
3066 	      && !thr->task->final_task)
3067 	    {
3068 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3069 					   mapnum, hostaddrs, sizes, kinds,
3070 					   flags, depend, NULL,
3071 					   GOMP_TARGET_TASK_DATA))
3072 		return;
3073 	    }
3074 	  else
3075 	    {
3076 	      struct gomp_team *team = thr->ts.team;
3077 	      /* If parallel or taskgroup has been cancelled, don't start new
3078 		 tasks.  */
3079 	      if (__builtin_expect (gomp_cancel_var, 0) && team)
3080 		{
3081 		  if (gomp_team_barrier_cancelled (&team->barrier))
3082 		    return;
3083 		  if (thr->task->taskgroup)
3084 		    {
3085 		      if (thr->task->taskgroup->cancelled)
3086 			return;
3087 		      if (thr->task->taskgroup->workshare
3088 			  && thr->task->taskgroup->prev
3089 			  && thr->task->taskgroup->prev->cancelled)
3090 			return;
3091 		    }
3092 		}
3093 
3094 	      gomp_task_maybe_wait_for_dependencies (depend);
3095 	    }
3096 	}
3097     }
3098 
3099   if (devicep == NULL
3100       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3101       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3102     return;
3103 
3104   struct gomp_thread *thr = gomp_thread ();
3105   struct gomp_team *team = thr->ts.team;
3106   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
3107   if (__builtin_expect (gomp_cancel_var, 0) && team)
3108     {
3109       if (gomp_team_barrier_cancelled (&team->barrier))
3110 	return;
3111       if (thr->task->taskgroup)
3112 	{
3113 	  if (thr->task->taskgroup->cancelled)
3114 	    return;
3115 	  if (thr->task->taskgroup->workshare
3116 	      && thr->task->taskgroup->prev
3117 	      && thr->task->taskgroup->prev->cancelled)
3118 	    return;
3119 	}
3120     }
3121 
3122   htab_t refcount_set = htab_create (mapnum);
3123 
3124   /* The variables are mapped separately such that they can be released
3125      independently.  */
3126   size_t i, j;
3127   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3128     for (i = 0; i < mapnum; i++)
3129       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3130 	{
3131 	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3132 			 &kinds[i], true, &refcount_set,
3133 			 GOMP_MAP_VARS_ENTER_DATA);
3134 	  i += sizes[i];
3135 	}
3136       else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3137 	{
3138 	  for (j = i + 1; j < mapnum; j++)
3139 	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3140 		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3141 	      break;
3142 	  gomp_map_vars (devicep, j - i, &hostaddrs[i], NULL, &sizes[i],
3143 			 &kinds[i], true, &refcount_set,
3144 			 GOMP_MAP_VARS_ENTER_DATA);
3145 	  i += j - i - 1;
3146 	}
3147       else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3148 	{
3149 	  /* An attach operation must be processed together with the mapped
3150 	     base-pointer list item.  */
3151 	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3152 			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3153 	  i += 1;
3154 	}
3155       else
3156 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3157 		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3158   else
3159     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3160   htab_free (refcount_set);
3161 }
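
/* Editorial example (not part of the original source): the user-level
   directives that GCC lowers to GOMP_target_enter_exit_data, without and
   with GOMP_TARGET_FLAG_EXIT_DATA respectively.  A sketch under #if 0;
   extract it into its own translation unit to run it.  */
#if 0
#include <stdlib.h>

void
example_enter_exit (size_t n)
{
  double *a = malloc (n * sizeof (double));

  /* Maps A and increments its dynamic reference count.  */
  #pragma omp target enter data map(to: a[0:n])

  #pragma omp target
  for (size_t i = 0; i < n; i++)
    a[i] = 2.0 * i;

  /* Copies A back to the host and decrements the reference count.  */
  #pragma omp target exit data map(from: a[0:n])

  free (a);
}
#endif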
3162 
3163 bool
3164 gomp_target_task_fn (void *data)
3165 {
3166   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3167   struct gomp_device_descr *devicep = ttask->devicep;
3168 
3169   if (ttask->fn != NULL)
3170     {
3171       void *fn_addr;
3172       if (devicep == NULL
3173 	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3174 	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3175 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3176 	{
3177 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
3178 	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3179 				ttask->args);
3180 	  return false;
3181 	}
3182 
3183       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3184 	{
3185 	  if (ttask->tgt)
3186 	    gomp_unmap_vars (ttask->tgt, true, NULL);
3187 	  return false;
3188 	}
3189 
3190       void *actual_arguments;
3191       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3192 	{
3193 	  ttask->tgt = NULL;
3194 	  actual_arguments = ttask->hostaddrs;
3195 	}
3196       else
3197 	{
3198 	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3199 				      NULL, ttask->sizes, ttask->kinds, true,
3200 				      NULL, GOMP_MAP_VARS_TARGET);
3201 	  actual_arguments = (void *) ttask->tgt->tgt_start;
3202 	}
3203       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3204 
3205       assert (devicep->async_run_func);
3206       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3207 			       ttask->args, (void *) ttask);
3208       return true;
3209     }
3210   else if (devicep == NULL
3211 	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3212 	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3213     return false;
3214 
3215   size_t i;
3216   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3217     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3218 		 ttask->kinds, true);
3219   else
3220     {
3221       htab_t refcount_set = htab_create (ttask->mapnum);
3222       if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3223 	for (i = 0; i < ttask->mapnum; i++)
3224 	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3225 	    {
3226 	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3227 			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3228 			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3229 	      i += ttask->sizes[i];
3230 	    }
3231 	  else
3232 	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3233 			   &ttask->kinds[i], true, &refcount_set,
3234 			   GOMP_MAP_VARS_ENTER_DATA);
3235       else
3236 	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3237 			ttask->kinds, &refcount_set);
3238       htab_free (refcount_set);
3239     }
3240   return false;
3241 }
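
/* Editorial example (not part of the original source): the kind of
   construct that reaches gomp_target_task_fn -- a deferred target region
   with depend/nowait clauses, created via gomp_create_target_task and run
   later by the task scheduler.  A sketch under #if 0.  */
#if 0
#include <stddef.h>

void
example_target_task (double *a, double *b, size_t n)
{
  #pragma omp task depend(out: a[0])
  a[0] = 1.0;

  /* Deferred; executed once the dependence on a[0] is satisfied.  */
  #pragma omp target nowait depend(in: a[0]) map(to: a[0:n]) map(from: b[0:n])
  for (size_t i = 0; i < n; i++)
    b[i] = 2.0 * a[i];

  #pragma omp taskwait
}
#endif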
3242 
3243 void
3244 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3245 {
3246   if (thread_limit)
3247     {
3248       struct gomp_task_icv *icv = gomp_icv (true);
3249       icv->thread_limit_var
3250 	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3251     }
3252   (void) num_teams;
3253 }
3254 
3255 bool
3256 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3257 	     unsigned int thread_limit, bool first)
3258 {
3259   struct gomp_thread *thr = gomp_thread ();
3260   if (first)
3261     {
3262       if (thread_limit)
3263 	{
3264 	  struct gomp_task_icv *icv = gomp_icv (true);
3265 	  icv->thread_limit_var
3266 	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3267 	}
3268       (void) num_teams_high;
3269       if (num_teams_low == 0)
3270 	num_teams_low = 1;
3271       thr->num_teams = num_teams_low - 1;
3272       thr->team_num = 0;
3273     }
3274   else if (thr->team_num == thr->num_teams)
3275     return false;
3276   else
3277     ++thr->team_num;
3278   return true;
3279 }
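
/* Editorial sketch (not part of the original source): roughly how
   compiler-generated host-fallback code drives GOMP_teams4 -- one
   FIRST=true call to set up the team count, then FIRST=false calls that
   advance the team number until it is exhausted.  */
#if 0
void
example_teams_fallback (void)
{
  if (GOMP_teams4 (4, 4, 0, true))
    do
      {
	/* Body of the teams region; one logical team per iteration,
	   with omp_get_team_num () reporting the current team.  */
      }
    while (GOMP_teams4 (4, 4, 0, false));
}
#endif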
3280 
3281 void *
3282 omp_target_alloc (size_t size, int device_num)
3283 {
3284   if (device_num == gomp_get_num_devices ())
3285     return malloc (size);
3286 
3287   if (device_num < 0)
3288     return NULL;
3289 
3290   struct gomp_device_descr *devicep = resolve_device (device_num);
3291   if (devicep == NULL)
3292     return NULL;
3293 
3294   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3295       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3296     return malloc (size);
3297 
3298   gomp_mutex_lock (&devicep->lock);
3299   void *ret = devicep->alloc_func (devicep->target_id, size);
3300   gomp_mutex_unlock (&devicep->lock);
3301   return ret;
3302 }
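
/* Editorial example (not part of the original source): allocating and
   freeing device memory through the API above.  Passing
   omp_get_num_devices () would select the host, which, as the code shows,
   degenerates to plain malloc/free.  */
#if 0
#include <omp.h>

void
example_alloc (void)
{
  int dev = omp_get_default_device ();
  void *p = omp_target_alloc (4096, dev);
  if (p != NULL)
    omp_target_free (p, dev);
}
#endif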
3303 
3304 void
3305 omp_target_free (void *device_ptr, int device_num)
3306 {
3307   if (device_ptr == NULL)
3308     return;
3309 
3310   if (device_num == gomp_get_num_devices ())
3311     {
3312       free (device_ptr);
3313       return;
3314     }
3315 
3316   if (device_num < 0)
3317     return;
3318 
3319   struct gomp_device_descr *devicep = resolve_device (device_num);
3320   if (devicep == NULL)
3321     return;
3322 
3323   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3324       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3325     {
3326       free (device_ptr);
3327       return;
3328     }
3329 
3330   gomp_mutex_lock (&devicep->lock);
3331   gomp_free_device_memory (devicep, device_ptr);
3332   gomp_mutex_unlock (&devicep->lock);
3333 }
3334 
3335 int
3336 omp_target_is_present (const void *ptr, int device_num)
3337 {
3338   if (ptr == NULL)
3339     return 1;
3340 
3341   if (device_num == gomp_get_num_devices ())
3342     return 1;
3343 
3344   if (device_num < 0)
3345     return 0;
3346 
3347   struct gomp_device_descr *devicep = resolve_device (device_num);
3348   if (devicep == NULL)
3349     return 0;
3350 
3351   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3352       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3353     return 1;
3354 
3355   gomp_mutex_lock (&devicep->lock);
3356   struct splay_tree_s *mem_map = &devicep->mem_map;
3357   struct splay_tree_key_s cur_node;
3358 
3359   cur_node.host_start = (uintptr_t) ptr;
3360   cur_node.host_end = cur_node.host_start;
3361   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3362   int ret = n != NULL;
3363   gomp_mutex_unlock (&devicep->lock);
3364   return ret;
3365 }
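
/* Editorial example (not part of the original source): querying the
   mapping table consulted by the splay-tree lookup above.  Inside the
   enclosing "target data" region the array is present; afterwards the
   lookup fails.  */
#if 0
#include <omp.h>
#include <stddef.h>

int
example_is_present (double *a, size_t n)
{
  int dev = omp_get_default_device ();
  int inside, outside;

  #pragma omp target data map(to: a[0:n])
  inside = omp_target_is_present (a, dev);

  outside = omp_target_is_present (a, dev);
  return inside && !outside;
}
#endif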
3366 
3367 int
3368 omp_target_memcpy (void *dst, const void *src, size_t length,
3369 		   size_t dst_offset, size_t src_offset, int dst_device_num,
3370 		   int src_device_num)
3371 {
3372   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3373   bool ret;
3374 
3375   if (dst_device_num != gomp_get_num_devices ())
3376     {
3377       if (dst_device_num < 0)
3378 	return EINVAL;
3379 
3380       dst_devicep = resolve_device (dst_device_num);
3381       if (dst_devicep == NULL)
3382 	return EINVAL;
3383 
3384       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3385 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3386 	dst_devicep = NULL;
3387     }
3388   if (src_device_num != num_devices_openmp)
3389     {
3390       if (src_device_num < 0)
3391 	return EINVAL;
3392 
3393       src_devicep = resolve_device (src_device_num);
3394       if (src_devicep == NULL)
3395 	return EINVAL;
3396 
3397       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3398 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3399 	src_devicep = NULL;
3400     }
3401   if (src_devicep == NULL && dst_devicep == NULL)
3402     {
3403       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3404       return 0;
3405     }
3406   if (src_devicep == NULL)
3407     {
3408       gomp_mutex_lock (&dst_devicep->lock);
3409       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3410 					(char *) dst + dst_offset,
3411 					(char *) src + src_offset, length);
3412       gomp_mutex_unlock (&dst_devicep->lock);
3413       return (ret ? 0 : EINVAL);
3414     }
3415   if (dst_devicep == NULL)
3416     {
3417       gomp_mutex_lock (&src_devicep->lock);
3418       ret = src_devicep->dev2host_func (src_devicep->target_id,
3419 					(char *) dst + dst_offset,
3420 					(char *) src + src_offset, length);
3421       gomp_mutex_unlock (&src_devicep->lock);
3422       return (ret ? 0 : EINVAL);
3423     }
3424   if (src_devicep == dst_devicep)
3425     {
3426       gomp_mutex_lock (&src_devicep->lock);
3427       ret = src_devicep->dev2dev_func (src_devicep->target_id,
3428 				       (char *) dst + dst_offset,
3429 				       (char *) src + src_offset, length);
3430       gomp_mutex_unlock (&src_devicep->lock);
3431       return (ret ? 0 : EINVAL);
3432     }
3433   return EINVAL;
3434 }
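
/* Editorial example (not part of the original source): staging a host
   buffer to device memory and back.  The host itself is addressed by
   omp_get_num_devices (), which is why the function above special-cases
   that value.  Returns 0 on success and EINVAL on failure, as above.  */
#if 0
#include <omp.h>
#include <string.h>

int
example_memcpy (void)
{
  int dev = omp_get_default_device ();
  int host = omp_get_num_devices ();
  double src[256], dst[256];
  memset (src, 0, sizeof src);

  void *d = omp_target_alloc (sizeof src, dev);
  if (d == NULL)
    return -1;

  /* Host -> device, then device -> host.  */
  int err = omp_target_memcpy (d, src, sizeof src, 0, 0, dev, host);
  if (err == 0)
    err = omp_target_memcpy (dst, d, sizeof dst, 0, 0, host, dev);

  omp_target_free (d, dev);
  return err;
}
#endif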
3435 
3436 static int
3437 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3438 			       int num_dims, const size_t *volume,
3439 			       const size_t *dst_offsets,
3440 			       const size_t *src_offsets,
3441 			       const size_t *dst_dimensions,
3442 			       const size_t *src_dimensions,
3443 			       struct gomp_device_descr *dst_devicep,
3444 			       struct gomp_device_descr *src_devicep)
3445 {
3446   size_t dst_slice = element_size;
3447   size_t src_slice = element_size;
3448   size_t j, dst_off, src_off, length;
3449   int i, ret;
3450 
3451   if (num_dims == 1)
3452     {
3453       if (__builtin_mul_overflow (element_size, volume[0], &length)
3454 	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3455 	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3456 	return EINVAL;
3457       if (dst_devicep == NULL && src_devicep == NULL)
3458 	{
3459 	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3460 		  length);
3461 	  ret = 1;
3462 	}
3463       else if (src_devicep == NULL)
3464 	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3465 					  (char *) dst + dst_off,
3466 					  (const char *) src + src_off,
3467 					  length);
3468       else if (dst_devicep == NULL)
3469 	ret = src_devicep->dev2host_func (src_devicep->target_id,
3470 					  (char *) dst + dst_off,
3471 					  (const char *) src + src_off,
3472 					  length);
3473       else if (src_devicep == dst_devicep)
3474 	ret = src_devicep->dev2dev_func (src_devicep->target_id,
3475 					 (char *) dst + dst_off,
3476 					 (const char *) src + src_off,
3477 					 length);
3478       else
3479 	ret = 0;
3480       return ret ? 0 : EINVAL;
3481     }
3482 
3483   /* FIXME: it would be nice to have some plugin function to handle
3484      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
3485      be handled in the generic recursion below, and for host-host it
3486      should be used even for any num_dims >= 2.  */
3487 
3488   for (i = 1; i < num_dims; i++)
3489     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3490 	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3491       return EINVAL;
3492   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3493       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3494     return EINVAL;
3495   for (j = 0; j < volume[0]; j++)
3496     {
3497       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3498 					   (const char *) src + src_off,
3499 					   element_size, num_dims - 1,
3500 					   volume + 1, dst_offsets + 1,
3501 					   src_offsets + 1, dst_dimensions + 1,
3502 					   src_dimensions + 1, dst_devicep,
3503 					   src_devicep);
3504       if (ret)
3505 	return ret;
3506       dst_off += dst_slice;
3507       src_off += src_slice;
3508     }
3509   return 0;
3510 }
3511 
3512 int
3513 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3514 			int num_dims, const size_t *volume,
3515 			const size_t *dst_offsets,
3516 			const size_t *src_offsets,
3517 			const size_t *dst_dimensions,
3518 			const size_t *src_dimensions,
3519 			int dst_device_num, int src_device_num)
3520 {
3521   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3522 
3523   if (!dst && !src)
3524     return INT_MAX;
3525 
3526   if (dst_device_num != gomp_get_num_devices ())
3527     {
3528       if (dst_device_num < 0)
3529 	return EINVAL;
3530 
3531       dst_devicep = resolve_device (dst_device_num);
3532       if (dst_devicep == NULL)
3533 	return EINVAL;
3534 
3535       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3536 	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3537 	dst_devicep = NULL;
3538     }
3539   if (src_device_num != num_devices_openmp)
3540     {
3541       if (src_device_num < 0)
3542 	return EINVAL;
3543 
3544       src_devicep = resolve_device (src_device_num);
3545       if (src_devicep == NULL)
3546 	return EINVAL;
3547 
3548       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3549 	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3550 	src_devicep = NULL;
3551     }
3552 
3553   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3554     return EINVAL;
3555 
3556   if (src_devicep)
3557     gomp_mutex_lock (&src_devicep->lock);
3558   else if (dst_devicep)
3559     gomp_mutex_lock (&dst_devicep->lock);
3560   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3561 					   volume, dst_offsets, src_offsets,
3562 					   dst_dimensions, src_dimensions,
3563 					   dst_devicep, src_devicep);
3564   if (src_devicep)
3565     gomp_mutex_unlock (&src_devicep->lock);
3566   else if (dst_devicep)
3567     gomp_mutex_unlock (&dst_devicep->lock);
3568   return ret;
3569 }
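
/* Editorial example (not part of the original source): copying a 2x3
   sub-block starting at src[1][2] out of a 4x5 host matrix, exercising
   the recursion above with num_dims == 2 (both device numbers name the
   host here, so each row ends up as a plain memcpy).  */
#if 0
#include <omp.h>
#include <stddef.h>

void
example_memcpy_rect (void)
{
  int host = omp_get_num_devices ();
  double src[4][5] = { { 0 } }, dst[2][3];
  const size_t volume[2]      = { 2, 3 };	/* Rows/columns to copy.  */
  const size_t src_offsets[2] = { 1, 2 };	/* Start at src[1][2].  */
  const size_t dst_offsets[2] = { 0, 0 };
  const size_t src_dims[2]    = { 4, 5 };
  const size_t dst_dims[2]    = { 2, 3 };

  omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume, dst_offsets,
			  src_offsets, dst_dims, src_dims, host, host);
}
#endif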
3570 
3571 int
3572 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3573 			  size_t size, size_t device_offset, int device_num)
3574 {
3575   if (device_num == gomp_get_num_devices ())
3576     return EINVAL;
3577 
3578   if (device_num < 0)
3579     return EINVAL;
3580 
3581   struct gomp_device_descr *devicep = resolve_device (device_num);
3582   if (devicep == NULL)
3583     return EINVAL;
3584 
3585   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3586       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3587     return EINVAL;
3588 
3589   gomp_mutex_lock (&devicep->lock);
3590 
3591   struct splay_tree_s *mem_map = &devicep->mem_map;
3592   struct splay_tree_key_s cur_node;
3593   int ret = EINVAL;
3594 
3595   cur_node.host_start = (uintptr_t) host_ptr;
3596   cur_node.host_end = cur_node.host_start + size;
3597   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3598   if (n)
3599     {
3600       if (n->tgt->tgt_start + n->tgt_offset
3601 	  == (uintptr_t) device_ptr + device_offset
3602 	  && n->host_start <= cur_node.host_start
3603 	  && n->host_end >= cur_node.host_end)
3604 	ret = 0;
3605     }
3606   else
3607     {
3608       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3609       tgt->array = gomp_malloc (sizeof (*tgt->array));
3610       tgt->refcount = 1;
3611       tgt->tgt_start = 0;
3612       tgt->tgt_end = 0;
3613       tgt->to_free = NULL;
3614       tgt->prev = NULL;
3615       tgt->list_count = 0;
3616       tgt->device_descr = devicep;
3617       splay_tree_node array = tgt->array;
3618       splay_tree_key k = &array->key;
3619       k->host_start = cur_node.host_start;
3620       k->host_end = cur_node.host_end;
3621       k->tgt = tgt;
3622       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3623       k->refcount = REFCOUNT_INFINITY;
3624       k->dynamic_refcount = 0;
3625       k->aux = NULL;
3626       array->left = NULL;
3627       array->right = NULL;
3628       splay_tree_insert (&devicep->mem_map, array);
3629       ret = 0;
3630     }
3631   gomp_mutex_unlock (&devicep->lock);
3632   return ret;
3633 }
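
/* Editorial example (not part of the original source): pairing a device
   allocation with an existing host buffer so that later map clauses reuse
   it instead of allocating.  The REFCOUNT_INFINITY set above is what keeps
   the association alive until omp_target_disassociate_ptr.  */
#if 0
#include <omp.h>
#include <stddef.h>

void
example_associate (double *host_buf, size_t n)
{
  int dev = omp_get_default_device ();
  void *dev_buf = omp_target_alloc (n * sizeof (double), dev);
  if (dev_buf == NULL)
    return;

  if (omp_target_associate_ptr (host_buf, dev_buf, n * sizeof (double),
				0, dev) == 0)
    {
      /* Target regions mapping HOST_BUF now use DEV_BUF on the device.  */
      omp_target_disassociate_ptr (host_buf, dev);
    }
  omp_target_free (dev_buf, dev);
}
#endif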
3634 
3635 int
3636 omp_target_disassociate_ptr (const void *ptr, int device_num)
3637 {
3638   if (device_num == gomp_get_num_devices ())
3639     return EINVAL;
3640 
3641   if (device_num < 0)
3642     return EINVAL;
3643 
3644   struct gomp_device_descr *devicep = resolve_device (device_num);
3645   if (devicep == NULL)
3646     return EINVAL;
3647 
3648   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3649     return EINVAL;
3650 
3651   gomp_mutex_lock (&devicep->lock);
3652 
3653   struct splay_tree_s *mem_map = &devicep->mem_map;
3654   struct splay_tree_key_s cur_node;
3655   int ret = EINVAL;
3656 
3657   cur_node.host_start = (uintptr_t) ptr;
3658   cur_node.host_end = cur_node.host_start;
3659   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3660   if (n
3661       && n->host_start == cur_node.host_start
3662       && n->refcount == REFCOUNT_INFINITY
3663       && n->tgt->tgt_start == 0
3664       && n->tgt->to_free == NULL
3665       && n->tgt->refcount == 1
3666       && n->tgt->list_count == 0)
3667     {
3668       splay_tree_remove (&devicep->mem_map, n);
3669       gomp_unmap_tgt (n->tgt);
3670       ret = 0;
3671     }
3672 
3673   gomp_mutex_unlock (&devicep->lock);
3674   return ret;
3675 }
3676 
3677 int
3678 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3679 {
3680   (void) kind;
3681   if (device_num == gomp_get_num_devices ())
3682     return gomp_pause_host ();
3683   if (device_num < 0 || device_num >= num_devices_openmp)
3684     return -1;
3685   /* Do nothing for target devices for now.  */
3686   return 0;
3687 }
3688 
3689 int
3690 omp_pause_resource_all (omp_pause_resource_t kind)
3691 {
3692   (void) kind;
3693   if (gomp_pause_host ())
3694     return -1;
3695   /* Do nothing for target devices for now.  */
3696   return 0;
3697 }
3698 
3699 ialias (omp_pause_resource)
3700 ialias (omp_pause_resource_all)
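
/* Editorial example (not part of the original source): the pause API is
   currently a no-op for target devices, as the comments above note; only
   host resources are released.  */
#if 0
#include <omp.h>

void
example_pause (void)
{
  omp_pause_resource_all (omp_pause_soft);
}
#endif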
3701 
3702 #ifdef PLUGIN_SUPPORT
3703 
3704 /* This function tries to load a plugin for DEVICE.  The name of the plugin
3705    is passed in PLUGIN_NAME.
3706    The handles of the found functions are stored in the corresponding fields
3707    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3708 
3709 static bool
3710 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3711 			     const char *plugin_name)
3712 {
3713   const char *err = NULL, *last_missing = NULL;
3714 
3715   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3716   if (!plugin_handle)
3717 #if OFFLOAD_DEFAULTED
3718     return 0;
3719 #else
3720     goto dl_fail;
3721 #endif
3722 
3723   /* Check if all required functions are available in the plugin and store
3724      their handles.  None of the symbols can legitimately be NULL,
3725      so we don't need to check dlerror all the time.  */
3726 #define DLSYM(f)							\
3727   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
3728     goto dl_fail
3729   /* Similar, but missing functions are not an error.  Evaluates to false
3730      if the lookup failed, true otherwise.  */
3731 #define DLSYM_OPT(f, n)							\
3732   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
3733    || (last_missing = #n, 0))
3734 
3735   DLSYM (version);
3736   if (device->version_func () != GOMP_VERSION)
3737     {
3738       err = "plugin version mismatch";
3739       goto fail;
3740     }
3741 
3742   DLSYM (get_name);
3743   DLSYM (get_caps);
3744   DLSYM (get_type);
3745   DLSYM (get_num_devices);
3746   DLSYM (init_device);
3747   DLSYM (fini_device);
3748   DLSYM (load_image);
3749   DLSYM (unload_image);
3750   DLSYM (alloc);
3751   DLSYM (free);
3752   DLSYM (dev2host);
3753   DLSYM (host2dev);
3754   device->capabilities = device->get_caps_func ();
3755   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3756     {
3757       DLSYM (run);
3758       DLSYM_OPT (async_run, async_run);
3759       DLSYM_OPT (can_run, can_run);
3760       DLSYM (dev2dev);
3761     }
3762   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3763     {
3764       if (!DLSYM_OPT (openacc.exec, openacc_exec)
3765 	  || !DLSYM_OPT (openacc.create_thread_data,
3766 			 openacc_create_thread_data)
3767 	  || !DLSYM_OPT (openacc.destroy_thread_data,
3768 			 openacc_destroy_thread_data)
3769 	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3770 	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3771 	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3772 	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3773 	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3774 	  || !DLSYM_OPT (openacc.async.queue_callback,
3775 			 openacc_async_queue_callback)
3776 	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3777 	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3778 	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3779 	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3780 	{
3781 	  /* Require all the OpenACC handlers if we have
3782 	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
3783 	  err = "plugin missing OpenACC handler function";
3784 	  goto fail;
3785 	}
3786 
3787       unsigned cuda = 0;
3788       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3789 			 openacc_cuda_get_current_device);
3790       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3791 			 openacc_cuda_get_current_context);
3792       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3793       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3794       if (cuda && cuda != 4)
3795 	{
3796 	  /* Make sure all the CUDA functions are there if any of them are.  */
3797 	  err = "plugin missing OpenACC CUDA handler function";
3798 	  goto fail;
3799 	}
3800     }
3801 #undef DLSYM
3802 #undef DLSYM_OPT
3803 
3804   return 1;
3805 
3806  dl_fail:
3807   err = dlerror ();
3808  fail:
3809   gomp_error ("while loading %s: %s", plugin_name, err);
3810   if (last_missing)
3811     gomp_error ("missing function was %s", last_missing);
3812   if (plugin_handle)
3813     dlclose (plugin_handle);
3814 
3815   return 0;
3816 }
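
/* Editorial sketch (not part of the original source): the shape of the
   entry points a plugin DSO must export to pass the DLSYM checks above.
   Prototypes are paraphrased; the authoritative declarations live in
   libgomp-plugin.h.  Kept under #if 0 so this file still compiles.  */
#if 0
unsigned
GOMP_OFFLOAD_version (void)
{
  return GOMP_VERSION;		/* Must match, see the check above.  */
}

const char *
GOMP_OFFLOAD_get_name (void)
{
  return "example";
}

unsigned
GOMP_OFFLOAD_get_caps (void)
{
  return GOMP_OFFLOAD_CAP_OPENMP_400;
}

int
GOMP_OFFLOAD_get_num_devices (void)
{
  return 1;
}

/* Also required: get_type, init_device, fini_device, load_image,
   unload_image, alloc, free, dev2host, host2dev, and -- given the
   OpenMP 4.0 capability bit -- run and dev2dev.  */
#endif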
3817 
3818 /* This function finalizes all initialized devices.  */
3819 
3820 static void
3821 gomp_target_fini (void)
3822 {
3823   int i;
3824   for (i = 0; i < num_devices; i++)
3825     {
3826       bool ret = true;
3827       struct gomp_device_descr *devicep = &devices[i];
3828       gomp_mutex_lock (&devicep->lock);
3829       if (devicep->state == GOMP_DEVICE_INITIALIZED)
3830 	ret = gomp_fini_device (devicep);
3831       gomp_mutex_unlock (&devicep->lock);
3832       if (!ret)
3833 	gomp_fatal ("device finalization failed");
3834     }
3835 }
3836 
3837 /* This function initializes the runtime for offloading.
3838    It parses the list of offload plugins and tries to load them.
3839    On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3840    will be set, and the array DEVICES initialized, containing descriptors for
3841    the corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
3842    followed by the others.  */
3843 
3844 static void
3845 gomp_target_init (void)
3846 {
3847   const char *prefix = "libgomp-plugin-";
3848   const char *suffix = SONAME_SUFFIX (1);
3849   const char *cur, *next;
3850   char *plugin_name;
3851   int i, new_num_devs;
3852   int num_devs = 0, num_devs_openmp;
3853   struct gomp_device_descr *devs = NULL;
3854 
3855   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3856     return;
3857 
3858   cur = OFFLOAD_PLUGINS;
3859   if (*cur)
3860     do
3861       {
3862 	struct gomp_device_descr current_device;
3863 	size_t prefix_len, suffix_len, cur_len;
3864 
3865 	next = strchr (cur, ',');
3866 
3867 	prefix_len = strlen (prefix);
3868 	cur_len = next ? next - cur : strlen (cur);
3869 	suffix_len = strlen (suffix);
3870 
3871 	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3872 	if (!plugin_name)
3873 	  {
3874 	    num_devs = 0;
3875 	    break;
3876 	  }
3877 
3878 	memcpy (plugin_name, prefix, prefix_len);
3879 	memcpy (plugin_name + prefix_len, cur, cur_len);
3880 	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3881 
3882 	if (gomp_load_plugin_for_device (&current_device, plugin_name))
3883 	  {
3884 	    new_num_devs = current_device.get_num_devices_func ();
3885 	    if (new_num_devs >= 1)
3886 	      {
3887 		/* Augment DEVICES and NUM_DEVICES.  */
3888 
3889 		devs = realloc (devs, (num_devs + new_num_devs)
3890 				      * sizeof (struct gomp_device_descr));
3891 		if (!devs)
3892 		  {
3893 		    num_devs = 0;
3894 		    free (plugin_name);
3895 		    break;
3896 		  }
3897 
3898 		current_device.name = current_device.get_name_func ();
3899 		/* current_device.capabilities has already been set.  */
3900 		current_device.type = current_device.get_type_func ();
3901 		current_device.mem_map.root = NULL;
3902 		current_device.state = GOMP_DEVICE_UNINITIALIZED;
3903 		for (i = 0; i < new_num_devs; i++)
3904 		  {
3905 		    current_device.target_id = i;
3906 		    devs[num_devs] = current_device;
3907 		    gomp_mutex_init (&devs[num_devs].lock);
3908 		    num_devs++;
3909 		  }
3910 	      }
3911 	  }
3912 
3913 	free (plugin_name);
3914 	cur = next + 1;
3915       }
3916     while (next);
3917 
3918   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3919      NUM_DEVICES_OPENMP.  */
3920   struct gomp_device_descr *devs_s
3921     = malloc (num_devs * sizeof (struct gomp_device_descr));
3922   if (!devs_s)
3923     {
3924       num_devs = 0;
3925       free (devs);
3926       devs = NULL;
3927     }
3928   num_devs_openmp = 0;
3929   for (i = 0; i < num_devs; i++)
3930     if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3931       devs_s[num_devs_openmp++] = devs[i];
3932   int num_devs_after_openmp = num_devs_openmp;
3933   for (i = 0; i < num_devs; i++)
3934     if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3935       devs_s[num_devs_after_openmp++] = devs[i];
3936   free (devs);
3937   devs = devs_s;
3938 
3939   for (i = 0; i < num_devs; i++)
3940     {
3941       /* The 'devices' array can be moved (by the realloc call) until we have
3942 	 found all the plugins, so registering with the OpenACC runtime (which
3943 	 takes a copy of the pointer argument) must be delayed until now.  */
3944       if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3945 	goacc_register (&devs[i]);
3946     }
3947 
3948   num_devices = num_devs;
3949   num_devices_openmp = num_devs_openmp;
3950   devices = devs;
3951   if (atexit (gomp_target_fini) != 0)
3952     gomp_fatal ("atexit failed");
3953 }
3954 
3955 #else /* PLUGIN_SUPPORT */
3956 /* If dlfcn.h is unavailable we always fall back to host execution.
3957    GOMP_target* routines are just stubs for this case.  */
3958 static void
3959 gomp_target_init (void)
3960 {
3961 }
3962 #endif /* PLUGIN_SUPPORT */
3963