1*38fd1498Szrj /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2*38fd1498Szrj Contributed by Jakub Jelinek <jakub@redhat.com>.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of the GNU Offloading and Multi Processing Library
5*38fd1498Szrj (libgomp).
6*38fd1498Szrj
7*38fd1498Szrj Libgomp is free software; you can redistribute it and/or modify it
8*38fd1498Szrj under the terms of the GNU General Public License as published by
9*38fd1498Szrj the Free Software Foundation; either version 3, or (at your option)
10*38fd1498Szrj any later version.
11*38fd1498Szrj
12*38fd1498Szrj Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14*38fd1498Szrj FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15*38fd1498Szrj more details.
16*38fd1498Szrj
17*38fd1498Szrj Under Section 7 of GPL version 3, you are granted additional
18*38fd1498Szrj permissions described in the GCC Runtime Library Exception, version
19*38fd1498Szrj 3.1, as published by the Free Software Foundation.
20*38fd1498Szrj
21*38fd1498Szrj You should have received a copy of the GNU General Public License and
22*38fd1498Szrj a copy of the GCC Runtime Library Exception along with this program;
23*38fd1498Szrj see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24*38fd1498Szrj <http://www.gnu.org/licenses/>. */
25*38fd1498Szrj
26*38fd1498Szrj /* This file contains the support of offloading. */
27*38fd1498Szrj
28*38fd1498Szrj #include "config.h"
29*38fd1498Szrj #include "libgomp.h"
30*38fd1498Szrj #include "oacc-plugin.h"
31*38fd1498Szrj #include "oacc-int.h"
32*38fd1498Szrj #include "gomp-constants.h"
33*38fd1498Szrj #include <limits.h>
34*38fd1498Szrj #include <stdbool.h>
35*38fd1498Szrj #include <stdlib.h>
36*38fd1498Szrj #ifdef HAVE_INTTYPES_H
37*38fd1498Szrj # include <inttypes.h> /* For PRIu64. */
38*38fd1498Szrj #endif
39*38fd1498Szrj #include <string.h>
40*38fd1498Szrj #include <assert.h>
41*38fd1498Szrj #include <errno.h>
42*38fd1498Szrj
43*38fd1498Szrj #ifdef PLUGIN_SUPPORT
44*38fd1498Szrj #include <dlfcn.h>
45*38fd1498Szrj #include "plugin-suffix.h"
46*38fd1498Szrj #endif
47*38fd1498Szrj
/* One-time initialization of all offload plugins; defined below.  */
static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  /* Plugin interface version the image was built against.  */
  unsigned version;
  /* Which kind of offload device this image targets.  */
  enum offload_target_type type;
  /* Host-side table descriptor (opaque here; interpreted by the plugin).  */
  const void *host_table;
  /* Target-side image data (opaque here; interpreted by the plugin).  */
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;
80*38fd1498Szrj
81*38fd1498Szrj /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
82*38fd1498Szrj
83*38fd1498Szrj static void *
gomp_realloc_unlock(void * old,size_t size)84*38fd1498Szrj gomp_realloc_unlock (void *old, size_t size)
85*38fd1498Szrj {
86*38fd1498Szrj void *ret = realloc (old, size);
87*38fd1498Szrj if (ret == NULL)
88*38fd1498Szrj {
89*38fd1498Szrj gomp_mutex_unlock (®ister_lock);
90*38fd1498Szrj gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
91*38fd1498Szrj }
92*38fd1498Szrj return ret;
93*38fd1498Szrj }
94*38fd1498Szrj
95*38fd1498Szrj attribute_hidden void
gomp_init_targets_once(void)96*38fd1498Szrj gomp_init_targets_once (void)
97*38fd1498Szrj {
98*38fd1498Szrj (void) pthread_once (&gomp_is_initialized, gomp_target_init);
99*38fd1498Szrj }
100*38fd1498Szrj
101*38fd1498Szrj attribute_hidden int
gomp_get_num_devices(void)102*38fd1498Szrj gomp_get_num_devices (void)
103*38fd1498Szrj {
104*38fd1498Szrj gomp_init_targets_once ();
105*38fd1498Szrj return num_devices_openmp;
106*38fd1498Szrj }
107*38fd1498Szrj
108*38fd1498Szrj static struct gomp_device_descr *
resolve_device(int device_id)109*38fd1498Szrj resolve_device (int device_id)
110*38fd1498Szrj {
111*38fd1498Szrj if (device_id == GOMP_DEVICE_ICV)
112*38fd1498Szrj {
113*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (false);
114*38fd1498Szrj device_id = icv->default_device_var;
115*38fd1498Szrj }
116*38fd1498Szrj
117*38fd1498Szrj if (device_id < 0 || device_id >= gomp_get_num_devices ())
118*38fd1498Szrj return NULL;
119*38fd1498Szrj
120*38fd1498Szrj gomp_mutex_lock (&devices[device_id].lock);
121*38fd1498Szrj if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
122*38fd1498Szrj gomp_init_device (&devices[device_id]);
123*38fd1498Szrj else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
124*38fd1498Szrj {
125*38fd1498Szrj gomp_mutex_unlock (&devices[device_id].lock);
126*38fd1498Szrj return NULL;
127*38fd1498Szrj }
128*38fd1498Szrj gomp_mutex_unlock (&devices[device_id].lock);
129*38fd1498Szrj
130*38fd1498Szrj return &devices[device_id];
131*38fd1498Szrj }
132*38fd1498Szrj
133*38fd1498Szrj
134*38fd1498Szrj static inline splay_tree_key
gomp_map_lookup(splay_tree mem_map,splay_tree_key key)135*38fd1498Szrj gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
136*38fd1498Szrj {
137*38fd1498Szrj if (key->host_start != key->host_end)
138*38fd1498Szrj return splay_tree_lookup (mem_map, key);
139*38fd1498Szrj
140*38fd1498Szrj key->host_end++;
141*38fd1498Szrj splay_tree_key n = splay_tree_lookup (mem_map, key);
142*38fd1498Szrj key->host_end--;
143*38fd1498Szrj if (n)
144*38fd1498Szrj return n;
145*38fd1498Szrj key->host_start--;
146*38fd1498Szrj n = splay_tree_lookup (mem_map, key);
147*38fd1498Szrj key->host_start++;
148*38fd1498Szrj if (n)
149*38fd1498Szrj return n;
150*38fd1498Szrj return splay_tree_lookup (mem_map, key);
151*38fd1498Szrj }
152*38fd1498Szrj
153*38fd1498Szrj static inline splay_tree_key
gomp_map_0len_lookup(splay_tree mem_map,splay_tree_key key)154*38fd1498Szrj gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
155*38fd1498Szrj {
156*38fd1498Szrj if (key->host_start != key->host_end)
157*38fd1498Szrj return splay_tree_lookup (mem_map, key);
158*38fd1498Szrj
159*38fd1498Szrj key->host_end++;
160*38fd1498Szrj splay_tree_key n = splay_tree_lookup (mem_map, key);
161*38fd1498Szrj key->host_end--;
162*38fd1498Szrj return n;
163*38fd1498Szrj }
164*38fd1498Szrj
165*38fd1498Szrj static inline void
gomp_device_copy(struct gomp_device_descr * devicep,bool (* copy_func)(int,void *,const void *,size_t),const char * dst,void * dstaddr,const char * src,const void * srcaddr,size_t size)166*38fd1498Szrj gomp_device_copy (struct gomp_device_descr *devicep,
167*38fd1498Szrj bool (*copy_func) (int, void *, const void *, size_t),
168*38fd1498Szrj const char *dst, void *dstaddr,
169*38fd1498Szrj const char *src, const void *srcaddr,
170*38fd1498Szrj size_t size)
171*38fd1498Szrj {
172*38fd1498Szrj if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
173*38fd1498Szrj {
174*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
175*38fd1498Szrj gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
176*38fd1498Szrj src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
177*38fd1498Szrj }
178*38fd1498Szrj }
179*38fd1498Szrj
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
   host to device memory transfers.  */

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  /* Target memory descriptor whose tgt_start all chunk offsets are
     relative to.  */
  struct target_mem_desc *tgt;
  /* Array with offsets, chunks[2 * i] is the starting offset and
     chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
     of chunks which are to be copied to buf and later copied to device.  */
  size_t *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of chunks array, how many memory regions are within
     the last chunk.  If there is just one memory region for a chunk, we copy
     it directly to device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)
213*38fd1498Szrj
214*38fd1498Szrj /* Add region with device tgt_start relative offset and length to CBUF. */
215*38fd1498Szrj
216*38fd1498Szrj static inline void
gomp_coalesce_buf_add(struct gomp_coalesce_buf * cbuf,size_t start,size_t len)217*38fd1498Szrj gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
218*38fd1498Szrj {
219*38fd1498Szrj if (len > MAX_COALESCE_BUF_SIZE || len == 0)
220*38fd1498Szrj return;
221*38fd1498Szrj if (cbuf->chunk_cnt)
222*38fd1498Szrj {
223*38fd1498Szrj if (cbuf->chunk_cnt < 0)
224*38fd1498Szrj return;
225*38fd1498Szrj if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
226*38fd1498Szrj {
227*38fd1498Szrj cbuf->chunk_cnt = -1;
228*38fd1498Szrj return;
229*38fd1498Szrj }
230*38fd1498Szrj if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
231*38fd1498Szrj {
232*38fd1498Szrj cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
233*38fd1498Szrj cbuf->use_cnt++;
234*38fd1498Szrj return;
235*38fd1498Szrj }
236*38fd1498Szrj /* If the last chunk is only used by one mapping, discard it,
237*38fd1498Szrj as it will be one host to device copy anyway and
238*38fd1498Szrj memcpying it around will only waste cycles. */
239*38fd1498Szrj if (cbuf->use_cnt == 1)
240*38fd1498Szrj cbuf->chunk_cnt--;
241*38fd1498Szrj }
242*38fd1498Szrj cbuf->chunks[2 * cbuf->chunk_cnt] = start;
243*38fd1498Szrj cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
244*38fd1498Szrj cbuf->chunk_cnt++;
245*38fd1498Szrj cbuf->use_cnt = 1;
246*38fd1498Szrj }
247*38fd1498Szrj
248*38fd1498Szrj /* Return true for mapping kinds which need to copy data from the
249*38fd1498Szrj host to device for regions that weren't previously mapped. */
250*38fd1498Szrj
251*38fd1498Szrj static inline bool
gomp_to_device_kind_p(int kind)252*38fd1498Szrj gomp_to_device_kind_p (int kind)
253*38fd1498Szrj {
254*38fd1498Szrj switch (kind)
255*38fd1498Szrj {
256*38fd1498Szrj case GOMP_MAP_ALLOC:
257*38fd1498Szrj case GOMP_MAP_FROM:
258*38fd1498Szrj case GOMP_MAP_FORCE_ALLOC:
259*38fd1498Szrj case GOMP_MAP_ALWAYS_FROM:
260*38fd1498Szrj return false;
261*38fd1498Szrj default:
262*38fd1498Szrj return true;
263*38fd1498Szrj }
264*38fd1498Szrj }
265*38fd1498Szrj
/* Copy SZ bytes from host address H to device address D.  If CBUF is
   non-NULL and D falls inside one of its recorded chunks, stage the bytes
   into the host-side coalesce buffer instead (they reach the device later
   in one larger transfer); otherwise perform a direct copy.  */

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      /* Offset of D relative to the freshly allocated device block.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
	{
	  /* Binary search the sorted, disjoint chunk list for the chunk
	     containing DOFF.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[2 * middle + 1] <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[2 * middle] <= doff)
		{
		  /* A staged region must lie entirely inside one chunk.  */
		  if (doff + sz > cbuf->chunks[2 * middle + 1])
		    gomp_fatal ("internal libgomp cbuf error");
		  /* BUF is laid out starting at device offset chunks[0].  */
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
298*38fd1498Szrj
299*38fd1498Szrj static void
gomp_copy_dev2host(struct gomp_device_descr * devicep,void * h,const void * d,size_t sz)300*38fd1498Szrj gomp_copy_dev2host (struct gomp_device_descr *devicep,
301*38fd1498Szrj void *h, const void *d, size_t sz)
302*38fd1498Szrj {
303*38fd1498Szrj gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
304*38fd1498Szrj }
305*38fd1498Szrj
306*38fd1498Szrj static void
gomp_free_device_memory(struct gomp_device_descr * devicep,void * devptr)307*38fd1498Szrj gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
308*38fd1498Szrj {
309*38fd1498Szrj if (!devicep->free_func (devicep->target_id, devptr))
310*38fd1498Szrj {
311*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
312*38fd1498Szrj gomp_fatal ("error in freeing device memory block at %p", devptr);
313*38fd1498Szrj }
314*38fd1498Szrj }
315*38fd1498Szrj
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  Fills in TGT_VAR from OLDN, rejects
   incompatible remappings, refreshes the device copy for "always to"
   kinds, and bumps OLDN's reference count.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  /* NEWN may be a sub-range of OLDN; record where it starts and its size.  */
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  /* A forced mapping, or a new range not fully contained in the existing
     one, cannot reuse the existing mapping.  */
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  /* map(always,to:) refreshes the device copy even for existing mappings.  */
  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  /* Infinite refcounts (e.g. global data) are never counted up or down.  */
  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}
352*38fd1498Szrj
/* Fetch the map kind at index IDX from KINDS, which is an array of
   unsigned short when SHORT_MAPKIND, else of unsigned char.  */

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  if (short_mapkind)
    return ((unsigned short *) kinds)[idx];
  return ((unsigned char *) kinds)[idx];
}
359*38fd1498Szrj
/* Store, at device location TGT->tgt_start + TARGET_OFFSET, the device
   address corresponding to host pointer HOST_PTR adjusted by BIAS.  Used to
   initialize device-side pointers to mapped array sections; the pointer
   target must already be mapped, otherwise this is fatal.  CBUF, if
   non-NULL, lets the write be coalesced with neighbouring transfers.  */

static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      /* A null host pointer maps to a null device pointer.  */
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  /* Translate the (biased) host address into the device copy's address.  */
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
398*38fd1498Szrj
/* Map structure element I of a GOMP_MAP_STRUCT clause whose containing
   structure is already mapped as N (FIRST is the index of the struct's
   first element).  The element must resolve to a mapping belonging to the
   same target_mem_desc at a consistent offset, possibly after the same
   zero-length widening tricks as gomp_map_lookup; otherwise this is fatal.
   Helper function of gomp_map_vars.  */

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  /* GOMP_MAP_STRUCT only occurs with the 16-bit map-kind ABI.  */
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  /* Accept only a mapping from the same descriptor with a matching
     host/device offset relationship.  */
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      /* Zero-length element: retry with the key widened one byte backwards
	 (if still inside the struct) and then one byte forwards, mirroring
	 gomp_map_lookup.  */
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
459*38fd1498Szrj
/* Return the device address to pass for mapping entry I of TGT.
   Entries with a key resolve to the device copy's address.  For keyless
   entries, tgt->list[i].offset encodes special cases:
     ~0  - pass the host address through unchanged;
     ~1  - a NULL / zero value;
     ~2  - derive from the following entry's mapping (struct element);
   any other value is an offset into the block at tgt->tgt_start.  */

static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    /* Entry I + 1 maps the same structure; translate this entry's host
       address by the same host/device displacement.  */
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
479*38fd1498Szrj
480*38fd1498Szrj attribute_hidden struct target_mem_desc *
gomp_map_vars(struct gomp_device_descr * devicep,size_t mapnum,void ** hostaddrs,void ** devaddrs,size_t * sizes,void * kinds,bool short_mapkind,enum gomp_map_vars_kind pragma_kind)481*38fd1498Szrj gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
482*38fd1498Szrj void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
483*38fd1498Szrj bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
484*38fd1498Szrj {
485*38fd1498Szrj size_t i, tgt_align, tgt_size, not_found_cnt = 0;
486*38fd1498Szrj bool has_firstprivate = false;
487*38fd1498Szrj const int rshift = short_mapkind ? 8 : 3;
488*38fd1498Szrj const int typemask = short_mapkind ? 0xff : 0x7;
489*38fd1498Szrj struct splay_tree_s *mem_map = &devicep->mem_map;
490*38fd1498Szrj struct splay_tree_key_s cur_node;
491*38fd1498Szrj struct target_mem_desc *tgt
492*38fd1498Szrj = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
493*38fd1498Szrj tgt->list_count = mapnum;
494*38fd1498Szrj tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
495*38fd1498Szrj tgt->device_descr = devicep;
496*38fd1498Szrj struct gomp_coalesce_buf cbuf, *cbufp = NULL;
497*38fd1498Szrj
498*38fd1498Szrj if (mapnum == 0)
499*38fd1498Szrj {
500*38fd1498Szrj tgt->tgt_start = 0;
501*38fd1498Szrj tgt->tgt_end = 0;
502*38fd1498Szrj return tgt;
503*38fd1498Szrj }
504*38fd1498Szrj
505*38fd1498Szrj tgt_align = sizeof (void *);
506*38fd1498Szrj tgt_size = 0;
507*38fd1498Szrj cbuf.chunks = NULL;
508*38fd1498Szrj cbuf.chunk_cnt = -1;
509*38fd1498Szrj cbuf.use_cnt = 0;
510*38fd1498Szrj cbuf.buf = NULL;
511*38fd1498Szrj if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
512*38fd1498Szrj {
513*38fd1498Szrj cbuf.chunks
514*38fd1498Szrj = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
515*38fd1498Szrj cbuf.chunk_cnt = 0;
516*38fd1498Szrj }
517*38fd1498Szrj if (pragma_kind == GOMP_MAP_VARS_TARGET)
518*38fd1498Szrj {
519*38fd1498Szrj size_t align = 4 * sizeof (void *);
520*38fd1498Szrj tgt_align = align;
521*38fd1498Szrj tgt_size = mapnum * sizeof (void *);
522*38fd1498Szrj cbuf.chunk_cnt = 1;
523*38fd1498Szrj cbuf.use_cnt = 1 + (mapnum > 1);
524*38fd1498Szrj cbuf.chunks[0] = 0;
525*38fd1498Szrj cbuf.chunks[1] = tgt_size;
526*38fd1498Szrj }
527*38fd1498Szrj
528*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
529*38fd1498Szrj if (devicep->state == GOMP_DEVICE_FINALIZED)
530*38fd1498Szrj {
531*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
532*38fd1498Szrj free (tgt);
533*38fd1498Szrj return NULL;
534*38fd1498Szrj }
535*38fd1498Szrj
536*38fd1498Szrj for (i = 0; i < mapnum; i++)
537*38fd1498Szrj {
538*38fd1498Szrj int kind = get_kind (short_mapkind, kinds, i);
539*38fd1498Szrj if (hostaddrs[i] == NULL
540*38fd1498Szrj || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
541*38fd1498Szrj {
542*38fd1498Szrj tgt->list[i].key = NULL;
543*38fd1498Szrj tgt->list[i].offset = ~(uintptr_t) 0;
544*38fd1498Szrj continue;
545*38fd1498Szrj }
546*38fd1498Szrj else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
547*38fd1498Szrj {
548*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
549*38fd1498Szrj cur_node.host_end = cur_node.host_start;
550*38fd1498Szrj splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
551*38fd1498Szrj if (n == NULL)
552*38fd1498Szrj {
553*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
554*38fd1498Szrj gomp_fatal ("use_device_ptr pointer wasn't mapped");
555*38fd1498Szrj }
556*38fd1498Szrj cur_node.host_start -= n->host_start;
557*38fd1498Szrj hostaddrs[i]
558*38fd1498Szrj = (void *) (n->tgt->tgt_start + n->tgt_offset
559*38fd1498Szrj + cur_node.host_start);
560*38fd1498Szrj tgt->list[i].key = NULL;
561*38fd1498Szrj tgt->list[i].offset = ~(uintptr_t) 0;
562*38fd1498Szrj continue;
563*38fd1498Szrj }
564*38fd1498Szrj else if ((kind & typemask) == GOMP_MAP_STRUCT)
565*38fd1498Szrj {
566*38fd1498Szrj size_t first = i + 1;
567*38fd1498Szrj size_t last = i + sizes[i];
568*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
569*38fd1498Szrj cur_node.host_end = (uintptr_t) hostaddrs[last]
570*38fd1498Szrj + sizes[last];
571*38fd1498Szrj tgt->list[i].key = NULL;
572*38fd1498Szrj tgt->list[i].offset = ~(uintptr_t) 2;
573*38fd1498Szrj splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
574*38fd1498Szrj if (n == NULL)
575*38fd1498Szrj {
576*38fd1498Szrj size_t align = (size_t) 1 << (kind >> rshift);
577*38fd1498Szrj if (tgt_align < align)
578*38fd1498Szrj tgt_align = align;
579*38fd1498Szrj tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
580*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
581*38fd1498Szrj tgt_size += cur_node.host_end - cur_node.host_start;
582*38fd1498Szrj not_found_cnt += last - i;
583*38fd1498Szrj for (i = first; i <= last; i++)
584*38fd1498Szrj {
585*38fd1498Szrj tgt->list[i].key = NULL;
586*38fd1498Szrj if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
587*38fd1498Szrj & typemask))
588*38fd1498Szrj gomp_coalesce_buf_add (&cbuf,
589*38fd1498Szrj tgt_size - cur_node.host_end
590*38fd1498Szrj + (uintptr_t) hostaddrs[i],
591*38fd1498Szrj sizes[i]);
592*38fd1498Szrj }
593*38fd1498Szrj i--;
594*38fd1498Szrj continue;
595*38fd1498Szrj }
596*38fd1498Szrj for (i = first; i <= last; i++)
597*38fd1498Szrj gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
598*38fd1498Szrj sizes, kinds, NULL);
599*38fd1498Szrj i--;
600*38fd1498Szrj continue;
601*38fd1498Szrj }
602*38fd1498Szrj else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
603*38fd1498Szrj {
604*38fd1498Szrj tgt->list[i].key = NULL;
605*38fd1498Szrj tgt->list[i].offset = ~(uintptr_t) 1;
606*38fd1498Szrj has_firstprivate = true;
607*38fd1498Szrj continue;
608*38fd1498Szrj }
609*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
610*38fd1498Szrj if (!GOMP_MAP_POINTER_P (kind & typemask))
611*38fd1498Szrj cur_node.host_end = cur_node.host_start + sizes[i];
612*38fd1498Szrj else
613*38fd1498Szrj cur_node.host_end = cur_node.host_start + sizeof (void *);
614*38fd1498Szrj if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
615*38fd1498Szrj {
616*38fd1498Szrj tgt->list[i].key = NULL;
617*38fd1498Szrj
618*38fd1498Szrj size_t align = (size_t) 1 << (kind >> rshift);
619*38fd1498Szrj if (tgt_align < align)
620*38fd1498Szrj tgt_align = align;
621*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
622*38fd1498Szrj gomp_coalesce_buf_add (&cbuf, tgt_size,
623*38fd1498Szrj cur_node.host_end - cur_node.host_start);
624*38fd1498Szrj tgt_size += cur_node.host_end - cur_node.host_start;
625*38fd1498Szrj has_firstprivate = true;
626*38fd1498Szrj continue;
627*38fd1498Szrj }
628*38fd1498Szrj splay_tree_key n;
629*38fd1498Szrj if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
630*38fd1498Szrj {
631*38fd1498Szrj n = gomp_map_0len_lookup (mem_map, &cur_node);
632*38fd1498Szrj if (!n)
633*38fd1498Szrj {
634*38fd1498Szrj tgt->list[i].key = NULL;
635*38fd1498Szrj tgt->list[i].offset = ~(uintptr_t) 1;
636*38fd1498Szrj continue;
637*38fd1498Szrj }
638*38fd1498Szrj }
639*38fd1498Szrj else
640*38fd1498Szrj n = splay_tree_lookup (mem_map, &cur_node);
641*38fd1498Szrj if (n && n->refcount != REFCOUNT_LINK)
642*38fd1498Szrj gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
643*38fd1498Szrj kind & typemask, NULL);
644*38fd1498Szrj else
645*38fd1498Szrj {
646*38fd1498Szrj tgt->list[i].key = NULL;
647*38fd1498Szrj
648*38fd1498Szrj size_t align = (size_t) 1 << (kind >> rshift);
649*38fd1498Szrj not_found_cnt++;
650*38fd1498Szrj if (tgt_align < align)
651*38fd1498Szrj tgt_align = align;
652*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
653*38fd1498Szrj if (gomp_to_device_kind_p (kind & typemask))
654*38fd1498Szrj gomp_coalesce_buf_add (&cbuf, tgt_size,
655*38fd1498Szrj cur_node.host_end - cur_node.host_start);
656*38fd1498Szrj tgt_size += cur_node.host_end - cur_node.host_start;
657*38fd1498Szrj if ((kind & typemask) == GOMP_MAP_TO_PSET)
658*38fd1498Szrj {
659*38fd1498Szrj size_t j;
660*38fd1498Szrj for (j = i + 1; j < mapnum; j++)
661*38fd1498Szrj if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
662*38fd1498Szrj & typemask))
663*38fd1498Szrj break;
664*38fd1498Szrj else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
665*38fd1498Szrj || ((uintptr_t) hostaddrs[j] + sizeof (void *)
666*38fd1498Szrj > cur_node.host_end))
667*38fd1498Szrj break;
668*38fd1498Szrj else
669*38fd1498Szrj {
670*38fd1498Szrj tgt->list[j].key = NULL;
671*38fd1498Szrj i++;
672*38fd1498Szrj }
673*38fd1498Szrj }
674*38fd1498Szrj }
675*38fd1498Szrj }
676*38fd1498Szrj
677*38fd1498Szrj if (devaddrs)
678*38fd1498Szrj {
679*38fd1498Szrj if (mapnum != 1)
680*38fd1498Szrj {
681*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
682*38fd1498Szrj gomp_fatal ("unexpected aggregation");
683*38fd1498Szrj }
684*38fd1498Szrj tgt->to_free = devaddrs[0];
685*38fd1498Szrj tgt->tgt_start = (uintptr_t) tgt->to_free;
686*38fd1498Szrj tgt->tgt_end = tgt->tgt_start + sizes[0];
687*38fd1498Szrj }
688*38fd1498Szrj else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
689*38fd1498Szrj {
690*38fd1498Szrj /* Allocate tgt_align aligned tgt_size block of memory. */
691*38fd1498Szrj /* FIXME: Perhaps change interface to allocate properly aligned
692*38fd1498Szrj memory. */
693*38fd1498Szrj tgt->to_free = devicep->alloc_func (devicep->target_id,
694*38fd1498Szrj tgt_size + tgt_align - 1);
695*38fd1498Szrj if (!tgt->to_free)
696*38fd1498Szrj {
697*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
698*38fd1498Szrj gomp_fatal ("device memory allocation fail");
699*38fd1498Szrj }
700*38fd1498Szrj
701*38fd1498Szrj tgt->tgt_start = (uintptr_t) tgt->to_free;
702*38fd1498Szrj tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
703*38fd1498Szrj tgt->tgt_end = tgt->tgt_start + tgt_size;
704*38fd1498Szrj
705*38fd1498Szrj if (cbuf.use_cnt == 1)
706*38fd1498Szrj cbuf.chunk_cnt--;
707*38fd1498Szrj if (cbuf.chunk_cnt > 0)
708*38fd1498Szrj {
709*38fd1498Szrj cbuf.buf
710*38fd1498Szrj = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
711*38fd1498Szrj if (cbuf.buf)
712*38fd1498Szrj {
713*38fd1498Szrj cbuf.tgt = tgt;
714*38fd1498Szrj cbufp = &cbuf;
715*38fd1498Szrj }
716*38fd1498Szrj }
717*38fd1498Szrj }
718*38fd1498Szrj else
719*38fd1498Szrj {
720*38fd1498Szrj tgt->to_free = NULL;
721*38fd1498Szrj tgt->tgt_start = 0;
722*38fd1498Szrj tgt->tgt_end = 0;
723*38fd1498Szrj }
724*38fd1498Szrj
725*38fd1498Szrj tgt_size = 0;
726*38fd1498Szrj if (pragma_kind == GOMP_MAP_VARS_TARGET)
727*38fd1498Szrj tgt_size = mapnum * sizeof (void *);
728*38fd1498Szrj
729*38fd1498Szrj tgt->array = NULL;
730*38fd1498Szrj if (not_found_cnt || has_firstprivate)
731*38fd1498Szrj {
732*38fd1498Szrj if (not_found_cnt)
733*38fd1498Szrj tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
734*38fd1498Szrj splay_tree_node array = tgt->array;
735*38fd1498Szrj size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
736*38fd1498Szrj uintptr_t field_tgt_base = 0;
737*38fd1498Szrj
738*38fd1498Szrj for (i = 0; i < mapnum; i++)
739*38fd1498Szrj if (tgt->list[i].key == NULL)
740*38fd1498Szrj {
741*38fd1498Szrj int kind = get_kind (short_mapkind, kinds, i);
742*38fd1498Szrj if (hostaddrs[i] == NULL)
743*38fd1498Szrj continue;
744*38fd1498Szrj switch (kind & typemask)
745*38fd1498Szrj {
746*38fd1498Szrj size_t align, len, first, last;
747*38fd1498Szrj splay_tree_key n;
748*38fd1498Szrj case GOMP_MAP_FIRSTPRIVATE:
749*38fd1498Szrj align = (size_t) 1 << (kind >> rshift);
750*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
751*38fd1498Szrj tgt->list[i].offset = tgt_size;
752*38fd1498Szrj len = sizes[i];
753*38fd1498Szrj gomp_copy_host2dev (devicep,
754*38fd1498Szrj (void *) (tgt->tgt_start + tgt_size),
755*38fd1498Szrj (void *) hostaddrs[i], len, cbufp);
756*38fd1498Szrj tgt_size += len;
757*38fd1498Szrj continue;
758*38fd1498Szrj case GOMP_MAP_FIRSTPRIVATE_INT:
759*38fd1498Szrj case GOMP_MAP_USE_DEVICE_PTR:
760*38fd1498Szrj case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
761*38fd1498Szrj continue;
762*38fd1498Szrj case GOMP_MAP_STRUCT:
763*38fd1498Szrj first = i + 1;
764*38fd1498Szrj last = i + sizes[i];
765*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
766*38fd1498Szrj cur_node.host_end = (uintptr_t) hostaddrs[last]
767*38fd1498Szrj + sizes[last];
768*38fd1498Szrj if (tgt->list[first].key != NULL)
769*38fd1498Szrj continue;
770*38fd1498Szrj n = splay_tree_lookup (mem_map, &cur_node);
771*38fd1498Szrj if (n == NULL)
772*38fd1498Szrj {
773*38fd1498Szrj size_t align = (size_t) 1 << (kind >> rshift);
774*38fd1498Szrj tgt_size -= (uintptr_t) hostaddrs[first]
775*38fd1498Szrj - (uintptr_t) hostaddrs[i];
776*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
777*38fd1498Szrj tgt_size += (uintptr_t) hostaddrs[first]
778*38fd1498Szrj - (uintptr_t) hostaddrs[i];
779*38fd1498Szrj field_tgt_base = (uintptr_t) hostaddrs[first];
780*38fd1498Szrj field_tgt_offset = tgt_size;
781*38fd1498Szrj field_tgt_clear = last;
782*38fd1498Szrj tgt_size += cur_node.host_end
783*38fd1498Szrj - (uintptr_t) hostaddrs[first];
784*38fd1498Szrj continue;
785*38fd1498Szrj }
786*38fd1498Szrj for (i = first; i <= last; i++)
787*38fd1498Szrj gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
788*38fd1498Szrj sizes, kinds, cbufp);
789*38fd1498Szrj i--;
790*38fd1498Szrj continue;
791*38fd1498Szrj case GOMP_MAP_ALWAYS_POINTER:
792*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
793*38fd1498Szrj cur_node.host_end = cur_node.host_start + sizeof (void *);
794*38fd1498Szrj n = splay_tree_lookup (mem_map, &cur_node);
795*38fd1498Szrj if (n == NULL
796*38fd1498Szrj || n->host_start > cur_node.host_start
797*38fd1498Szrj || n->host_end < cur_node.host_end)
798*38fd1498Szrj {
799*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
800*38fd1498Szrj gomp_fatal ("always pointer not mapped");
801*38fd1498Szrj }
802*38fd1498Szrj if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
803*38fd1498Szrj != GOMP_MAP_ALWAYS_POINTER)
804*38fd1498Szrj cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
805*38fd1498Szrj if (cur_node.tgt_offset)
806*38fd1498Szrj cur_node.tgt_offset -= sizes[i];
807*38fd1498Szrj gomp_copy_host2dev (devicep,
808*38fd1498Szrj (void *) (n->tgt->tgt_start
809*38fd1498Szrj + n->tgt_offset
810*38fd1498Szrj + cur_node.host_start
811*38fd1498Szrj - n->host_start),
812*38fd1498Szrj (void *) &cur_node.tgt_offset,
813*38fd1498Szrj sizeof (void *), cbufp);
814*38fd1498Szrj cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
815*38fd1498Szrj + cur_node.host_start - n->host_start;
816*38fd1498Szrj continue;
817*38fd1498Szrj default:
818*38fd1498Szrj break;
819*38fd1498Szrj }
820*38fd1498Szrj splay_tree_key k = &array->key;
821*38fd1498Szrj k->host_start = (uintptr_t) hostaddrs[i];
822*38fd1498Szrj if (!GOMP_MAP_POINTER_P (kind & typemask))
823*38fd1498Szrj k->host_end = k->host_start + sizes[i];
824*38fd1498Szrj else
825*38fd1498Szrj k->host_end = k->host_start + sizeof (void *);
826*38fd1498Szrj splay_tree_key n = splay_tree_lookup (mem_map, k);
827*38fd1498Szrj if (n && n->refcount != REFCOUNT_LINK)
828*38fd1498Szrj gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
829*38fd1498Szrj kind & typemask, cbufp);
830*38fd1498Szrj else
831*38fd1498Szrj {
832*38fd1498Szrj k->link_key = NULL;
833*38fd1498Szrj if (n && n->refcount == REFCOUNT_LINK)
834*38fd1498Szrj {
835*38fd1498Szrj /* Replace target address of the pointer with target address
836*38fd1498Szrj of mapped object in the splay tree. */
837*38fd1498Szrj splay_tree_remove (mem_map, n);
838*38fd1498Szrj k->link_key = n;
839*38fd1498Szrj }
840*38fd1498Szrj size_t align = (size_t) 1 << (kind >> rshift);
841*38fd1498Szrj tgt->list[i].key = k;
842*38fd1498Szrj k->tgt = tgt;
843*38fd1498Szrj if (field_tgt_clear != ~(size_t) 0)
844*38fd1498Szrj {
845*38fd1498Szrj k->tgt_offset = k->host_start - field_tgt_base
846*38fd1498Szrj + field_tgt_offset;
847*38fd1498Szrj if (i == field_tgt_clear)
848*38fd1498Szrj field_tgt_clear = ~(size_t) 0;
849*38fd1498Szrj }
850*38fd1498Szrj else
851*38fd1498Szrj {
852*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
853*38fd1498Szrj k->tgt_offset = tgt_size;
854*38fd1498Szrj tgt_size += k->host_end - k->host_start;
855*38fd1498Szrj }
856*38fd1498Szrj tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
857*38fd1498Szrj tgt->list[i].always_copy_from
858*38fd1498Szrj = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
859*38fd1498Szrj tgt->list[i].offset = 0;
860*38fd1498Szrj tgt->list[i].length = k->host_end - k->host_start;
861*38fd1498Szrj k->refcount = 1;
862*38fd1498Szrj tgt->refcount++;
863*38fd1498Szrj array->left = NULL;
864*38fd1498Szrj array->right = NULL;
865*38fd1498Szrj splay_tree_insert (mem_map, array);
866*38fd1498Szrj switch (kind & typemask)
867*38fd1498Szrj {
868*38fd1498Szrj case GOMP_MAP_ALLOC:
869*38fd1498Szrj case GOMP_MAP_FROM:
870*38fd1498Szrj case GOMP_MAP_FORCE_ALLOC:
871*38fd1498Szrj case GOMP_MAP_FORCE_FROM:
872*38fd1498Szrj case GOMP_MAP_ALWAYS_FROM:
873*38fd1498Szrj break;
874*38fd1498Szrj case GOMP_MAP_TO:
875*38fd1498Szrj case GOMP_MAP_TOFROM:
876*38fd1498Szrj case GOMP_MAP_FORCE_TO:
877*38fd1498Szrj case GOMP_MAP_FORCE_TOFROM:
878*38fd1498Szrj case GOMP_MAP_ALWAYS_TO:
879*38fd1498Szrj case GOMP_MAP_ALWAYS_TOFROM:
880*38fd1498Szrj gomp_copy_host2dev (devicep,
881*38fd1498Szrj (void *) (tgt->tgt_start
882*38fd1498Szrj + k->tgt_offset),
883*38fd1498Szrj (void *) k->host_start,
884*38fd1498Szrj k->host_end - k->host_start, cbufp);
885*38fd1498Szrj break;
886*38fd1498Szrj case GOMP_MAP_POINTER:
887*38fd1498Szrj gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
888*38fd1498Szrj k->tgt_offset, sizes[i], cbufp);
889*38fd1498Szrj break;
890*38fd1498Szrj case GOMP_MAP_TO_PSET:
891*38fd1498Szrj gomp_copy_host2dev (devicep,
892*38fd1498Szrj (void *) (tgt->tgt_start
893*38fd1498Szrj + k->tgt_offset),
894*38fd1498Szrj (void *) k->host_start,
895*38fd1498Szrj k->host_end - k->host_start, cbufp);
896*38fd1498Szrj
897*38fd1498Szrj for (j = i + 1; j < mapnum; j++)
898*38fd1498Szrj if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
899*38fd1498Szrj j)
900*38fd1498Szrj & typemask))
901*38fd1498Szrj break;
902*38fd1498Szrj else if ((uintptr_t) hostaddrs[j] < k->host_start
903*38fd1498Szrj || ((uintptr_t) hostaddrs[j] + sizeof (void *)
904*38fd1498Szrj > k->host_end))
905*38fd1498Szrj break;
906*38fd1498Szrj else
907*38fd1498Szrj {
908*38fd1498Szrj tgt->list[j].key = k;
909*38fd1498Szrj tgt->list[j].copy_from = false;
910*38fd1498Szrj tgt->list[j].always_copy_from = false;
911*38fd1498Szrj if (k->refcount != REFCOUNT_INFINITY)
912*38fd1498Szrj k->refcount++;
913*38fd1498Szrj gomp_map_pointer (tgt,
914*38fd1498Szrj (uintptr_t) *(void **) hostaddrs[j],
915*38fd1498Szrj k->tgt_offset
916*38fd1498Szrj + ((uintptr_t) hostaddrs[j]
917*38fd1498Szrj - k->host_start),
918*38fd1498Szrj sizes[j], cbufp);
919*38fd1498Szrj i++;
920*38fd1498Szrj }
921*38fd1498Szrj break;
922*38fd1498Szrj case GOMP_MAP_FORCE_PRESENT:
923*38fd1498Szrj {
924*38fd1498Szrj /* We already looked up the memory region above and it
925*38fd1498Szrj was missing. */
926*38fd1498Szrj size_t size = k->host_end - k->host_start;
927*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
928*38fd1498Szrj #ifdef HAVE_INTTYPES_H
929*38fd1498Szrj gomp_fatal ("present clause: !acc_is_present (%p, "
930*38fd1498Szrj "%"PRIu64" (0x%"PRIx64"))",
931*38fd1498Szrj (void *) k->host_start,
932*38fd1498Szrj (uint64_t) size, (uint64_t) size);
933*38fd1498Szrj #else
934*38fd1498Szrj gomp_fatal ("present clause: !acc_is_present (%p, "
935*38fd1498Szrj "%lu (0x%lx))", (void *) k->host_start,
936*38fd1498Szrj (unsigned long) size, (unsigned long) size);
937*38fd1498Szrj #endif
938*38fd1498Szrj }
939*38fd1498Szrj break;
940*38fd1498Szrj case GOMP_MAP_FORCE_DEVICEPTR:
941*38fd1498Szrj assert (k->host_end - k->host_start == sizeof (void *));
942*38fd1498Szrj gomp_copy_host2dev (devicep,
943*38fd1498Szrj (void *) (tgt->tgt_start
944*38fd1498Szrj + k->tgt_offset),
945*38fd1498Szrj (void *) k->host_start,
946*38fd1498Szrj sizeof (void *), cbufp);
947*38fd1498Szrj break;
948*38fd1498Szrj default:
949*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
950*38fd1498Szrj gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
951*38fd1498Szrj kind);
952*38fd1498Szrj }
953*38fd1498Szrj
954*38fd1498Szrj if (k->link_key)
955*38fd1498Szrj {
956*38fd1498Szrj /* Set link pointer on target to the device address of the
957*38fd1498Szrj mapped object. */
958*38fd1498Szrj void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
959*38fd1498Szrj devicep->host2dev_func (devicep->target_id,
960*38fd1498Szrj (void *) n->tgt_offset,
961*38fd1498Szrj &tgt_addr, sizeof (void *));
962*38fd1498Szrj }
963*38fd1498Szrj array++;
964*38fd1498Szrj }
965*38fd1498Szrj }
966*38fd1498Szrj }
967*38fd1498Szrj
968*38fd1498Szrj if (pragma_kind == GOMP_MAP_VARS_TARGET)
969*38fd1498Szrj {
970*38fd1498Szrj for (i = 0; i < mapnum; i++)
971*38fd1498Szrj {
972*38fd1498Szrj cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
973*38fd1498Szrj gomp_copy_host2dev (devicep,
974*38fd1498Szrj (void *) (tgt->tgt_start + i * sizeof (void *)),
975*38fd1498Szrj (void *) &cur_node.tgt_offset, sizeof (void *),
976*38fd1498Szrj cbufp);
977*38fd1498Szrj }
978*38fd1498Szrj }
979*38fd1498Szrj
980*38fd1498Szrj if (cbufp)
981*38fd1498Szrj {
982*38fd1498Szrj long c = 0;
983*38fd1498Szrj for (c = 0; c < cbuf.chunk_cnt; ++c)
984*38fd1498Szrj gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
985*38fd1498Szrj (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
986*38fd1498Szrj cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
987*38fd1498Szrj free (cbuf.buf);
988*38fd1498Szrj }
989*38fd1498Szrj
990*38fd1498Szrj /* If the variable from "omp target enter data" map-list was already mapped,
991*38fd1498Szrj tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
992*38fd1498Szrj gomp_exit_data. */
993*38fd1498Szrj if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
994*38fd1498Szrj {
995*38fd1498Szrj free (tgt);
996*38fd1498Szrj tgt = NULL;
997*38fd1498Szrj }
998*38fd1498Szrj
999*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1000*38fd1498Szrj return tgt;
1001*38fd1498Szrj }
1002*38fd1498Szrj
1003*38fd1498Szrj static void
gomp_unmap_tgt(struct target_mem_desc * tgt)1004*38fd1498Szrj gomp_unmap_tgt (struct target_mem_desc *tgt)
1005*38fd1498Szrj {
1006*38fd1498Szrj /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
1007*38fd1498Szrj if (tgt->tgt_end)
1008*38fd1498Szrj gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1009*38fd1498Szrj
1010*38fd1498Szrj free (tgt->array);
1011*38fd1498Szrj free (tgt);
1012*38fd1498Szrj }
1013*38fd1498Szrj
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* Nothing was mapped; only the descriptor itself needs freeing.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* The device has already been finalized, so its memory is gone;
	 just release the host-side bookkeeping.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      /* Entries without a key (e.g. firstprivate slots) have no mapping
	 to tear down.  */
      if (k == NULL)
	continue;

      /* Drop one reference; the mapping is removed only when the count
	 reaches zero.  REFCOUNT_INFINITY entries (image tables) are
	 never decremented.  */
      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      /* Copy back BEFORE unmapping: gomp_unmap_tgt below may free the
	 device block this data lives in.  "always" entries copy back
	 even while other references remain.  */
      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	{
	  splay_tree_remove (&devicep->mem_map, k);
	  /* An "omp declare target link" mapping was displaced when this
	     key was created; restore it now.  */
	  if (k->link_key)
	    splay_tree_insert (&devicep->mem_map,
			       (splay_tree_node) k->link_key);
	  if (k->tgt->refcount > 1)
	    k->tgt->refcount--;
	  else
	    gomp_unmap_tgt (k->tgt);
	}
    }

  /* Finally drop this call's own reference on TGT.  */
  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}
1081*38fd1498Szrj
1082*38fd1498Szrj static void
gomp_update(struct gomp_device_descr * devicep,size_t mapnum,void ** hostaddrs,size_t * sizes,void * kinds,bool short_mapkind)1083*38fd1498Szrj gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1084*38fd1498Szrj size_t *sizes, void *kinds, bool short_mapkind)
1085*38fd1498Szrj {
1086*38fd1498Szrj size_t i;
1087*38fd1498Szrj struct splay_tree_key_s cur_node;
1088*38fd1498Szrj const int typemask = short_mapkind ? 0xff : 0x7;
1089*38fd1498Szrj
1090*38fd1498Szrj if (!devicep)
1091*38fd1498Szrj return;
1092*38fd1498Szrj
1093*38fd1498Szrj if (mapnum == 0)
1094*38fd1498Szrj return;
1095*38fd1498Szrj
1096*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
1097*38fd1498Szrj if (devicep->state == GOMP_DEVICE_FINALIZED)
1098*38fd1498Szrj {
1099*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1100*38fd1498Szrj return;
1101*38fd1498Szrj }
1102*38fd1498Szrj
1103*38fd1498Szrj for (i = 0; i < mapnum; i++)
1104*38fd1498Szrj if (sizes[i])
1105*38fd1498Szrj {
1106*38fd1498Szrj cur_node.host_start = (uintptr_t) hostaddrs[i];
1107*38fd1498Szrj cur_node.host_end = cur_node.host_start + sizes[i];
1108*38fd1498Szrj splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
1109*38fd1498Szrj if (n)
1110*38fd1498Szrj {
1111*38fd1498Szrj int kind = get_kind (short_mapkind, kinds, i);
1112*38fd1498Szrj if (n->host_start > cur_node.host_start
1113*38fd1498Szrj || n->host_end < cur_node.host_end)
1114*38fd1498Szrj {
1115*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1116*38fd1498Szrj gomp_fatal ("Trying to update [%p..%p) object when "
1117*38fd1498Szrj "only [%p..%p) is mapped",
1118*38fd1498Szrj (void *) cur_node.host_start,
1119*38fd1498Szrj (void *) cur_node.host_end,
1120*38fd1498Szrj (void *) n->host_start,
1121*38fd1498Szrj (void *) n->host_end);
1122*38fd1498Szrj }
1123*38fd1498Szrj
1124*38fd1498Szrj
1125*38fd1498Szrj void *hostaddr = (void *) cur_node.host_start;
1126*38fd1498Szrj void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
1127*38fd1498Szrj + cur_node.host_start - n->host_start);
1128*38fd1498Szrj size_t size = cur_node.host_end - cur_node.host_start;
1129*38fd1498Szrj
1130*38fd1498Szrj if (GOMP_MAP_COPY_TO_P (kind & typemask))
1131*38fd1498Szrj gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
1132*38fd1498Szrj if (GOMP_MAP_COPY_FROM_P (kind & typemask))
1133*38fd1498Szrj gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
1134*38fd1498Szrj }
1135*38fd1498Szrj }
1136*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1137*38fd1498Szrj }
1138*38fd1498Szrj
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  DEVICEP->lock must be
   held by the caller; IS_REGISTER_LOCK says whether the global register_lock
   is also held (so fatal paths can release it).  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  /* HOST_TABLE is four pointers: start/end of the host function address
     table, then start/end of the host variable table.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  /* Host and device tables must pair up one-to-one.  */
  if (num_target_entries != num_funcs + num_vars)
    {
      /* Fatal path: drop DEVICEP's lock (held by the caller) and, when
	 requested, the global register_lock before aborting.  */
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  /* Image mappings are permanent: never reference-counted away.  */
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      /* Functions are keyed by a one-byte host range.  */
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      /* Host and device sizes (including the link bit) must agree.  */
      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      /* Link variables only become real mappings on first use; until
	 then they carry REFCOUNT_LINK.  */
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
1238*38fd1498Szrj
/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  /* Same four-pointer layout as in gomp_load_image_to_device.  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      /* Probe with a one-byte range starting at the first table entry.
	 NOTE(review): assumes this overlaps the corresponding key even
	 for a variable-only image -- confirm against splay_tree_lookup's
	 range semantics.  */
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
		      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      /* Functions were inserted with one-byte host ranges.  */
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  /* An "omp declare target link" variable: if it was mapped
	     (link_key set), drop the extra reference its mapping took
	     on the target descriptor.  */
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  splay_tree_remove (&devicep->mem_map, n);
	  if (n->link_key)
	    {
	      if (n->tgt->refcount > 1)
		n->tgt->refcount--;
	      else
		{
		  is_tgt_unmapped = true;
		  gomp_unmap_tgt (n->tgt);
		}
	    }
	}
    }

  if (node && !is_tgt_unmapped)
    {
      /* NOTE(review): NODE points at the key embedded in the first
	 element of the image's tgt->array; free (node) therefore only
	 releases that array if the key is the node's first member and
	 NODE is the array's first element -- verify against the
	 splay_tree_node layout before changing this.  */
      free (node->tgt);
      free (node);
    }
}
1321*38fd1498Szrj
1322*38fd1498Szrj /* This function should be called from every offload image while loading.
1323*38fd1498Szrj It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1324*38fd1498Szrj the target, and TARGET_DATA needed by target plugin. */
1325*38fd1498Szrj
1326*38fd1498Szrj void
GOMP_offload_register_ver(unsigned version,const void * host_table,int target_type,const void * target_data)1327*38fd1498Szrj GOMP_offload_register_ver (unsigned version, const void *host_table,
1328*38fd1498Szrj int target_type, const void *target_data)
1329*38fd1498Szrj {
1330*38fd1498Szrj int i;
1331*38fd1498Szrj
1332*38fd1498Szrj if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
1333*38fd1498Szrj gomp_fatal ("Library too old for offload (version %u < %u)",
1334*38fd1498Szrj GOMP_VERSION, GOMP_VERSION_LIB (version));
1335*38fd1498Szrj
1336*38fd1498Szrj gomp_mutex_lock (®ister_lock);
1337*38fd1498Szrj
1338*38fd1498Szrj /* Load image to all initialized devices. */
1339*38fd1498Szrj for (i = 0; i < num_devices; i++)
1340*38fd1498Szrj {
1341*38fd1498Szrj struct gomp_device_descr *devicep = &devices[i];
1342*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
1343*38fd1498Szrj if (devicep->type == target_type
1344*38fd1498Szrj && devicep->state == GOMP_DEVICE_INITIALIZED)
1345*38fd1498Szrj gomp_load_image_to_device (devicep, version,
1346*38fd1498Szrj host_table, target_data, true);
1347*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1348*38fd1498Szrj }
1349*38fd1498Szrj
1350*38fd1498Szrj /* Insert image to array of pending images. */
1351*38fd1498Szrj offload_images
1352*38fd1498Szrj = gomp_realloc_unlock (offload_images,
1353*38fd1498Szrj (num_offload_images + 1)
1354*38fd1498Szrj * sizeof (struct offload_image_descr));
1355*38fd1498Szrj offload_images[num_offload_images].version = version;
1356*38fd1498Szrj offload_images[num_offload_images].type = target_type;
1357*38fd1498Szrj offload_images[num_offload_images].host_table = host_table;
1358*38fd1498Szrj offload_images[num_offload_images].target_data = target_data;
1359*38fd1498Szrj
1360*38fd1498Szrj num_offload_images++;
1361*38fd1498Szrj gomp_mutex_unlock (®ister_lock);
1362*38fd1498Szrj }
1363*38fd1498Szrj
/* Legacy entry point used by pre-versioned offload images: registers
   HOST_TABLE/TARGET_DATA for TARGET_TYPE with version 0.  */

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
1370*38fd1498Szrj
1371*38fd1498Szrj /* This function should be called from every offload image while unloading.
1372*38fd1498Szrj It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
1373*38fd1498Szrj the target, and TARGET_DATA needed by target plugin. */
1374*38fd1498Szrj
1375*38fd1498Szrj void
GOMP_offload_unregister_ver(unsigned version,const void * host_table,int target_type,const void * target_data)1376*38fd1498Szrj GOMP_offload_unregister_ver (unsigned version, const void *host_table,
1377*38fd1498Szrj int target_type, const void *target_data)
1378*38fd1498Szrj {
1379*38fd1498Szrj int i;
1380*38fd1498Szrj
1381*38fd1498Szrj gomp_mutex_lock (®ister_lock);
1382*38fd1498Szrj
1383*38fd1498Szrj /* Unload image from all initialized devices. */
1384*38fd1498Szrj for (i = 0; i < num_devices; i++)
1385*38fd1498Szrj {
1386*38fd1498Szrj struct gomp_device_descr *devicep = &devices[i];
1387*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
1388*38fd1498Szrj if (devicep->type == target_type
1389*38fd1498Szrj && devicep->state == GOMP_DEVICE_INITIALIZED)
1390*38fd1498Szrj gomp_unload_image_from_device (devicep, version,
1391*38fd1498Szrj host_table, target_data);
1392*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1393*38fd1498Szrj }
1394*38fd1498Szrj
1395*38fd1498Szrj /* Remove image from array of pending images. */
1396*38fd1498Szrj for (i = 0; i < num_offload_images; i++)
1397*38fd1498Szrj if (offload_images[i].target_data == target_data)
1398*38fd1498Szrj {
1399*38fd1498Szrj offload_images[i] = offload_images[--num_offload_images];
1400*38fd1498Szrj break;
1401*38fd1498Szrj }
1402*38fd1498Szrj
1403*38fd1498Szrj gomp_mutex_unlock (®ister_lock);
1404*38fd1498Szrj }
1405*38fd1498Szrj
/* Legacy (unversioned) entry point for image unregistration; forwards to
   GOMP_offload_unregister_ver with version 0.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
1412*38fd1498Szrj
1413*38fd1498Szrj /* This function initializes the target device, specified by DEVICEP. DEVICEP
1414*38fd1498Szrj must be locked on entry, and remains locked on return. */
1415*38fd1498Szrj
1416*38fd1498Szrj attribute_hidden void
gomp_init_device(struct gomp_device_descr * devicep)1417*38fd1498Szrj gomp_init_device (struct gomp_device_descr *devicep)
1418*38fd1498Szrj {
1419*38fd1498Szrj int i;
1420*38fd1498Szrj if (!devicep->init_device_func (devicep->target_id))
1421*38fd1498Szrj {
1422*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1423*38fd1498Szrj gomp_fatal ("device initialization failed");
1424*38fd1498Szrj }
1425*38fd1498Szrj
1426*38fd1498Szrj /* Load to device all images registered by the moment. */
1427*38fd1498Szrj for (i = 0; i < num_offload_images; i++)
1428*38fd1498Szrj {
1429*38fd1498Szrj struct offload_image_descr *image = &offload_images[i];
1430*38fd1498Szrj if (image->type == devicep->type)
1431*38fd1498Szrj gomp_load_image_to_device (devicep, image->version,
1432*38fd1498Szrj image->host_table, image->target_data,
1433*38fd1498Szrj false);
1434*38fd1498Szrj }
1435*38fd1498Szrj
1436*38fd1498Szrj devicep->state = GOMP_DEVICE_INITIALIZED;
1437*38fd1498Szrj }
1438*38fd1498Szrj
1439*38fd1498Szrj attribute_hidden void
gomp_unload_device(struct gomp_device_descr * devicep)1440*38fd1498Szrj gomp_unload_device (struct gomp_device_descr *devicep)
1441*38fd1498Szrj {
1442*38fd1498Szrj if (devicep->state == GOMP_DEVICE_INITIALIZED)
1443*38fd1498Szrj {
1444*38fd1498Szrj unsigned i;
1445*38fd1498Szrj
1446*38fd1498Szrj /* Unload from device all images registered at the moment. */
1447*38fd1498Szrj for (i = 0; i < num_offload_images; i++)
1448*38fd1498Szrj {
1449*38fd1498Szrj struct offload_image_descr *image = &offload_images[i];
1450*38fd1498Szrj if (image->type == devicep->type)
1451*38fd1498Szrj gomp_unload_image_from_device (devicep, image->version,
1452*38fd1498Szrj image->host_table,
1453*38fd1498Szrj image->target_data);
1454*38fd1498Szrj }
1455*38fd1498Szrj }
1456*38fd1498Szrj }
1457*38fd1498Szrj
1458*38fd1498Szrj /* Free address mapping tables. MM must be locked on entry, and remains locked
1459*38fd1498Szrj on return. */
1460*38fd1498Szrj
1461*38fd1498Szrj attribute_hidden void
gomp_free_memmap(struct splay_tree_s * mem_map)1462*38fd1498Szrj gomp_free_memmap (struct splay_tree_s *mem_map)
1463*38fd1498Szrj {
1464*38fd1498Szrj while (mem_map->root)
1465*38fd1498Szrj {
1466*38fd1498Szrj struct target_mem_desc *tgt = mem_map->root->key.tgt;
1467*38fd1498Szrj
1468*38fd1498Szrj splay_tree_remove (mem_map, &mem_map->root->key);
1469*38fd1498Szrj free (tgt->array);
1470*38fd1498Szrj free (tgt);
1471*38fd1498Szrj }
1472*38fd1498Szrj }
1473*38fd1498Szrj
1474*38fd1498Szrj /* Host fallback for GOMP_target{,_ext} routines. */
1475*38fd1498Szrj
1476*38fd1498Szrj static void
gomp_target_fallback(void (* fn)(void *),void ** hostaddrs)1477*38fd1498Szrj gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
1478*38fd1498Szrj {
1479*38fd1498Szrj struct gomp_thread old_thr, *thr = gomp_thread ();
1480*38fd1498Szrj old_thr = *thr;
1481*38fd1498Szrj memset (thr, '\0', sizeof (*thr));
1482*38fd1498Szrj if (gomp_places_list)
1483*38fd1498Szrj {
1484*38fd1498Szrj thr->place = old_thr.place;
1485*38fd1498Szrj thr->ts.place_partition_len = gomp_places_list_len;
1486*38fd1498Szrj }
1487*38fd1498Szrj fn (hostaddrs);
1488*38fd1498Szrj gomp_free_thread (thr);
1489*38fd1498Szrj *thr = old_thr;
1490*38fd1498Szrj }
1491*38fd1498Szrj
1492*38fd1498Szrj /* Calculate alignment and size requirements of a private copy of data shared
1493*38fd1498Szrj as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
1494*38fd1498Szrj
1495*38fd1498Szrj static inline void
calculate_firstprivate_requirements(size_t mapnum,size_t * sizes,unsigned short * kinds,size_t * tgt_align,size_t * tgt_size)1496*38fd1498Szrj calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
1497*38fd1498Szrj unsigned short *kinds, size_t *tgt_align,
1498*38fd1498Szrj size_t *tgt_size)
1499*38fd1498Szrj {
1500*38fd1498Szrj size_t i;
1501*38fd1498Szrj for (i = 0; i < mapnum; i++)
1502*38fd1498Szrj if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1503*38fd1498Szrj {
1504*38fd1498Szrj size_t align = (size_t) 1 << (kinds[i] >> 8);
1505*38fd1498Szrj if (*tgt_align < align)
1506*38fd1498Szrj *tgt_align = align;
1507*38fd1498Szrj *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
1508*38fd1498Szrj *tgt_size += sizes[i];
1509*38fd1498Szrj }
1510*38fd1498Szrj }
1511*38fd1498Szrj
1512*38fd1498Szrj /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
1513*38fd1498Szrj
1514*38fd1498Szrj static inline void
copy_firstprivate_data(char * tgt,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,size_t tgt_align,size_t tgt_size)1515*38fd1498Szrj copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
1516*38fd1498Szrj size_t *sizes, unsigned short *kinds, size_t tgt_align,
1517*38fd1498Szrj size_t tgt_size)
1518*38fd1498Szrj {
1519*38fd1498Szrj uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
1520*38fd1498Szrj if (al)
1521*38fd1498Szrj tgt += tgt_align - al;
1522*38fd1498Szrj tgt_size = 0;
1523*38fd1498Szrj size_t i;
1524*38fd1498Szrj for (i = 0; i < mapnum; i++)
1525*38fd1498Szrj if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
1526*38fd1498Szrj {
1527*38fd1498Szrj size_t align = (size_t) 1 << (kinds[i] >> 8);
1528*38fd1498Szrj tgt_size = (tgt_size + align - 1) & ~(align - 1);
1529*38fd1498Szrj memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
1530*38fd1498Szrj hostaddrs[i] = tgt + tgt_size;
1531*38fd1498Szrj tgt_size = tgt_size + sizes[i];
1532*38fd1498Szrj }
1533*38fd1498Szrj }
1534*38fd1498Szrj
1535*38fd1498Szrj /* Helper function of GOMP_target{,_ext} routines. */
1536*38fd1498Szrj
1537*38fd1498Szrj static void *
gomp_get_target_fn_addr(struct gomp_device_descr * devicep,void (* host_fn)(void *))1538*38fd1498Szrj gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
1539*38fd1498Szrj void (*host_fn) (void *))
1540*38fd1498Szrj {
1541*38fd1498Szrj if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
1542*38fd1498Szrj return (void *) host_fn;
1543*38fd1498Szrj else
1544*38fd1498Szrj {
1545*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
1546*38fd1498Szrj if (devicep->state == GOMP_DEVICE_FINALIZED)
1547*38fd1498Szrj {
1548*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1549*38fd1498Szrj return NULL;
1550*38fd1498Szrj }
1551*38fd1498Szrj
1552*38fd1498Szrj struct splay_tree_key_s k;
1553*38fd1498Szrj k.host_start = (uintptr_t) host_fn;
1554*38fd1498Szrj k.host_end = k.host_start + 1;
1555*38fd1498Szrj splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
1556*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
1557*38fd1498Szrj if (tgt_fn == NULL)
1558*38fd1498Szrj return NULL;
1559*38fd1498Szrj
1560*38fd1498Szrj return (void *) tgt_fn->tgt_offset;
1561*38fd1498Szrj }
1562*38fd1498Szrj }
1563*38fd1498Szrj
1564*38fd1498Szrj /* Called when encountering a target directive. If DEVICE
1565*38fd1498Szrj is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
1566*38fd1498Szrj GOMP_DEVICE_HOST_FALLBACK (or any value
1567*38fd1498Szrj larger than last available hw device), use host fallback.
1568*38fd1498Szrj FN is address of host code, UNUSED is part of the current ABI, but
1569*38fd1498Szrj we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
1570*38fd1498Szrj with MAPNUM entries, with addresses of the host objects,
1571*38fd1498Szrj sizes of the host objects (resp. for pointer kind pointer bias
1572*38fd1498Szrj and assumed sizeof (void *) size) and kinds. */
1573*38fd1498Szrj
1574*38fd1498Szrj void
GOMP_target(int device,void (* fn)(void *),const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)1575*38fd1498Szrj GOMP_target (int device, void (*fn) (void *), const void *unused,
1576*38fd1498Szrj size_t mapnum, void **hostaddrs, size_t *sizes,
1577*38fd1498Szrj unsigned char *kinds)
1578*38fd1498Szrj {
1579*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device);
1580*38fd1498Szrj
1581*38fd1498Szrj void *fn_addr;
1582*38fd1498Szrj if (devicep == NULL
1583*38fd1498Szrj || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1584*38fd1498Szrj /* All shared memory devices should use the GOMP_target_ext function. */
1585*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
1586*38fd1498Szrj || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
1587*38fd1498Szrj return gomp_target_fallback (fn, hostaddrs);
1588*38fd1498Szrj
1589*38fd1498Szrj struct target_mem_desc *tgt_vars
1590*38fd1498Szrj = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1591*38fd1498Szrj GOMP_MAP_VARS_TARGET);
1592*38fd1498Szrj devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
1593*38fd1498Szrj NULL);
1594*38fd1498Szrj gomp_unmap_vars (tgt_vars, true);
1595*38fd1498Szrj }
1596*38fd1498Szrj
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element contains the actual value.  The
   last element of the array is a single NULL.  Among the device independent
   ones can be for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once the firstprivate copies have been made, so the later
     fallback / shared-mem paths don't redo them.  */
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      /* Tear down the previous (orphaned) task after copying its
		 ICVs into the new implicit task; order matters here.  */
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  /* Defer the whole target region as an asynchronous task;
	     mapping happens later from the task.  */
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      /* NOTE: the alloca must stay in this frame — the private
		 copies referenced through HOSTADDRS live on this stack,
		 so this code cannot be moved into a helper.  */
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  /* Host fallback when there is no usable device, no device version of FN,
     or the plugin declines to run it.  */
  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      /* Same alloca-lifetime constraint as above.  */
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory devices need no mapping, only private copies of
	 firstprivate data.  */
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      /* Same alloca-lifetime constraint as above.  */
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
1740*38fd1498Szrj
1741*38fd1498Szrj /* Host fallback for GOMP_target_data{,_ext} routines. */
1742*38fd1498Szrj
1743*38fd1498Szrj static void
gomp_target_data_fallback(void)1744*38fd1498Szrj gomp_target_data_fallback (void)
1745*38fd1498Szrj {
1746*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (false);
1747*38fd1498Szrj if (icv->target_data)
1748*38fd1498Szrj {
1749*38fd1498Szrj /* Even when doing a host fallback, if there are any active
1750*38fd1498Szrj #pragma omp target data constructs, need to remember the
1751*38fd1498Szrj new #pragma omp target data, otherwise GOMP_target_end_data
1752*38fd1498Szrj would get out of sync. */
1753*38fd1498Szrj struct target_mem_desc *tgt
1754*38fd1498Szrj = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
1755*38fd1498Szrj GOMP_MAP_VARS_DATA);
1756*38fd1498Szrj tgt->prev = icv->target_data;
1757*38fd1498Szrj icv->target_data = tgt;
1758*38fd1498Szrj }
1759*38fd1498Szrj }
1760*38fd1498Szrj
1761*38fd1498Szrj void
GOMP_target_data(int device,const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)1762*38fd1498Szrj GOMP_target_data (int device, const void *unused, size_t mapnum,
1763*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned char *kinds)
1764*38fd1498Szrj {
1765*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device);
1766*38fd1498Szrj
1767*38fd1498Szrj if (devicep == NULL
1768*38fd1498Szrj || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1769*38fd1498Szrj || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
1770*38fd1498Szrj return gomp_target_data_fallback ();
1771*38fd1498Szrj
1772*38fd1498Szrj struct target_mem_desc *tgt
1773*38fd1498Szrj = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
1774*38fd1498Szrj GOMP_MAP_VARS_DATA);
1775*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (true);
1776*38fd1498Szrj tgt->prev = icv->target_data;
1777*38fd1498Szrj icv->target_data = tgt;
1778*38fd1498Szrj }
1779*38fd1498Szrj
1780*38fd1498Szrj void
GOMP_target_data_ext(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)1781*38fd1498Szrj GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
1782*38fd1498Szrj size_t *sizes, unsigned short *kinds)
1783*38fd1498Szrj {
1784*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device);
1785*38fd1498Szrj
1786*38fd1498Szrj if (devicep == NULL
1787*38fd1498Szrj || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1788*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1789*38fd1498Szrj return gomp_target_data_fallback ();
1790*38fd1498Szrj
1791*38fd1498Szrj struct target_mem_desc *tgt
1792*38fd1498Szrj = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
1793*38fd1498Szrj GOMP_MAP_VARS_DATA);
1794*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (true);
1795*38fd1498Szrj tgt->prev = icv->target_data;
1796*38fd1498Szrj icv->target_data = tgt;
1797*38fd1498Szrj }
1798*38fd1498Szrj
1799*38fd1498Szrj void
GOMP_target_end_data(void)1800*38fd1498Szrj GOMP_target_end_data (void)
1801*38fd1498Szrj {
1802*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (false);
1803*38fd1498Szrj if (icv->target_data)
1804*38fd1498Szrj {
1805*38fd1498Szrj struct target_mem_desc *tgt = icv->target_data;
1806*38fd1498Szrj icv->target_data = tgt->prev;
1807*38fd1498Szrj gomp_unmap_vars (tgt, true);
1808*38fd1498Szrj }
1809*38fd1498Szrj }
1810*38fd1498Szrj
1811*38fd1498Szrj void
GOMP_target_update(int device,const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)1812*38fd1498Szrj GOMP_target_update (int device, const void *unused, size_t mapnum,
1813*38fd1498Szrj void **hostaddrs, size_t *sizes, unsigned char *kinds)
1814*38fd1498Szrj {
1815*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device);
1816*38fd1498Szrj
1817*38fd1498Szrj if (devicep == NULL
1818*38fd1498Szrj || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
1819*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
1820*38fd1498Szrj return;
1821*38fd1498Szrj
1822*38fd1498Szrj gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
1823*38fd1498Szrj }
1824*38fd1498Szrj
/* Handle #pragma omp target update (current ABI, 16-bit KINDS), with
   support for depend clauses and nowait.  With nowait plus satisfiable
   conditions the update is deferred as a target task; otherwise the
   update happens synchronously after dependencies resolve.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Try to defer the update as an asynchronous target task;
		 on success we are done here.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* No-op when there is no usable device or it shares host memory.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
1884*38fd1498Szrj
/* Implement the unmapping side of #pragma omp target exit data on DEVICEP:
   decrement reference counts for the MAPNUM listed mappings, copy back
   `from' data where required, and remove mappings whose count drops to
   zero.  Takes and releases DEVICEP->lock internally.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  /* Low byte of each kind selects the map operation.  */
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  /* Zero-length sections need the special lookup that also matches
	     empty ranges.  */
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  /* Not mapped: nothing to do for this entry.  */
	  if (!k)
	    continue;

	  /* REFCOUNT_INFINITY marks mappings that are never removed.  */
	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  /* `delete' forces the mapping out regardless of prior count.  */
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  /* Copy back before the mapping can disappear below.  */
	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      /* Re-expose a link key this mapping had shadowed.  */
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      /* Drop the containing descriptor when this was its last key.  */
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
1955*38fd1498Szrj
/* Handle #pragma omp target enter data / exit data.  The direction is
   encoded in FLAGS (GOMP_TARGET_FLAG_EXIT_DATA); supports depend clauses
   and nowait deferral via a target task.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      /* Defer the whole operation as an asynchronous target task
		 when possible.  */
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (team
		  && (gomp_team_barrier_cancelled (&team->barrier)
		      || (thr->task->taskgroup
			  && thr->task->taskgroup->cancelled)))
		return;

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  /* No-op when there is no usable device or it shares host memory.  */
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team
      && (gomp_team_barrier_cancelled (&team->barrier)
	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
    return;

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: map each clause.  A GOMP_MAP_STRUCT entry covers itself
       plus sizes[i] following entries, hence the extra index advance.  */
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
2028*38fd1498Szrj
/* Task-function callback for deferred (nowait) target operations.  DATA is
   the struct gomp_target_task.  Returns true when an asynchronous device
   run was started (the task must then wait for completion), false when the
   work finished synchronously.  */

bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  /* A non-NULL fn means a deferred target region; otherwise this is a
     deferred update / enter data / exit data operation.  */
  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  /* No usable device: run on the host, synchronously.  */
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      /* Second invocation, after the async device run completed:
	 just clean up the mappings.  */
      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  /* Shared-memory device: no mapping, pass host addresses.  */
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      /* Kick off the asynchronous device run; the plugin signals
	 completion via TTASK.  */
      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  /* Deferred data operation: dispatch on the flags.  */
  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    /* Enter data: a GOMP_MAP_STRUCT entry covers itself plus sizes[i]
       following entries, hence the extra index advance.  */
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}
2100*38fd1498Szrj
2101*38fd1498Szrj void
GOMP_teams(unsigned int num_teams,unsigned int thread_limit)2102*38fd1498Szrj GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
2103*38fd1498Szrj {
2104*38fd1498Szrj if (thread_limit)
2105*38fd1498Szrj {
2106*38fd1498Szrj struct gomp_task_icv *icv = gomp_icv (true);
2107*38fd1498Szrj icv->thread_limit_var
2108*38fd1498Szrj = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
2109*38fd1498Szrj }
2110*38fd1498Szrj (void) num_teams;
2111*38fd1498Szrj }
2112*38fd1498Szrj
2113*38fd1498Szrj void *
omp_target_alloc(size_t size,int device_num)2114*38fd1498Szrj omp_target_alloc (size_t size, int device_num)
2115*38fd1498Szrj {
2116*38fd1498Szrj if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2117*38fd1498Szrj return malloc (size);
2118*38fd1498Szrj
2119*38fd1498Szrj if (device_num < 0)
2120*38fd1498Szrj return NULL;
2121*38fd1498Szrj
2122*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device_num);
2123*38fd1498Szrj if (devicep == NULL)
2124*38fd1498Szrj return NULL;
2125*38fd1498Szrj
2126*38fd1498Szrj if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2127*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2128*38fd1498Szrj return malloc (size);
2129*38fd1498Szrj
2130*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
2131*38fd1498Szrj void *ret = devicep->alloc_func (devicep->target_id, size);
2132*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
2133*38fd1498Szrj return ret;
2134*38fd1498Szrj }
2135*38fd1498Szrj
2136*38fd1498Szrj void
omp_target_free(void * device_ptr,int device_num)2137*38fd1498Szrj omp_target_free (void *device_ptr, int device_num)
2138*38fd1498Szrj {
2139*38fd1498Szrj if (device_ptr == NULL)
2140*38fd1498Szrj return;
2141*38fd1498Szrj
2142*38fd1498Szrj if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2143*38fd1498Szrj {
2144*38fd1498Szrj free (device_ptr);
2145*38fd1498Szrj return;
2146*38fd1498Szrj }
2147*38fd1498Szrj
2148*38fd1498Szrj if (device_num < 0)
2149*38fd1498Szrj return;
2150*38fd1498Szrj
2151*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device_num);
2152*38fd1498Szrj if (devicep == NULL)
2153*38fd1498Szrj return;
2154*38fd1498Szrj
2155*38fd1498Szrj if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2156*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2157*38fd1498Szrj {
2158*38fd1498Szrj free (device_ptr);
2159*38fd1498Szrj return;
2160*38fd1498Szrj }
2161*38fd1498Szrj
2162*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
2163*38fd1498Szrj gomp_free_device_memory (devicep, device_ptr);
2164*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
2165*38fd1498Szrj }
2166*38fd1498Szrj
2167*38fd1498Szrj int
omp_target_is_present(void * ptr,int device_num)2168*38fd1498Szrj omp_target_is_present (void *ptr, int device_num)
2169*38fd1498Szrj {
2170*38fd1498Szrj if (ptr == NULL)
2171*38fd1498Szrj return 1;
2172*38fd1498Szrj
2173*38fd1498Szrj if (device_num == GOMP_DEVICE_HOST_FALLBACK)
2174*38fd1498Szrj return 1;
2175*38fd1498Szrj
2176*38fd1498Szrj if (device_num < 0)
2177*38fd1498Szrj return 0;
2178*38fd1498Szrj
2179*38fd1498Szrj struct gomp_device_descr *devicep = resolve_device (device_num);
2180*38fd1498Szrj if (devicep == NULL)
2181*38fd1498Szrj return 0;
2182*38fd1498Szrj
2183*38fd1498Szrj if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2184*38fd1498Szrj || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2185*38fd1498Szrj return 1;
2186*38fd1498Szrj
2187*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
2188*38fd1498Szrj struct splay_tree_s *mem_map = &devicep->mem_map;
2189*38fd1498Szrj struct splay_tree_key_s cur_node;
2190*38fd1498Szrj
2191*38fd1498Szrj cur_node.host_start = (uintptr_t) ptr;
2192*38fd1498Szrj cur_node.host_end = cur_node.host_start;
2193*38fd1498Szrj splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
2194*38fd1498Szrj int ret = n != NULL;
2195*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
2196*38fd1498Szrj return ret;
2197*38fd1498Szrj }
2198*38fd1498Szrj
2199*38fd1498Szrj int
omp_target_memcpy(void * dst,void * src,size_t length,size_t dst_offset,size_t src_offset,int dst_device_num,int src_device_num)2200*38fd1498Szrj omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset,
2201*38fd1498Szrj size_t src_offset, int dst_device_num, int src_device_num)
2202*38fd1498Szrj {
2203*38fd1498Szrj struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2204*38fd1498Szrj bool ret;
2205*38fd1498Szrj
2206*38fd1498Szrj if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2207*38fd1498Szrj {
2208*38fd1498Szrj if (dst_device_num < 0)
2209*38fd1498Szrj return EINVAL;
2210*38fd1498Szrj
2211*38fd1498Szrj dst_devicep = resolve_device (dst_device_num);
2212*38fd1498Szrj if (dst_devicep == NULL)
2213*38fd1498Szrj return EINVAL;
2214*38fd1498Szrj
2215*38fd1498Szrj if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2216*38fd1498Szrj || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2217*38fd1498Szrj dst_devicep = NULL;
2218*38fd1498Szrj }
2219*38fd1498Szrj if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2220*38fd1498Szrj {
2221*38fd1498Szrj if (src_device_num < 0)
2222*38fd1498Szrj return EINVAL;
2223*38fd1498Szrj
2224*38fd1498Szrj src_devicep = resolve_device (src_device_num);
2225*38fd1498Szrj if (src_devicep == NULL)
2226*38fd1498Szrj return EINVAL;
2227*38fd1498Szrj
2228*38fd1498Szrj if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2229*38fd1498Szrj || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2230*38fd1498Szrj src_devicep = NULL;
2231*38fd1498Szrj }
2232*38fd1498Szrj if (src_devicep == NULL && dst_devicep == NULL)
2233*38fd1498Szrj {
2234*38fd1498Szrj memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
2235*38fd1498Szrj return 0;
2236*38fd1498Szrj }
2237*38fd1498Szrj if (src_devicep == NULL)
2238*38fd1498Szrj {
2239*38fd1498Szrj gomp_mutex_lock (&dst_devicep->lock);
2240*38fd1498Szrj ret = dst_devicep->host2dev_func (dst_devicep->target_id,
2241*38fd1498Szrj (char *) dst + dst_offset,
2242*38fd1498Szrj (char *) src + src_offset, length);
2243*38fd1498Szrj gomp_mutex_unlock (&dst_devicep->lock);
2244*38fd1498Szrj return (ret ? 0 : EINVAL);
2245*38fd1498Szrj }
2246*38fd1498Szrj if (dst_devicep == NULL)
2247*38fd1498Szrj {
2248*38fd1498Szrj gomp_mutex_lock (&src_devicep->lock);
2249*38fd1498Szrj ret = src_devicep->dev2host_func (src_devicep->target_id,
2250*38fd1498Szrj (char *) dst + dst_offset,
2251*38fd1498Szrj (char *) src + src_offset, length);
2252*38fd1498Szrj gomp_mutex_unlock (&src_devicep->lock);
2253*38fd1498Szrj return (ret ? 0 : EINVAL);
2254*38fd1498Szrj }
2255*38fd1498Szrj if (src_devicep == dst_devicep)
2256*38fd1498Szrj {
2257*38fd1498Szrj gomp_mutex_lock (&src_devicep->lock);
2258*38fd1498Szrj ret = src_devicep->dev2dev_func (src_devicep->target_id,
2259*38fd1498Szrj (char *) dst + dst_offset,
2260*38fd1498Szrj (char *) src + src_offset, length);
2261*38fd1498Szrj gomp_mutex_unlock (&src_devicep->lock);
2262*38fd1498Szrj return (ret ? 0 : EINVAL);
2263*38fd1498Szrj }
2264*38fd1498Szrj return EINVAL;
2265*38fd1498Szrj }
2266*38fd1498Szrj
/* Recursive worker for omp_target_memcpy_rect: copy a NUM_DIMS-dimensional
   rectangular subvolume of VOLUME[] elements (each ELEMENT_SIZE bytes)
   from SRC, offset by SRC_OFFSETS within an array of SRC_DIMENSIONS, to
   DST, offset by DST_OFFSETS within an array of DST_DIMENSIONS.  A NULL
   DST_DEVICEP/SRC_DEVICEP means the corresponding side is host memory.
   Returns 0 on success, EINVAL on failure (including arithmetic overflow
   of any byte count).  The caller holds the device lock, if any.  */
static int
omp_target_memcpy_rect_worker (void *dst, void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      /* Base case: a single contiguous run of volume[0] elements.  All
	 byte counts are computed with overflow checking.  */
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (char *) src + src_off, length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (char *) src + src_off, length);
      else
	/* Copying between two distinct devices is unsupported.  */
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have some plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
     be handled in the generic recursion below, and for host-host it
     should be used even for any num_dims >= 2.  */

  /* DST_SLICE/SRC_SLICE become the byte size of one slice of the
     outermost dimension (product of all inner dimensions).  */
  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  /* Recurse over each requested slice of the outermost dimension.  */
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}
2338*38fd1498Szrj
2339*38fd1498Szrj int
omp_target_memcpy_rect(void * dst,void * src,size_t element_size,int num_dims,const size_t * volume,const size_t * dst_offsets,const size_t * src_offsets,const size_t * dst_dimensions,const size_t * src_dimensions,int dst_device_num,int src_device_num)2340*38fd1498Szrj omp_target_memcpy_rect (void *dst, void *src, size_t element_size,
2341*38fd1498Szrj int num_dims, const size_t *volume,
2342*38fd1498Szrj const size_t *dst_offsets,
2343*38fd1498Szrj const size_t *src_offsets,
2344*38fd1498Szrj const size_t *dst_dimensions,
2345*38fd1498Szrj const size_t *src_dimensions,
2346*38fd1498Szrj int dst_device_num, int src_device_num)
2347*38fd1498Szrj {
2348*38fd1498Szrj struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
2349*38fd1498Szrj
2350*38fd1498Szrj if (!dst && !src)
2351*38fd1498Szrj return INT_MAX;
2352*38fd1498Szrj
2353*38fd1498Szrj if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
2354*38fd1498Szrj {
2355*38fd1498Szrj if (dst_device_num < 0)
2356*38fd1498Szrj return EINVAL;
2357*38fd1498Szrj
2358*38fd1498Szrj dst_devicep = resolve_device (dst_device_num);
2359*38fd1498Szrj if (dst_devicep == NULL)
2360*38fd1498Szrj return EINVAL;
2361*38fd1498Szrj
2362*38fd1498Szrj if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2363*38fd1498Szrj || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2364*38fd1498Szrj dst_devicep = NULL;
2365*38fd1498Szrj }
2366*38fd1498Szrj if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
2367*38fd1498Szrj {
2368*38fd1498Szrj if (src_device_num < 0)
2369*38fd1498Szrj return EINVAL;
2370*38fd1498Szrj
2371*38fd1498Szrj src_devicep = resolve_device (src_device_num);
2372*38fd1498Szrj if (src_devicep == NULL)
2373*38fd1498Szrj return EINVAL;
2374*38fd1498Szrj
2375*38fd1498Szrj if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2376*38fd1498Szrj || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2377*38fd1498Szrj src_devicep = NULL;
2378*38fd1498Szrj }
2379*38fd1498Szrj
2380*38fd1498Szrj if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
2381*38fd1498Szrj return EINVAL;
2382*38fd1498Szrj
2383*38fd1498Szrj if (src_devicep)
2384*38fd1498Szrj gomp_mutex_lock (&src_devicep->lock);
2385*38fd1498Szrj else if (dst_devicep)
2386*38fd1498Szrj gomp_mutex_lock (&dst_devicep->lock);
2387*38fd1498Szrj int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
2388*38fd1498Szrj volume, dst_offsets, src_offsets,
2389*38fd1498Szrj dst_dimensions, src_dimensions,
2390*38fd1498Szrj dst_devicep, src_devicep);
2391*38fd1498Szrj if (src_devicep)
2392*38fd1498Szrj gomp_mutex_unlock (&src_devicep->lock);
2393*38fd1498Szrj else if (dst_devicep)
2394*38fd1498Szrj gomp_mutex_unlock (&dst_devicep->lock);
2395*38fd1498Szrj return ret;
2396*38fd1498Szrj }
2397*38fd1498Szrj
/* omp_target_associate_ptr: associate the [HOST_PTR, HOST_PTR + SIZE)
   host range with DEVICE_PTR + DEVICE_OFFSET on device DEVICE_NUM, by
   inserting an artificial entry into the device's memory map.  Returns 0
   on success (or if an identical association already exists) and EINVAL
   otherwise.  Not meaningful for the host or shared-memory devices.  */
int
omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size,
			  size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  /* Associations make no sense when device and host share memory.  */
  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      /* An overlapping mapping already exists; succeed only if it maps
	 the requested range to exactly the requested device address.  */
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      /* Build a synthetic target_mem_desc with a single splay-tree node.
	 tgt_start == 0 and to_free == NULL mark it as an association
	 rather than a real device allocation (checked again by
	 omp_target_disassociate_ptr).  */
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      /* REFCOUNT_INFINITY: the association persists until explicitly
	 removed, never via normal unmapping.  */
      k->refcount = REFCOUNT_INFINITY;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2459*38fd1498Szrj
/* omp_target_disassociate_ptr: remove an association previously created
   by omp_target_associate_ptr for PTR on device DEVICE_NUM.  Returns 0
   on success, EINVAL if no matching association entry is found.  */
int
omp_target_disassociate_ptr (void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  /* Only remove entries that look exactly like the synthetic ones
     created by omp_target_associate_ptr (infinite refcount, zero
     tgt_start, nothing to free, sole reference, empty list) — never a
     mapping created by normal target data handling.  */
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
2501*38fd1498Szrj
2502*38fd1498Szrj #ifdef PLUGIN_SUPPORT
2503*38fd1498Szrj
2504*38fd1498Szrj /* This function tries to load a plugin for DEVICE. Name of plugin is passed
2505*38fd1498Szrj in PLUGIN_NAME.
2506*38fd1498Szrj The handles of the found functions are stored in the corresponding fields
2507*38fd1498Szrj of DEVICE. The function returns TRUE on success and FALSE otherwise. */
2508*38fd1498Szrj
2509*38fd1498Szrj static bool
gomp_load_plugin_for_device(struct gomp_device_descr * device,const char * plugin_name)2510*38fd1498Szrj gomp_load_plugin_for_device (struct gomp_device_descr *device,
2511*38fd1498Szrj const char *plugin_name)
2512*38fd1498Szrj {
2513*38fd1498Szrj const char *err = NULL, *last_missing = NULL;
2514*38fd1498Szrj
2515*38fd1498Szrj void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
2516*38fd1498Szrj if (!plugin_handle)
2517*38fd1498Szrj goto dl_fail;
2518*38fd1498Szrj
2519*38fd1498Szrj /* Check if all required functions are available in the plugin and store
2520*38fd1498Szrj their handlers. None of the symbols can legitimately be NULL,
2521*38fd1498Szrj so we don't need to check dlerror all the time. */
2522*38fd1498Szrj #define DLSYM(f) \
2523*38fd1498Szrj if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \
2524*38fd1498Szrj goto dl_fail
2525*38fd1498Szrj /* Similar, but missing functions are not an error. Return false if
2526*38fd1498Szrj failed, true otherwise. */
2527*38fd1498Szrj #define DLSYM_OPT(f, n) \
2528*38fd1498Szrj ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \
2529*38fd1498Szrj || (last_missing = #n, 0))
2530*38fd1498Szrj
2531*38fd1498Szrj DLSYM (version);
2532*38fd1498Szrj if (device->version_func () != GOMP_VERSION)
2533*38fd1498Szrj {
2534*38fd1498Szrj err = "plugin version mismatch";
2535*38fd1498Szrj goto fail;
2536*38fd1498Szrj }
2537*38fd1498Szrj
2538*38fd1498Szrj DLSYM (get_name);
2539*38fd1498Szrj DLSYM (get_caps);
2540*38fd1498Szrj DLSYM (get_type);
2541*38fd1498Szrj DLSYM (get_num_devices);
2542*38fd1498Szrj DLSYM (init_device);
2543*38fd1498Szrj DLSYM (fini_device);
2544*38fd1498Szrj DLSYM (load_image);
2545*38fd1498Szrj DLSYM (unload_image);
2546*38fd1498Szrj DLSYM (alloc);
2547*38fd1498Szrj DLSYM (free);
2548*38fd1498Szrj DLSYM (dev2host);
2549*38fd1498Szrj DLSYM (host2dev);
2550*38fd1498Szrj device->capabilities = device->get_caps_func ();
2551*38fd1498Szrj if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2552*38fd1498Szrj {
2553*38fd1498Szrj DLSYM (run);
2554*38fd1498Szrj DLSYM (async_run);
2555*38fd1498Szrj DLSYM_OPT (can_run, can_run);
2556*38fd1498Szrj DLSYM (dev2dev);
2557*38fd1498Szrj }
2558*38fd1498Szrj if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2559*38fd1498Szrj {
2560*38fd1498Szrj if (!DLSYM_OPT (openacc.exec, openacc_exec)
2561*38fd1498Szrj || !DLSYM_OPT (openacc.register_async_cleanup,
2562*38fd1498Szrj openacc_register_async_cleanup)
2563*38fd1498Szrj || !DLSYM_OPT (openacc.async_test, openacc_async_test)
2564*38fd1498Szrj || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
2565*38fd1498Szrj || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
2566*38fd1498Szrj || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
2567*38fd1498Szrj || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
2568*38fd1498Szrj || !DLSYM_OPT (openacc.async_wait_all_async,
2569*38fd1498Szrj openacc_async_wait_all_async)
2570*38fd1498Szrj || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
2571*38fd1498Szrj || !DLSYM_OPT (openacc.create_thread_data,
2572*38fd1498Szrj openacc_create_thread_data)
2573*38fd1498Szrj || !DLSYM_OPT (openacc.destroy_thread_data,
2574*38fd1498Szrj openacc_destroy_thread_data))
2575*38fd1498Szrj {
2576*38fd1498Szrj /* Require all the OpenACC handlers if we have
2577*38fd1498Szrj GOMP_OFFLOAD_CAP_OPENACC_200. */
2578*38fd1498Szrj err = "plugin missing OpenACC handler function";
2579*38fd1498Szrj goto fail;
2580*38fd1498Szrj }
2581*38fd1498Szrj
2582*38fd1498Szrj unsigned cuda = 0;
2583*38fd1498Szrj cuda += DLSYM_OPT (openacc.cuda.get_current_device,
2584*38fd1498Szrj openacc_cuda_get_current_device);
2585*38fd1498Szrj cuda += DLSYM_OPT (openacc.cuda.get_current_context,
2586*38fd1498Szrj openacc_cuda_get_current_context);
2587*38fd1498Szrj cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
2588*38fd1498Szrj cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
2589*38fd1498Szrj if (cuda && cuda != 4)
2590*38fd1498Szrj {
2591*38fd1498Szrj /* Make sure all the CUDA functions are there if any of them are. */
2592*38fd1498Szrj err = "plugin missing OpenACC CUDA handler function";
2593*38fd1498Szrj goto fail;
2594*38fd1498Szrj }
2595*38fd1498Szrj }
2596*38fd1498Szrj #undef DLSYM
2597*38fd1498Szrj #undef DLSYM_OPT
2598*38fd1498Szrj
2599*38fd1498Szrj return 1;
2600*38fd1498Szrj
2601*38fd1498Szrj dl_fail:
2602*38fd1498Szrj err = dlerror ();
2603*38fd1498Szrj fail:
2604*38fd1498Szrj gomp_error ("while loading %s: %s", plugin_name, err);
2605*38fd1498Szrj if (last_missing)
2606*38fd1498Szrj gomp_error ("missing function was %s", last_missing);
2607*38fd1498Szrj if (plugin_handle)
2608*38fd1498Szrj dlclose (plugin_handle);
2609*38fd1498Szrj
2610*38fd1498Szrj return 0;
2611*38fd1498Szrj }
2612*38fd1498Szrj
2613*38fd1498Szrj /* This function finalizes all initialized devices. */
2614*38fd1498Szrj
2615*38fd1498Szrj static void
gomp_target_fini(void)2616*38fd1498Szrj gomp_target_fini (void)
2617*38fd1498Szrj {
2618*38fd1498Szrj int i;
2619*38fd1498Szrj for (i = 0; i < num_devices; i++)
2620*38fd1498Szrj {
2621*38fd1498Szrj bool ret = true;
2622*38fd1498Szrj struct gomp_device_descr *devicep = &devices[i];
2623*38fd1498Szrj gomp_mutex_lock (&devicep->lock);
2624*38fd1498Szrj if (devicep->state == GOMP_DEVICE_INITIALIZED)
2625*38fd1498Szrj {
2626*38fd1498Szrj ret = devicep->fini_device_func (devicep->target_id);
2627*38fd1498Szrj devicep->state = GOMP_DEVICE_FINALIZED;
2628*38fd1498Szrj }
2629*38fd1498Szrj gomp_mutex_unlock (&devicep->lock);
2630*38fd1498Szrj if (!ret)
2631*38fd1498Szrj gomp_fatal ("device finalization failed");
2632*38fd1498Szrj }
2633*38fd1498Szrj }
2634*38fd1498Szrj
2635*38fd1498Szrj /* This function initializes the runtime needed for offloading.
2636*38fd1498Szrj It parses the list of offload targets and tries to load the plugins for
2637*38fd1498Szrj these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
2638*38fd1498Szrj will be set, and the array DEVICES initialized, containing descriptors for
2639*38fd1498Szrj corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
2640*38fd1498Szrj by the others. */
2641*38fd1498Szrj
2642*38fd1498Szrj static void
gomp_target_init(void)2643*38fd1498Szrj gomp_target_init (void)
2644*38fd1498Szrj {
2645*38fd1498Szrj const char *prefix ="libgomp-plugin-";
2646*38fd1498Szrj const char *suffix = SONAME_SUFFIX (1);
2647*38fd1498Szrj const char *cur, *next;
2648*38fd1498Szrj char *plugin_name;
2649*38fd1498Szrj int i, new_num_devices;
2650*38fd1498Szrj
2651*38fd1498Szrj num_devices = 0;
2652*38fd1498Szrj devices = NULL;
2653*38fd1498Szrj
2654*38fd1498Szrj cur = OFFLOAD_TARGETS;
2655*38fd1498Szrj if (*cur)
2656*38fd1498Szrj do
2657*38fd1498Szrj {
2658*38fd1498Szrj struct gomp_device_descr current_device;
2659*38fd1498Szrj size_t prefix_len, suffix_len, cur_len;
2660*38fd1498Szrj
2661*38fd1498Szrj next = strchr (cur, ',');
2662*38fd1498Szrj
2663*38fd1498Szrj prefix_len = strlen (prefix);
2664*38fd1498Szrj cur_len = next ? next - cur : strlen (cur);
2665*38fd1498Szrj suffix_len = strlen (suffix);
2666*38fd1498Szrj
2667*38fd1498Szrj plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
2668*38fd1498Szrj if (!plugin_name)
2669*38fd1498Szrj {
2670*38fd1498Szrj num_devices = 0;
2671*38fd1498Szrj break;
2672*38fd1498Szrj }
2673*38fd1498Szrj
2674*38fd1498Szrj memcpy (plugin_name, prefix, prefix_len);
2675*38fd1498Szrj memcpy (plugin_name + prefix_len, cur, cur_len);
2676*38fd1498Szrj memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
2677*38fd1498Szrj
2678*38fd1498Szrj if (gomp_load_plugin_for_device (¤t_device, plugin_name))
2679*38fd1498Szrj {
2680*38fd1498Szrj new_num_devices = current_device.get_num_devices_func ();
2681*38fd1498Szrj if (new_num_devices >= 1)
2682*38fd1498Szrj {
2683*38fd1498Szrj /* Augment DEVICES and NUM_DEVICES. */
2684*38fd1498Szrj
2685*38fd1498Szrj devices = realloc (devices, (num_devices + new_num_devices)
2686*38fd1498Szrj * sizeof (struct gomp_device_descr));
2687*38fd1498Szrj if (!devices)
2688*38fd1498Szrj {
2689*38fd1498Szrj num_devices = 0;
2690*38fd1498Szrj free (plugin_name);
2691*38fd1498Szrj break;
2692*38fd1498Szrj }
2693*38fd1498Szrj
2694*38fd1498Szrj current_device.name = current_device.get_name_func ();
2695*38fd1498Szrj /* current_device.capabilities has already been set. */
2696*38fd1498Szrj current_device.type = current_device.get_type_func ();
2697*38fd1498Szrj current_device.mem_map.root = NULL;
2698*38fd1498Szrj current_device.state = GOMP_DEVICE_UNINITIALIZED;
2699*38fd1498Szrj current_device.openacc.data_environ = NULL;
2700*38fd1498Szrj for (i = 0; i < new_num_devices; i++)
2701*38fd1498Szrj {
2702*38fd1498Szrj current_device.target_id = i;
2703*38fd1498Szrj devices[num_devices] = current_device;
2704*38fd1498Szrj gomp_mutex_init (&devices[num_devices].lock);
2705*38fd1498Szrj num_devices++;
2706*38fd1498Szrj }
2707*38fd1498Szrj }
2708*38fd1498Szrj }
2709*38fd1498Szrj
2710*38fd1498Szrj free (plugin_name);
2711*38fd1498Szrj cur = next + 1;
2712*38fd1498Szrj }
2713*38fd1498Szrj while (next);
2714*38fd1498Szrj
2715*38fd1498Szrj /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
2716*38fd1498Szrj NUM_DEVICES_OPENMP. */
2717*38fd1498Szrj struct gomp_device_descr *devices_s
2718*38fd1498Szrj = malloc (num_devices * sizeof (struct gomp_device_descr));
2719*38fd1498Szrj if (!devices_s)
2720*38fd1498Szrj {
2721*38fd1498Szrj num_devices = 0;
2722*38fd1498Szrj free (devices);
2723*38fd1498Szrj devices = NULL;
2724*38fd1498Szrj }
2725*38fd1498Szrj num_devices_openmp = 0;
2726*38fd1498Szrj for (i = 0; i < num_devices; i++)
2727*38fd1498Szrj if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2728*38fd1498Szrj devices_s[num_devices_openmp++] = devices[i];
2729*38fd1498Szrj int num_devices_after_openmp = num_devices_openmp;
2730*38fd1498Szrj for (i = 0; i < num_devices; i++)
2731*38fd1498Szrj if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
2732*38fd1498Szrj devices_s[num_devices_after_openmp++] = devices[i];
2733*38fd1498Szrj free (devices);
2734*38fd1498Szrj devices = devices_s;
2735*38fd1498Szrj
2736*38fd1498Szrj for (i = 0; i < num_devices; i++)
2737*38fd1498Szrj {
2738*38fd1498Szrj /* The 'devices' array can be moved (by the realloc call) until we have
2739*38fd1498Szrj found all the plugins, so registering with the OpenACC runtime (which
2740*38fd1498Szrj takes a copy of the pointer argument) must be delayed until now. */
2741*38fd1498Szrj if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
2742*38fd1498Szrj goacc_register (&devices[i]);
2743*38fd1498Szrj }
2744*38fd1498Szrj
2745*38fd1498Szrj if (atexit (gomp_target_fini) != 0)
2746*38fd1498Szrj gomp_fatal ("atexit failed");
2747*38fd1498Szrj }
2748*38fd1498Szrj
2749*38fd1498Szrj #else /* PLUGIN_SUPPORT */
2750*38fd1498Szrj /* If dlfcn.h is unavailable we always fallback to host execution.
2751*38fd1498Szrj GOMP_target* routines are just stubs for this case. */
static void
gomp_target_init (void)
{
  /* No plugin support (dlfcn.h unavailable): nothing to initialize,
     all GOMP_target* entry points fall back to host execution.  */
}
2756*38fd1498Szrj #endif /* PLUGIN_SUPPORT */
2757