/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  unsigned version;
  enum offload_target_type type;
  const void *host_table;
  const void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  gomp_mutex_lock (&devices[device_id].lock);
  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
    gomp_init_device (&devices[device_id]);
  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devices[device_id].lock);
      return NULL;
    }
  gomp_mutex_unlock (&devices[device_id].lock);

  return &devices[device_id];
}


static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  if (n)
    return n;
  key->host_start--;
  n = splay_tree_lookup (mem_map, key);
  key->host_start++;
  if (n)
    return n;
  return splay_tree_lookup (mem_map, key);
}

static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
  if (key->host_start != key->host_end)
    return splay_tree_lookup (mem_map, key);

  key->host_end++;
  splay_tree_key n = splay_tree_lookup (mem_map, key);
  key->host_end--;
  return n;
}
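/* A note on the zero-length probes above, inferred from the lookups: a
   zero-sized key [P, P) never intersects any splay-tree node, so it is
   temporarily widened to [P, P+1) (and, in gomp_map_lookup, also to
   [P-1, P)) to find a mapping that contains or ends at P.  For example,
   with a mapped region [H, H+64) and P == H + 64, the [P, P+1) probe
   fails but the [P-1, P) probe succeeds.  */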
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
		  bool (*copy_func) (int, void *, const void *, size_t),
		  const char *dst, void *dstaddr,
		  const char *src, const void *srcaddr,
		  size_t size)
{
  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
    }
}

/* Infrastructure for coalescing adjacent or nearly adjacent (in device
   addresses) host to device memory transfers.  */

struct gomp_coalesce_chunk
{
  /* The starting and ending point of a coalesced chunk of memory.  */
  size_t start, end;
};

struct gomp_coalesce_buf
{
  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
     it will be copied to the device.  */
  void *buf;
  struct target_mem_desc *tgt;
  /* Array of chunks: chunks[i].start is the starting offset and
     chunks[i].end the ending offset, relative to the tgt->tgt_start device
     address, of chunks which are to be copied to buf and later copied to
     the device.  */
  struct gomp_coalesce_chunk *chunks;
  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
     be performed.  */
  long chunk_cnt;
  /* During construction of the chunks array, how many memory regions are
     within the last chunk.  If there is just one memory region for a chunk,
     we copy it directly to the device rather than going through buf.  */
  long use_cnt;
};

/* Maximum size of a memory region considered for coalescing.  Larger copies
   are performed directly.  */
#define MAX_COALESCE_BUF_SIZE	(32 * 1024)

/* Maximum size of a gap in between regions to consider them being copied
   within the same chunk.  All the device offsets considered are within
   newly allocated device memory, so it isn't fatal if we copy some padding
   in between from host to device.  The gaps come either from alignment
   padding or from memory regions which are not supposed to be copied from
   host to device (e.g. map(alloc:), map(from:) etc.).  */
#define MAX_COALESCE_BUF_GAP	(4 * 1024)

/* Add region with device tgt_start relative offset and length to CBUF.  */

static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
    return;
  if (cbuf->chunk_cnt)
    {
      if (cbuf->chunk_cnt < 0)
	return;
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  cbuf->chunk_cnt = -1;
	  return;
	}
      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
	{
	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
	  cbuf->use_cnt++;
	  return;
	}
      /* If the last chunk is only used by one mapping, discard it,
	 as it will be one host to device copy anyway and
	 memcpying it around will only waste cycles.  */
      if (cbuf->use_cnt == 1)
	cbuf->chunk_cnt--;
    }
  cbuf->chunks[cbuf->chunk_cnt].start = start;
  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
  cbuf->chunk_cnt++;
  cbuf->use_cnt = 1;
}
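/* A worked example of the merging above, under the default limits: two
   regions at tgt_start-relative offsets [0, 1024) and [2048, 3072) are
   1 KiB apart, which is below MAX_COALESCE_BUF_GAP, so they end up as the
   single chunk [0, 3072) with use_cnt == 2 and are transferred with one
   host-to-device copy (including 1 KiB of padding) instead of two.  Had
   the second region started at offset 8192 instead, a new chunk would have
   been opened, and the first chunk, being used by a single mapping only,
   would have been discarded again.  */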
/* Return true for mapping kinds which need to copy data from the
   host to device for regions that weren't previously mapped.  */

static inline bool
gomp_to_device_kind_p (int kind)
{
  switch (kind)
    {
    case GOMP_MAP_ALLOC:
    case GOMP_MAP_FROM:
    case GOMP_MAP_FORCE_ALLOC:
    case GOMP_MAP_ALWAYS_FROM:
      return false;
    default:
      return true;
    }
}

static void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    void *d, const void *h, size_t sz,
		    struct gomp_coalesce_buf *cbuf)
{
  if (cbuf)
    {
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  if (doff + sz > cbuf->chunks[middle].end)
		    gomp_fatal ("internal libgomp cbuf error");
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

static void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    void *h, const void *d, size_t sz)
{
  gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Handle the case where gomp_map_lookup, splay_tree_lookup or
   gomp_map_0len_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, struct target_var_desc *tgt_var,
			unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
  tgt_var->key = oldn;
  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
  tgt_var->offset = newn->host_start - oldn->host_start;
  tgt_var->length = newn->host_end - newn->host_start;

  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }

  if (GOMP_MAP_ALWAYS_TO_P (kind))
    gomp_copy_host2dev (devicep,
			(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
				  + newn->host_start - oldn->host_start),
			(void *) newn->host_start,
			newn->host_end - newn->host_start, cbuf);

  if (oldn->refcount != REFCOUNT_INFINITY)
    oldn->refcount++;
}

static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
  return short_mapkind ? ((unsigned short *) kinds)[idx]
		       : ((unsigned char *) kinds)[idx];
}
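/* The encoding assumed by get_kind's callers, inferred from the rshift and
   typemask values used below: for short map kinds the low byte (mask 0xff)
   holds the GOMP_MAP_* code and the upper byte holds log2 of the requested
   alignment, so e.g. kind == 0x0403 is GOMP_MAP_TOFROM with 1 << 4 == 16
   byte alignment; for the old unsigned char encoding the split is the 3 low
   bits for the kind (mask 0x7) and 5 bits of alignment (rshift 3).  */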
static void
gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
		  uintptr_t target_offset, uintptr_t bias,
		  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = host_ptr;
  if (cur_node.host_start == (uintptr_t) NULL)
    {
      cur_node.tgt_offset = (uintptr_t) NULL;
      gomp_copy_host2dev (devicep,
			  (void *) (tgt->tgt_start + target_offset),
			  (void *) &cur_node.tgt_offset,
			  sizeof (void *), cbuf);
      return;
    }
  /* Add bias to the pointer value.  */
  cur_node.host_start += bias;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Pointer target of array section wasn't mapped");
    }
  cur_node.host_start -= n->host_start;
  cur_node.tgt_offset
    = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
  /* At this point tgt_offset is target address of the
     array section.  Now subtract bias to get what we want
     to initialize the pointer with.  */
  cur_node.tgt_offset -= bias;
  gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
		      (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}

static void
gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
			  size_t first, size_t i, void **hostaddrs,
			  size_t *sizes, void *kinds,
			  struct gomp_coalesce_buf *cbuf)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int kind;
  const bool short_mapkind = true;
  const int typemask = short_mapkind ? 0xff : 0x7;

  cur_node.host_start = (uintptr_t) hostaddrs[i];
  cur_node.host_end = cur_node.host_start + sizes[i];
  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
  kind = get_kind (short_mapkind, kinds, i);
  if (n2
      && n2->tgt == n->tgt
      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
    {
      gomp_map_vars_existing (devicep, n2, &cur_node,
			      &tgt->list[i], kind & typemask, cbuf);
      return;
    }
  if (sizes[i] == 0)
    {
      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
	{
	  cur_node.host_start--;
	  n2 = splay_tree_lookup (mem_map, &cur_node);
	  cur_node.host_start++;
	  if (n2
	      && n2->tgt == n->tgt
	      && n2->host_start - n->host_start
		 == n2->tgt_offset - n->tgt_offset)
	    {
	      gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				      kind & typemask, cbuf);
	      return;
	    }
	}
      cur_node.host_end++;
      n2 = splay_tree_lookup (mem_map, &cur_node);
      cur_node.host_end--;
      if (n2
	  && n2->tgt == n->tgt
	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
	{
	  gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
				  kind & typemask, cbuf);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}
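/* A sketch of the tgt->list[i].offset encoding that gomp_map_val decodes,
   inferred from the assignments in gomp_map_vars below: ~(uintptr_t) 0
   means "use the (possibly already translated) host address unchanged",
   ~(uintptr_t) 1 means "pass a NULL / zero pointer", ~(uintptr_t) 2 means
   "derive the address from the next entry's mapping plus the host address
   delta" (used for GOMP_MAP_STRUCT parents), and any other value is a
   plain offset from tgt->tgt_start.  */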
static inline uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;
  if (tgt->list[i].offset == ~(uintptr_t) 0)
    return (uintptr_t) hostaddrs[i];
  if (tgt->list[i].offset == ~(uintptr_t) 1)
    return 0;
  if (tgt->list[i].offset == ~(uintptr_t) 2)
    return tgt->list[i + 1].key->tgt->tgt_start
	   + tgt->list[i + 1].key->tgt_offset
	   + tgt->list[i + 1].offset
	   + (uintptr_t) hostaddrs[i]
	   - (uintptr_t) hostaddrs[i + 1];
  return tgt->tgt_start + tgt->list[i].offset;
}
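/* Map the MAPNUM regions described by HOSTADDRS, SIZES and KINDS on
   DEVICEP, reusing mappings recorded in the device's splay tree where
   possible, and return a target_mem_desc describing the result (a brief
   summary of the function below; it carries no descriptive comment of its
   own).  PRAGMA_KIND distinguishes the GOMP_MAP_VARS_TARGET,
   GOMP_MAP_VARS_DATA and GOMP_MAP_VARS_ENTER_DATA style callers, DEVADDRS
   optionally supplies an already existing device allocation (only for a
   single mapping), and SHORT_MAPKIND selects between the unsigned char
   and unsigned short KINDS encodings.  */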
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1;
  tgt->device_descr = devicep;
  struct gomp_coalesce_buf cbuf, *cbufp = NULL;

  if (mapnum == 0)
    {
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      return tgt;
    }

  tgt_align = sizeof (void *);
  tgt_size = 0;
  cbuf.chunks = NULL;
  cbuf.chunk_cnt = -1;
  cbuf.use_cnt = 0;
  cbuf.buf = NULL;
  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
      cbuf.chunk_cnt = 0;
    }
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
      cbuf.chunk_cnt = 1;
      cbuf.use_cnt = 1 + (mapnum > 1);
      cbuf.chunks[0].start = 0;
      cbuf.chunks[0].end = tgt_size;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt);
      return NULL;
    }

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (short_mapkind, kinds, i);
      if (hostaddrs[i] == NULL
	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
	{
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start;
	  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("use_device_ptr pointer wasn't mapped");
	    }
	  cur_node.host_start -= n->host_start;
	  hostaddrs[i]
	    = (void *) (n->tgt->tgt_start + n->tgt_offset
			+ cur_node.host_start);
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 0;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_STRUCT)
	{
	  size_t first = i + 1;
	  size_t last = i + sizes[i];
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = (uintptr_t) hostaddrs[last]
			      + sizes[last];
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 2;
	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
	  if (n == NULL)
	    {
	      size_t align = (size_t) 1 << (kind >> rshift);
	      if (tgt_align < align)
		tgt_align = align;
	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
	      tgt_size += cur_node.host_end - cur_node.host_start;
	      not_found_cnt += last - i;
	      for (i = first; i <= last; i++)
		{
		  tgt->list[i].key = NULL;
		  if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
					     & typemask))
		    gomp_coalesce_buf_add (&cbuf,
					   tgt_size - cur_node.host_end
					   + (uintptr_t) hostaddrs[i],
					   sizes[i]);
		}
	      i--;
	      continue;
	    }
	  for (i = first; i <= last; i++)
	    gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
				      sizes, kinds, NULL);
	  i--;
	  continue;
	}
      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
	{
	  tgt->list[i].key = NULL;
	  tgt->list[i].offset = ~(uintptr_t) 1;
	  has_firstprivate = true;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  gomp_coalesce_buf_add (&cbuf, tgt_size,
				 cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  has_firstprivate = true;
	  continue;
	}
      splay_tree_key n;
      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	{
	  n = gomp_map_0len_lookup (mem_map, &cur_node);
	  if (!n)
	    {
	      tgt->list[i].key = NULL;
	      tgt->list[i].offset = ~(uintptr_t) 1;
	      continue;
	    }
	}
      else
	n = splay_tree_lookup (mem_map, &cur_node);
      if (n && n->refcount != REFCOUNT_LINK)
	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
				kind & typemask, NULL);
      else
	{
	  tgt->list[i].key = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  if (gomp_to_device_kind_p (kind & typemask))
	    gomp_coalesce_buf_add (&cbuf, tgt_size,
				   cur_node.host_end - cur_node.host_start);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j].key = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      if (!tgt->to_free)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("device memory allocation fail");
	}

      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;

      if (cbuf.use_cnt == 1)
	cbuf.chunk_cnt--;
      if (cbuf.chunk_cnt > 0)
	{
	  cbuf.buf
	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end
		      - cbuf.chunks[0].start);
	  if (cbuf.buf)
	    {
	      cbuf.tgt = tgt;
	      cbufp = &cbuf;
	    }
	}
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt || has_firstprivate)
    {
      if (not_found_cnt)
	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
      uintptr_t field_tgt_base = 0;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i].key == NULL)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    switch (kind & typemask)
	      {
		size_t align, len, first, last;
		splay_tree_key n;
	      case GOMP_MAP_FIRSTPRIVATE:
		align = (size_t) 1 << (kind >> rshift);
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		tgt->list[i].offset = tgt_size;
		len = sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (tgt->tgt_start + tgt_size),
				    (void *) hostaddrs[i], len, cbufp);
		tgt_size += len;
		continue;
	      case GOMP_MAP_FIRSTPRIVATE_INT:
	      case GOMP_MAP_USE_DEVICE_PTR:
	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
		continue;
	      case GOMP_MAP_STRUCT:
		first = i + 1;
		last = i + sizes[i];
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = (uintptr_t) hostaddrs[last]
				    + sizes[last];
		if (tgt->list[first].key != NULL)
		  continue;
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL)
		  {
		    size_t align = (size_t) 1 << (kind >> rshift);
		    tgt_size -= (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    tgt_size += (uintptr_t) hostaddrs[first]
				- (uintptr_t) hostaddrs[i];
		    field_tgt_base = (uintptr_t) hostaddrs[first];
		    field_tgt_offset = tgt_size;
		    field_tgt_clear = last;
		    tgt_size += cur_node.host_end
				- (uintptr_t) hostaddrs[first];
		    continue;
		  }
		for (i = first; i <= last; i++)
		  gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
					    sizes, kinds, cbufp);
		i--;
		continue;
	      case GOMP_MAP_ALWAYS_POINTER:
		cur_node.host_start = (uintptr_t) hostaddrs[i];
		cur_node.host_end = cur_node.host_start + sizeof (void *);
		n = splay_tree_lookup (mem_map, &cur_node);
		if (n == NULL
		    || n->host_start > cur_node.host_start
		    || n->host_end < cur_node.host_end)
		  {
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("always pointer not mapped");
		  }
		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
		    != GOMP_MAP_ALWAYS_POINTER)
		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
		if (cur_node.tgt_offset)
		  cur_node.tgt_offset -= sizes[i];
		gomp_copy_host2dev (devicep,
				    (void *) (n->tgt->tgt_start
					      + n->tgt_offset
					      + cur_node.host_start
					      - n->host_start),
				    (void *) &cur_node.tgt_offset,
				    sizeof (void *), cbufp);
		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start;
		continue;
	      default:
		break;
	      }
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n && n->refcount != REFCOUNT_LINK)
	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
				      kind & typemask, cbufp);
	    else
	      {
		k->link_key = NULL;
		if (n && n->refcount == REFCOUNT_LINK)
		  {
		    /* Replace target address of the pointer with target address
		       of mapped object in the splay tree.  */
		    splay_tree_remove (mem_map, n);
		    k->link_key = n;
		  }
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i].key = k;
		k->tgt = tgt;
		if (field_tgt_clear != ~(size_t) 0)
		  {
		    k->tgt_offset = k->host_start - field_tgt_base
				    + field_tgt_offset;
		    if (i == field_tgt_clear)
		      field_tgt_clear = ~(size_t) 0;
		  }
		else
		  {
		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
		    k->tgt_offset = tgt_size;
		    tgt_size += k->host_end - k->host_start;
		  }
		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		tgt->list[i].always_copy_from
		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
		tgt->list[i].offset = 0;
		tgt->list[i].length = k->host_end - k->host_start;
		k->refcount = 1;
		k->dynamic_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		  case GOMP_MAP_ALWAYS_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		  case GOMP_MAP_ALWAYS_TO:
		  case GOMP_MAP_ALWAYS_TOFROM:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);
		    break;
		  case GOMP_MAP_POINTER:
		    gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
				      k->tgt_offset, sizes[i], cbufp);
		    break;
		  case GOMP_MAP_TO_PSET:
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					k->host_end - k->host_start, cbufp);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
							 j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j].key = k;
			  tgt->list[j].copy_from = false;
			  tgt->list[j].always_copy_from = false;
			  if (k->refcount != REFCOUNT_INFINITY)
			    k->refcount++;
			  gomp_map_pointer (tgt,
					    (uintptr_t) *(void **) hostaddrs[j],
					    k->tgt_offset
					    + ((uintptr_t) hostaddrs[j]
					       - k->host_start),
					    sizes[j], cbufp);
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));
		    gomp_copy_host2dev (devicep,
					(void *) (tgt->tgt_start
						  + k->tgt_offset),
					(void *) k->host_start,
					sizeof (void *), cbufp);
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }

		if (k->link_key)
		  {
		    /* Set link pointer on target to the device address of the
		       mapped object.  */
		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
		    /* We intentionally do not use coalescing here, as it's not
		       data allocated by the current call to this function.  */
		    gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
					&tgt_addr, sizeof (void *), NULL);
		  }
		array++;
	      }
	  }
    }

  if (pragma_kind == GOMP_MAP_VARS_TARGET)
    {
      for (i = 0; i < mapnum; i++)
	{
	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
	  gomp_copy_host2dev (devicep,
			      (void *) (tgt->tgt_start + i * sizeof (void *)),
			      (void *) &cur_node.tgt_offset, sizeof (void *),
			      cbufp);
	}
    }

  if (cbufp)
    {
      long c = 0;
      for (c = 0; c < cbuf.chunk_cnt; ++c)
	gomp_copy_host2dev (devicep,
			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
			    (char *) cbuf.buf + (cbuf.chunks[c].start
						 - cbuf.chunks[0].start),
			    cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
      free (cbuf.buf);
      cbuf.buf = NULL;
      cbufp = NULL;
    }

  /* If the variable from "omp target enter data" map-list was already mapped,
     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
     gomp_exit_data.  */
  if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0)
    {
      free (tgt);
      tgt = NULL;
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  bool is_tgt_unmapped = false;
  splay_tree_remove (&devicep->mem_map, k);
  if (k->link_key)
    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
  if (k->tgt->refcount > 1)
    k->tgt->refcount--;
  else
    {
      is_tgt_unmapped = true;
      gomp_unmap_tgt (k->tgt);
    }
  return is_tgt_unmapped;
}
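/* A short summary of the reference counts used above, as they appear in the
   checks in this file: an ordinary mapping starts with refcount 1 and goes
   away once the count drops back to 0; REFCOUNT_INFINITY marks mappings
   that are never unmapped (such as the offload image tables registered in
   gomp_load_image_to_device); REFCOUNT_LINK marks "omp declare target link"
   variables, whose splay-tree entry is stashed in link_key and temporarily
   replaced while a real mapping of the object exists.  */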
/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host; if it is false, it is assumed that this
   has been done already.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      bool do_unmap = false;
      if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
	k->refcount--;
      else if (k->refcount == 1)
	{
	  k->refcount--;
	  do_unmap = true;
	}

      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_unmap)
	gomp_remove_var (devicep, k);
    }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    void *hostaddr = (void *) cur_node.host_start;
	    void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
				      + cur_node.host_start - n->host_start);
	    size_t size = cur_node.host_end - cur_node.host_start;

	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}
/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and from the loaded target image.  We rely on the host
   and device compilers emitting variables and functions in the same
   order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
				target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
		  " (expected %u, have %u)", num_funcs + num_vars,
		  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;

      if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Cannot map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->link_key = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}
/* Unload the mappings described by TARGET_DATA from the device DEVICEP.
   The device must be locked.  */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
			       unsigned version,
			       const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array.  */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
				: (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.  */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
	splay_tree_remove (&devicep->mem_map, &k);
      else
	{
	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
	  is_tgt_unmapped = gomp_remove_var (devicep, n);
	}
    }

  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}
/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
			   int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
		GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_load_image_to_device (devicep, version,
				   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image into array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_register (const void *host_table, int target_type,
		       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
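/* A note on VERSION, assuming the GOMP_VERSION_PACK encoding from
   gomp-constants.h: the upper half of the word carries the libgomp
   interface version (extracted by GOMP_VERSION_LIB and checked against
   GOMP_VERSION above) and the lower half the device interface version;
   the plain GOMP_offload_register entry point above passes 0, i.e. the
   oldest interface.  */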
/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by the target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
			     int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
	  && devicep->state == GOMP_DEVICE_INITIALIZED)
	gomp_unload_image_from_device (devicep, version,
				       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

void
GOMP_offload_unregister (const void *host_table, int target_type,
			 const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_load_image_to_device (devicep, image->version,
				   image->host_table, image->target_data,
				   false);
    }

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
	{
	  struct offload_image_descr *image = &offload_images[i];
	  if (image->type == devicep->type)
	    gomp_unload_image_from_device (devicep, image->version,
					   image->host_table,
					   image->target_data);
	}
    }
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
				     unsigned short *kinds, size_t *tgt_align,
				     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	if (*tgt_align < align)
	  *tgt_align = align;
	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
	*tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds, size_t tgt_align,
			size_t tgt_size)
{
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
	hostaddrs[i] = tgt + tgt_size;
	tgt_size = tgt_size + sizes[i];
      }
}
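/* A worked example of the two helpers above: with one 8-byte firstprivate
   requiring 8-byte alignment (kinds[i] >> 8 == 3) and *tgt_size already 13,
   calculate_firstprivate_requirements first rounds 13 up to 16
   ((13 + 7) & ~7) and then adds 8, giving 24; copy_firstprivate_data
   performs the identical rounding over a fresh running offset, so each
   private copy lands at the offset the sizing pass reserved for it.  */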
/* Helper function of GOMP_target{,_ext} routines.  */

static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
			 void (*host_fn) (void *))
{
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    return (void *) host_fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_FINALIZED)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  return NULL;
	}

      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) host_fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      gomp_mutex_unlock (&devicep->lock);
      if (tgt_fn == NULL)
	return NULL;

      return (void *) tgt_fn->tgt_offset;
    }
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      /* All shared memory devices should use the GOMP_target_ext function.  */
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
    return gomp_target_fallback (fn, hostaddrs);

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
		     NULL);
  gomp_unmap_vars (tgt_vars, true);
}

/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
   and several arguments have been added:
   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
   DEPEND is array of dependencies, see GOMP_task for details.

   ARGS is a pointer to an array consisting of a variable number of both
   device-independent and device-specific arguments, which can take one or
   two elements where the first specifies for which device it is intended,
   the type and optionally also the value.  If the value is not present in
   the first one, the whole second element is the actual value.  The last
   element of the array is a single NULL.  Among the device-independent
   arguments are for example NUM_TEAMS and THREAD_LIMIT.

   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value GOMP_teams will have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value GOMP_teams will have on the device.  */
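/* An illustrative ARGS array under the convention described above, assuming
   the GOMP_TARGET_ARG_* encoding from gomp-constants.h: a num_teams value
   small enough to be embedded in the identifier word itself, followed by a
   thread_limit passed as a separate second element, then the terminating
   NULL:

     void *args[] =
       { (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			       | GOMP_TARGET_ARG_NUM_TEAMS
			       | (4 << GOMP_TARGET_ARG_VALUE_SHIFT)),
	 (void *) (uintptr_t) (GOMP_TARGET_ARG_DEVICE_ALL
			       | GOMP_TARGET_ARG_SUBSEQUENT_PARAM
			       | GOMP_TARGET_ARG_THREAD_LIMIT),
	 (void *) (uintptr_t) 8,
	 NULL };  */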
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
		 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  bool fpc_done = false;

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
	 target tasks make sense to run asynchronously even when
	 outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
	{
	  struct gomp_team *team = gomp_new_team (1);
	  struct gomp_task *task = thr->task;
	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
	  team->prev_ts = thr->ts;
	  thr->ts.team = team;
	  thr->ts.team_id = 0;
	  thr->ts.work_share = &team->work_shares[0];
	  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
	  thr->ts.single_count = 0;
#endif
	  thr->ts.static_trip = 0;
	  thr->task = &team->implicit_task[0];
	  gomp_init_task (thr->task, NULL, icv);
	  if (task)
	    {
	      thr->task = task;
	      gomp_end_task ();
	      free (task);
	      thr->task = &team->implicit_task[0];
	    }
	  else
	    pthread_setspecific (gomp_thread_destructor, thr);
	}
      if (thr->ts.team
	  && !thr->task->final_task)
	{
	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
				   sizes, kinds, flags, depend, args,
				   GOMP_TARGET_TASK_BEFORE_MAP);
	  return;
	}
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  /* If we might need to wait, copy firstprivate now.  */
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	  fpc_done = true;
	  gomp_task_maybe_wait_for_dependencies (depend);
	}
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      gomp_target_fallback (fn, hostaddrs);
      return;
    }

  struct target_mem_desc *tgt_vars;
  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      if (!fpc_done)
	{
	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
					       &tgt_align, &tgt_size);
	  if (tgt_align)
	    {
	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
				      tgt_align, tgt_size);
	    }
	}
      tgt_vars = NULL;
    }
  else
    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
			      true, GOMP_MAP_VARS_TARGET);
  devicep->run_func (devicep->target_id, fn_addr,
		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
		     args);
  if (tgt_vars)
    gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
	 #pragma omp target data constructs, need to remember the
	 new #pragma omp target data, otherwise GOMP_target_end_data
	 would get out of sync.  */
      struct target_mem_desc *tgt
	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
			 GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
		      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback ();

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
		     GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}
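/* The target_data ICV thus acts as a stack of active "omp target data"
   regions linked through tgt->prev.  For example, with

     #pragma omp target data map(to: a)
     #pragma omp target data map(to: b)

   icv->target_data points at the descriptor created for B, whose prev
   points at the one for A, and each GOMP_target_end_data call above pops
   exactly one entry.  */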
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
			size_t *sizes, unsigned short *kinds,
			unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags | GOMP_TARGET_FLAG_UPDATE,
					   depend, NULL, GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}
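
/* Usage sketch (illustrative, not part of libgomp; a and n stand for the
   caller's array and size): target update refreshes an existing mapping
   in the direction given by the motion clause, and with depend/nowait it
   may be deferred as a target task by the code above:

     #pragma omp target update to(a[0:n])
     #pragma omp target update from(a[0:n]) nowait depend(in: a)

   Both forms reach GOMP_target_update_ext, the second one with
   GOMP_TARGET_FLAG_NOWAIT set in FLAGS.  */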
static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
		void **hostaddrs, size_t *sizes, unsigned short *kinds)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
	{
	case GOMP_MAP_FROM:
	case GOMP_MAP_ALWAYS_FROM:
	case GOMP_MAP_DELETE:
	case GOMP_MAP_RELEASE:
	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
	  cur_node.host_start = (uintptr_t) hostaddrs[i];
	  cur_node.host_end = cur_node.host_start + sizes[i];
	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
	  if (!k)
	    continue;

	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
	    k->refcount--;
	  if ((kind == GOMP_MAP_DELETE
	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
	      && k->refcount != REFCOUNT_INFINITY)
	    k->refcount = 0;

	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
	      || kind == GOMP_MAP_ALWAYS_FROM)
	    gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
				(void *) (k->tgt->tgt_start + k->tgt_offset
					  + cur_node.host_start
					  - k->host_start),
				cur_node.host_end - cur_node.host_start);
	  if (k->refcount == 0)
	    {
	      splay_tree_remove (&devicep->mem_map, k);
	      if (k->link_key)
		splay_tree_insert (&devicep->mem_map,
				   (splay_tree_node) k->link_key);
	      if (k->tgt->refcount > 1)
		k->tgt->refcount--;
	      else
		gomp_unmap_tgt (k->tgt);
	    }

	  break;
	default:
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
		      kind);
	}
    }

  gomp_mutex_unlock (&devicep->lock);
}
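
/* Reference-count sketch for gomp_exit_data above (illustrative; a and n
   stand for the caller's array and size): each additional mapping of the
   same host range bumps the refcount, "release" drops it by one, "delete"
   forces it to zero, and "from" copies the data back once the count
   reaches zero:

     #pragma omp target enter data map(to: a[0:n])      // refcount 1
     #pragma omp target enter data map(to: a[0:n])      // refcount 2
     #pragma omp target exit data map(release: a[0:n])  // refcount 1
     #pragma omp target exit data map(from: a[0:n])     // 0, copied back

   Keys with REFCOUNT_INFINITY (e.g. created by omp_target_associate_ptr)
   are never removed here.  */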
void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
			     size_t *sizes, unsigned short *kinds,
			     unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule tasks during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
	{
	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
	      && thr->ts.team
	      && !thr->task->final_task)
	    {
	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
					   mapnum, hostaddrs, sizes, kinds,
					   flags, depend, NULL,
					   GOMP_TARGET_TASK_DATA))
		return;
	    }
	  else
	    {
	      struct gomp_team *team = thr->ts.team;
	      /* If parallel or taskgroup has been cancelled, don't start new
		 tasks.  */
	      if (__builtin_expect (gomp_cancel_var, 0) && team)
		{
		  if (gomp_team_barrier_cancelled (&team->barrier))
		    return;
		  if (thr->task->taskgroup)
		    {
		      if (thr->task->taskgroup->cancelled)
			return;
		      if (thr->task->taskgroup->workshare
			  && thr->task->taskgroup->prev
			  && thr->task->taskgroup->prev->cancelled)
			return;
		    }
		}

	      gomp_task_maybe_wait_for_dependencies (depend);
	    }
	}
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
	return;
      if (thr->task->taskgroup)
	{
	  if (thr->task->taskgroup->cancelled)
	    return;
	  if (thr->task->taskgroup->workshare
	      && thr->task->taskgroup->prev
	      && thr->task->taskgroup->prev->cancelled)
	    return;
	}
    }

  size_t i;
  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < mapnum; i++)
      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
			 &kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
	  i += sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
		       true, GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
}
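
/* Usage sketch (illustrative; a and n stand for the caller's array and
   size): unstructured data mapping lowers to GOMP_target_enter_exit_data,
   with GOMP_TARGET_FLAG_EXIT_DATA distinguishing the two directions:

     #pragma omp target enter data map(to: a[0:n])   // maps and copies in
     #pragma omp target exit data map(from: a[0:n])  // copies out, unmaps

   GOMP_MAP_STRUCT entries group a struct with its mapped members, which
   is why the enter-data loop above hands sizes[i] + 1 entries to
   gomp_map_vars at once and then skips over them.  */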
bool
gomp_target_task_fn (void *data)
{
  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
  struct gomp_device_descr *devicep = ttask->devicep;

  if (ttask->fn != NULL)
    {
      void *fn_addr;
      if (devicep == NULL
	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
	{
	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
	  gomp_target_fallback (ttask->fn, ttask->hostaddrs);
	  return false;
	}

      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
	{
	  if (ttask->tgt)
	    gomp_unmap_vars (ttask->tgt, true);
	  return false;
	}

      void *actual_arguments;
      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	{
	  ttask->tgt = NULL;
	  actual_arguments = ttask->hostaddrs;
	}
      else
	{
	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
				      NULL, ttask->sizes, ttask->kinds, true,
				      GOMP_MAP_VARS_TARGET);
	  actual_arguments = (void *) ttask->tgt->tgt_start;
	}
      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;

      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
			       ttask->args, (void *) ttask);
      return true;
    }
  else if (devicep == NULL
	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return false;

  size_t i;
  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		 ttask->kinds, true);
  else if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
    for (i = 0; i < ttask->mapnum; i++)
      if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
	{
	  gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
			 NULL, &ttask->sizes[i], &ttask->kinds[i], true,
			 GOMP_MAP_VARS_ENTER_DATA);
	  i += ttask->sizes[i];
	}
      else
	gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL,
		       &ttask->sizes[i], &ttask->kinds[i], true,
		       GOMP_MAP_VARS_ENTER_DATA);
  else
    gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
		    ttask->kinds);
  return false;
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

void *
omp_target_alloc (size_t size, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return malloc (size);

  if (device_num < 0)
    return NULL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return NULL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return malloc (size);

  gomp_mutex_lock (&devicep->lock);
  void *ret = devicep->alloc_func (devicep->target_id, size);
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

void
omp_target_free (void *device_ptr, int device_num)
{
  if (device_ptr == NULL)
    return;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    {
      free (device_ptr);
      return;
    }

  if (device_num < 0)
    return;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      free (device_ptr);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  gomp_free_device_memory (devicep, device_ptr);
  gomp_mutex_unlock (&devicep->lock);
}

int
omp_target_is_present (const void *ptr, int device_num)
{
  if (ptr == NULL)
    return 1;

  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return 1;

  if (device_num < 0)
    return 0;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return 0;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return 1;

  gomp_mutex_lock (&devicep->lock);
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
  int ret = n != NULL;
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
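
/* Usage sketch (hypothetical user program, not part of libgomp) for the
   device memory API above:

     #include <omp.h>
     #include <stdlib.h>

     int
     main (void)
     {
       int dev = omp_get_default_device ();
       void *p = omp_target_alloc (1024, dev);
       if (p == NULL)
	 abort ();	// allocation failed or invalid device
       // Note: per the code above, omp_target_is_present reports mapped
       // *host* addresses; memory from omp_target_alloc is not entered
       // into the mapping table, so it would not be reported as present.
       omp_target_free (p, dev);
       return 0;
     }
*/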
int
omp_target_memcpy (void *dst, const void *src, size_t length,
		   size_t dst_offset, size_t src_offset, int dst_device_num,
		   int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
  bool ret;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }
  if (src_devicep == NULL && dst_devicep == NULL)
    {
      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
      return 0;
    }
  if (src_devicep == NULL)
    {
      gomp_mutex_lock (&dst_devicep->lock);
      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&dst_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (dst_devicep == NULL)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2host_func (src_devicep->target_id,
					(char *) dst + dst_offset,
					(char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  if (src_devicep == dst_devicep)
    {
      gomp_mutex_lock (&src_devicep->lock);
      ret = src_devicep->dev2dev_func (src_devicep->target_id,
				       (char *) dst + dst_offset,
				       (char *) src + src_offset, length);
      gomp_mutex_unlock (&src_devicep->lock);
      return (ret ? 0 : EINVAL);
    }
  return EINVAL;
}
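
/* Usage sketch (illustrative, not part of libgomp): offsets are in bytes
   and 0 is returned on success, EINVAL otherwise.  The host side is
   written here as omp_get_initial_device (); whether that value is
   accepted as the host designator (rather than the internal
   GOMP_DEVICE_HOST_FALLBACK constant checked above) is version-dependent,
   so treat this strictly as a sketch:

     int dev = omp_get_default_device ();
     int host = omp_get_initial_device ();
     double buf[256] = { 0 };
     void *dptr = omp_target_alloc (sizeof buf, dev);
     omp_target_memcpy (dptr, buf, sizeof buf, 0, 0, dev, host);  // to dev
     omp_target_memcpy (buf, dptr, sizeof buf, 0, 0, host, dev);  // back
     omp_target_free (dptr, dev);
*/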
static int
omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
			       int num_dims, const size_t *volume,
			       const size_t *dst_offsets,
			       const size_t *src_offsets,
			       const size_t *dst_dimensions,
			       const size_t *src_dimensions,
			       struct gomp_device_descr *dst_devicep,
			       struct gomp_device_descr *src_devicep)
{
  size_t dst_slice = element_size;
  size_t src_slice = element_size;
  size_t j, dst_off, src_off, length;
  int i, ret;

  if (num_dims == 1)
    {
      if (__builtin_mul_overflow (element_size, volume[0], &length)
	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
	return EINVAL;
      if (dst_devicep == NULL && src_devicep == NULL)
	{
	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
		  length);
	  ret = 1;
	}
      else if (src_devicep == NULL)
	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (dst_devicep == NULL)
	ret = src_devicep->dev2host_func (src_devicep->target_id,
					  (char *) dst + dst_off,
					  (const char *) src + src_off,
					  length);
      else if (src_devicep == dst_devicep)
	ret = src_devicep->dev2dev_func (src_devicep->target_id,
					 (char *) dst + dst_off,
					 (const char *) src + src_off,
					 length);
      else
	ret = 0;
      return ret ? 0 : EINVAL;
    }

  /* FIXME: it would be nice to have a plugin function to handle
     num_dims == 2 and num_dims == 3 more efficiently.  Larger dimension
     counts can be handled by the generic recursion below, and for
     host-to-host copies the recursion should be used for any
     num_dims >= 2.  */

  for (i = 1; i < num_dims; i++)
    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
      return EINVAL;
  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
    return EINVAL;
  for (j = 0; j < volume[0]; j++)
    {
      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
					   (const char *) src + src_off,
					   element_size, num_dims - 1,
					   volume + 1, dst_offsets + 1,
					   src_offsets + 1, dst_dimensions + 1,
					   src_dimensions + 1, dst_devicep,
					   src_devicep);
      if (ret)
	return ret;
      dst_off += dst_slice;
      src_off += src_slice;
    }
  return 0;
}

int
omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
			int num_dims, const size_t *volume,
			const size_t *dst_offsets,
			const size_t *src_offsets,
			const size_t *dst_dimensions,
			const size_t *src_dimensions,
			int dst_device_num, int src_device_num)
{
  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;

  if (!dst && !src)
    return INT_MAX;

  if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (dst_device_num < 0)
	return EINVAL;

      dst_devicep = resolve_device (dst_device_num);
      if (dst_devicep == NULL)
	return EINVAL;

      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	dst_devicep = NULL;
    }
  if (src_device_num != GOMP_DEVICE_HOST_FALLBACK)
    {
      if (src_device_num < 0)
	return EINVAL;

      src_devicep = resolve_device (src_device_num);
      if (src_devicep == NULL)
	return EINVAL;

      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
	src_devicep = NULL;
    }

  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
    return EINVAL;

  if (src_devicep)
    gomp_mutex_lock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_lock (&dst_devicep->lock);
  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
					   volume, dst_offsets, src_offsets,
					   dst_dimensions, src_dimensions,
					   dst_devicep, src_devicep);
  if (src_devicep)
    gomp_mutex_unlock (&src_devicep->lock);
  else if (dst_devicep)
    gomp_mutex_unlock (&dst_devicep->lock);
  return ret;
}
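
/* Usage sketch (illustrative): copy a 2 x 3 sub-block between two 4 x 5
   host arrays.  volume[] gives the copied extent in elements per
   dimension, *_offsets[] the starting element, and *_dimensions[] the
   full extents of the arrays.  The same caveat as in the
   omp_target_memcpy sketch above applies to the host device number:

     double src[4][5], dst[4][5];
     const size_t volume[2]  = { 2, 3 };
     const size_t offsets[2] = { 1, 1 };
     const size_t dims[2]    = { 4, 5 };
     int host = omp_get_initial_device ();
     omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			     offsets, offsets, dims, dims, host, host);
*/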
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
			  size_t size, size_t device_offset, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) host_ptr;
  cur_node.host_end = cur_node.host_start + size;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n)
    {
      if (n->tgt->tgt_start + n->tgt_offset
	  == (uintptr_t) device_ptr + device_offset
	  && n->host_start <= cur_node.host_start
	  && n->host_end >= cur_node.host_end)
	ret = 0;
    }
  else
    {
      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
      tgt->array = gomp_malloc (sizeof (*tgt->array));
      tgt->refcount = 1;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
      tgt->to_free = NULL;
      tgt->prev = NULL;
      tgt->list_count = 0;
      tgt->device_descr = devicep;
      splay_tree_node array = tgt->array;
      splay_tree_key k = &array->key;
      k->host_start = cur_node.host_start;
      k->host_end = cur_node.host_end;
      k->tgt = tgt;
      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      ret = 0;
    }
  gomp_mutex_unlock (&devicep->lock);
  return ret;
}

int
omp_target_disassociate_ptr (const void *ptr, int device_num)
{
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return EINVAL;

  if (device_num < 0)
    return EINVAL;

  struct gomp_device_descr *devicep = resolve_device (device_num);
  if (devicep == NULL)
    return EINVAL;

  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return EINVAL;

  gomp_mutex_lock (&devicep->lock);

  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  int ret = EINVAL;

  cur_node.host_start = (uintptr_t) ptr;
  cur_node.host_end = cur_node.host_start;
  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
  if (n
      && n->host_start == cur_node.host_start
      && n->refcount == REFCOUNT_INFINITY
      && n->tgt->tgt_start == 0
      && n->tgt->to_free == NULL
      && n->tgt->refcount == 1
      && n->tgt->list_count == 0)
    {
      splay_tree_remove (&devicep->mem_map, n);
      gomp_unmap_tgt (n->tgt);
      ret = 0;
    }

  gomp_mutex_unlock (&devicep->lock);
  return ret;
}
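
/* Usage sketch (illustrative; n and hostbuf stand for the caller's size
   and host array): pairing a host buffer with device memory obtained from
   omp_target_alloc.  Per the code above, the created mapping gets
   refcount REFCOUNT_INFINITY, so target regions reuse DEVBUF instead of
   allocating a fresh copy, and only disassociation removes the entry:

     int dev = omp_get_default_device ();
     size_t bytes = n * sizeof (double);
     void *devbuf = omp_target_alloc (bytes, dev);
     omp_target_associate_ptr (hostbuf, devbuf, bytes, 0, dev);
     // ... target regions referring to hostbuf use devbuf ...
     omp_target_disassociate_ptr (hostbuf, dev);
     omp_target_free (devbuf, dev);
*/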
int
omp_pause_resource (omp_pause_resource_t kind, int device_num)
{
  (void) kind;
  if (device_num == GOMP_DEVICE_HOST_FALLBACK)
    return gomp_pause_host ();
  if (device_num < 0 || device_num >= gomp_get_num_devices ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

int
omp_pause_resource_all (omp_pause_resource_t kind)
{
  (void) kind;
  if (gomp_pause_host ())
    return -1;
  /* Do nothing for target devices for now.  */
  return 0;
}

ialias (omp_pause_resource)
ialias (omp_pause_resource_all)

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.  The handles of the found functions are stored
   in the corresponding fields of DEVICE.  The function returns TRUE on
   success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    goto dl_fail;

  /* Check if all required functions are available in the plugin and store
     their handles.  None of the symbols can legitimately be NULL,
     so we don't need to check dlerror all the time.  */
#define DLSYM(f)							\
  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
    goto dl_fail
  /* Similar, but missing functions are not an error.  Return false if
     failed, true otherwise.  */
#define DLSYM_OPT(f, n)							\
  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
   || (last_missing = #n, 0))

  DLSYM (version);
  if (device->version_func () != GOMP_VERSION)
    {
      err = "plugin version mismatch";
      goto fail;
    }

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    {
      DLSYM (run);
      DLSYM (async_run);
      DLSYM_OPT (can_run, can_run);
      DLSYM (dev2dev);
    }
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      if (!DLSYM_OPT (openacc.exec, openacc_exec)
	  || !DLSYM_OPT (openacc.register_async_cleanup,
			 openacc_register_async_cleanup)
	  || !DLSYM_OPT (openacc.async_test, openacc_async_test)
	  || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
	  || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
	  || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
	  || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
	  || !DLSYM_OPT (openacc.async_wait_all_async,
			 openacc_async_wait_all_async)
	  || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
	  || !DLSYM_OPT (openacc.create_thread_data,
			 openacc_create_thread_data)
	  || !DLSYM_OPT (openacc.destroy_thread_data,
			 openacc_destroy_thread_data))
	{
	  /* Require all the OpenACC handlers if we have
	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
	  err = "plugin missing OpenACC handler function";
	  goto fail;
	}

      unsigned cuda = 0;
      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
			 openacc_cuda_get_current_device);
      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
			 openacc_cuda_get_current_context);
      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
      if (cuda && cuda != 4)
	{
	  /* Make sure all the CUDA functions are there if any of them are.  */
	  err = "plugin missing OpenACC CUDA handler function";
	  goto fail;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

  return 1;

 dl_fail:
  err = dlerror ();
 fail:
  gomp_error ("while loading %s: %s", plugin_name, err);
  if (last_missing)
    gomp_error ("missing function was %s", last_missing);
  if (plugin_handle)
    dlclose (plugin_handle);

  return 0;
}
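
/* For illustration, DLSYM (alloc) in the function above expands to:

     if (!(device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc")))
       goto dl_fail;

   i.e. every mandatory entry point is resolved by prefixing the field
   name with "GOMP_OFFLOAD_", and a missing mandatory symbol aborts the
   load through the dl_fail path.  */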
/* This function finalizes all initialized devices.  */

static void
gomp_target_fini (void)
{
  int i;
  for (i = 0; i < num_devices; i++)
    {
      bool ret = true;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->state == GOMP_DEVICE_INITIALIZED)
	{
	  ret = devicep->fini_device_func (devicep->target_id);
	  devicep->state = GOMP_DEVICE_FINALIZED;
	}
      gomp_mutex_unlock (&devicep->lock);
      if (!ret)
	gomp_fatal ("device finalization failed");
    }
}
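
/* For example (illustrative), with OFFLOAD_PLUGINS set at build time to
   "nvptx,hsa", the loop in gomp_target_init below attempts to dlopen
   "libgomp-plugin-nvptx" and then "libgomp-plugin-hsa", each with the
   target-specific SONAME_SUFFIX (1) appended, e.g. ".so.1" on ELF
   platforms.  */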
/* This function initializes the runtime for offloading.
   It parses the list of offload plugins, and tries to load them.
   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors
   for corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones,
   followed by the others.  */

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_PLUGINS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;
	size_t prefix_len, suffix_len, cur_len;

	next = strchr (cur, ',');

	prefix_len = strlen (prefix);
	cur_len = next ? next - cur : strlen (cur);
	suffix_len = strlen (suffix);

	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	memcpy (plugin_name, prefix, prefix_len);
	memcpy (plugin_name + prefix_len, cur, cur_len);
	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.state = GOMP_DEVICE_UNINITIALIZED;
		current_device.openacc.data_environ = NULL;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }

  if (atexit (gomp_target_fini) != 0)
    gomp_fatal ("atexit failed");
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable we always fall back to host execution.
   GOMP_target* routines are just stubs for this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */