1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc. 2 3 Contributed by Mentor Embedded. 4 5 This file is part of the GNU Offloading and Multi Processing Library 6 (libgomp). 7 8 Libgomp is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 3, or (at your option) 11 any later version. 12 13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 16 more details. 17 18 Under Section 7 of GPL version 3, you are granted additional 19 permissions described in the GCC Runtime Library Exception, version 20 3.1, as published by the Free Software Foundation. 21 22 You should have received a copy of the GNU General Public License and 23 a copy of the GCC Runtime Library Exception along with this program; 24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 25 <http://www.gnu.org/licenses/>. */ 26 27 /* This file handles OpenACC constructs. */ 28 29 #include "openacc.h" 30 #include "libgomp.h" 31 #include "libgomp_g.h" 32 #include "gomp-constants.h" 33 #include "oacc-int.h" 34 #ifdef HAVE_INTTYPES_H 35 # include <inttypes.h> /* For PRIu64. */ 36 #endif 37 #include <string.h> 38 #include <stdarg.h> 39 #include <assert.h> 40 41 42 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we 43 continue to support the following two legacy values. */ 44 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0, 45 "legacy GOMP_DEVICE_ICV broken"); 46 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK) 47 == GOACC_FLAG_HOST_FALLBACK, 48 "legacy GOMP_DEVICE_HOST_FALLBACK broken"); 49 50 51 /* Returns the number of mappings associated with the pointer or pset. PSET 52 have three mappings, whereas pointer have two. */ 53 54 static int 55 find_pointer (int pos, size_t mapnum, unsigned short *kinds) 56 { 57 if (pos + 1 >= mapnum) 58 return 0; 59 60 unsigned char kind = kinds[pos+1] & 0xff; 61 62 if (kind == GOMP_MAP_TO_PSET) 63 return 3; 64 else if (kind == GOMP_MAP_POINTER) 65 return 2; 66 67 return 0; 68 } 69 70 /* Handle the mapping pair that are presented when a 71 deviceptr clause is used with Fortran. */ 72 73 static void 74 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes, 75 unsigned short *kinds) 76 { 77 int i; 78 79 for (i = 0; i < mapnum; i++) 80 { 81 unsigned short kind1 = kinds[i] & 0xff; 82 83 /* Handle Fortran deviceptr clause. */ 84 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR) 85 { 86 unsigned short kind2; 87 88 if (i < (signed)mapnum - 1) 89 kind2 = kinds[i + 1] & 0xff; 90 else 91 kind2 = 0xffff; 92 93 if (sizes[i] == sizeof (void *)) 94 continue; 95 96 /* At this point, we're dealing with a Fortran deviceptr. 97 If the next element is not what we're expecting, then 98 this is an instance of where the deviceptr variable was 99 not used within the region and the pointer was removed 100 by the gimplifier. */ 101 if (kind2 == GOMP_MAP_POINTER 102 && sizes[i + 1] == 0 103 && hostaddrs[i] == *(void **)hostaddrs[i + 1]) 104 { 105 kinds[i+1] = kinds[i]; 106 sizes[i+1] = sizeof (void *); 107 } 108 109 /* Invalidate the entry. */ 110 hostaddrs[i] = NULL; 111 } 112 } 113 } 114 115 static void goacc_wait (int async, int num_waits, va_list *ap); 116 117 118 /* Launch a possibly offloaded function with FLAGS. FN is the host fn 119 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory 120 blocks to be copied to/from the device. Varadic arguments are 121 keyed optional parameters terminated with a zero. */ 122 123 void 124 GOACC_parallel_keyed (int flags_m, void (*fn) (void *), 125 size_t mapnum, void **hostaddrs, size_t *sizes, 126 unsigned short *kinds, ...) 127 { 128 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 129 130 va_list ap; 131 struct goacc_thread *thr; 132 struct gomp_device_descr *acc_dev; 133 struct target_mem_desc *tgt; 134 void **devaddrs; 135 unsigned int i; 136 struct splay_tree_key_s k; 137 splay_tree_key tgt_fn_key; 138 void (*tgt_fn); 139 int async = GOMP_ASYNC_SYNC; 140 unsigned dims[GOMP_DIM_MAX]; 141 unsigned tag; 142 143 #ifdef HAVE_INTTYPES_H 144 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 145 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 146 #else 147 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 148 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 149 #endif 150 goacc_lazy_initialize (); 151 152 thr = goacc_thread (); 153 acc_dev = thr->dev; 154 155 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); 156 157 /* Host fallback if "if" clause is false or if the current device is set to 158 the host. */ 159 if (flags & GOACC_FLAG_HOST_FALLBACK) 160 { 161 goacc_save_and_set_bind (acc_device_host); 162 fn (hostaddrs); 163 goacc_restore_bind (); 164 return; 165 } 166 else if (acc_device_type (acc_dev->type) == acc_device_host) 167 { 168 fn (hostaddrs); 169 return; 170 } 171 172 /* Default: let the runtime choose. */ 173 for (i = 0; i != GOMP_DIM_MAX; i++) 174 dims[i] = 0; 175 176 va_start (ap, kinds); 177 /* TODO: This will need amending when device_type is implemented. */ 178 while ((tag = va_arg (ap, unsigned)) != 0) 179 { 180 if (GOMP_LAUNCH_DEVICE (tag)) 181 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old", 182 GOMP_LAUNCH_DEVICE (tag)); 183 184 switch (GOMP_LAUNCH_CODE (tag)) 185 { 186 case GOMP_LAUNCH_DIM: 187 { 188 unsigned mask = GOMP_LAUNCH_OP (tag); 189 190 for (i = 0; i != GOMP_DIM_MAX; i++) 191 if (mask & GOMP_DIM_MASK (i)) 192 dims[i] = va_arg (ap, unsigned); 193 } 194 break; 195 196 case GOMP_LAUNCH_ASYNC: 197 { 198 /* Small constant values are encoded in the operand. */ 199 async = GOMP_LAUNCH_OP (tag); 200 201 if (async == GOMP_LAUNCH_OP_MAX) 202 async = va_arg (ap, unsigned); 203 break; 204 } 205 206 case GOMP_LAUNCH_WAIT: 207 { 208 unsigned num_waits = GOMP_LAUNCH_OP (tag); 209 goacc_wait (async, num_waits, &ap); 210 break; 211 } 212 213 default: 214 gomp_fatal ("unrecognized offload code '%d'," 215 " libgomp is too old", GOMP_LAUNCH_CODE (tag)); 216 } 217 } 218 va_end (ap); 219 220 acc_dev->openacc.async_set_async_func (async); 221 222 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)) 223 { 224 k.host_start = (uintptr_t) fn; 225 k.host_end = k.host_start + 1; 226 gomp_mutex_lock (&acc_dev->lock); 227 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k); 228 gomp_mutex_unlock (&acc_dev->lock); 229 230 if (tgt_fn_key == NULL) 231 gomp_fatal ("target function wasn't mapped"); 232 233 tgt_fn = (void (*)) tgt_fn_key->tgt_offset; 234 } 235 else 236 tgt_fn = (void (*)) fn; 237 238 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 239 GOMP_MAP_VARS_OPENACC); 240 241 devaddrs = gomp_alloca (sizeof (void *) * mapnum); 242 for (i = 0; i < mapnum; i++) 243 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start 244 + tgt->list[i].key->tgt_offset 245 + tgt->list[i].offset); 246 247 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, 248 async, dims, tgt); 249 250 /* If running synchronously, unmap immediately. */ 251 bool copyfrom = true; 252 if (async_synchronous_p (async)) 253 gomp_unmap_vars (tgt, true); 254 else 255 { 256 bool async_unmap = false; 257 for (size_t i = 0; i < tgt->list_count; i++) 258 { 259 splay_tree_key k = tgt->list[i].key; 260 if (k && k->refcount == 1) 261 { 262 async_unmap = true; 263 break; 264 } 265 } 266 if (async_unmap) 267 tgt->device_descr->openacc.register_async_cleanup_func (tgt, async); 268 else 269 { 270 copyfrom = false; 271 gomp_unmap_vars (tgt, copyfrom); 272 } 273 } 274 275 acc_dev->openacc.async_set_async_func (acc_async_sync); 276 } 277 278 /* Legacy entry point, only provide host execution. */ 279 280 void 281 GOACC_parallel (int flags_m, void (*fn) (void *), 282 size_t mapnum, void **hostaddrs, size_t *sizes, 283 unsigned short *kinds, 284 int num_gangs, int num_workers, int vector_length, 285 int async, int num_waits, ...) 286 { 287 goacc_save_and_set_bind (acc_device_host); 288 fn (hostaddrs); 289 goacc_restore_bind (); 290 } 291 292 void 293 GOACC_data_start (int flags_m, size_t mapnum, 294 void **hostaddrs, size_t *sizes, unsigned short *kinds) 295 { 296 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 297 298 struct target_mem_desc *tgt; 299 300 #ifdef HAVE_INTTYPES_H 301 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", 302 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds); 303 #else 304 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n", 305 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds); 306 #endif 307 308 goacc_lazy_initialize (); 309 310 struct goacc_thread *thr = goacc_thread (); 311 struct gomp_device_descr *acc_dev = thr->dev; 312 313 /* Host fallback or 'do nothing'. */ 314 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 315 || (flags & GOACC_FLAG_HOST_FALLBACK)) 316 { 317 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, 318 GOMP_MAP_VARS_OPENACC); 319 tgt->prev = thr->mapped_data; 320 thr->mapped_data = tgt; 321 322 return; 323 } 324 325 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); 326 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true, 327 GOMP_MAP_VARS_OPENACC); 328 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); 329 tgt->prev = thr->mapped_data; 330 thr->mapped_data = tgt; 331 } 332 333 void 334 GOACC_data_end (void) 335 { 336 struct goacc_thread *thr = goacc_thread (); 337 struct target_mem_desc *tgt = thr->mapped_data; 338 339 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); 340 thr->mapped_data = tgt->prev; 341 gomp_unmap_vars (tgt, true); 342 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); 343 } 344 345 void 346 GOACC_enter_exit_data (int flags_m, size_t mapnum, 347 void **hostaddrs, size_t *sizes, unsigned short *kinds, 348 int async, int num_waits, ...) 349 { 350 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 351 352 struct goacc_thread *thr; 353 struct gomp_device_descr *acc_dev; 354 bool data_enter = false; 355 size_t i; 356 357 goacc_lazy_initialize (); 358 359 thr = goacc_thread (); 360 acc_dev = thr->dev; 361 362 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 363 || (flags & GOACC_FLAG_HOST_FALLBACK)) 364 return; 365 366 if (num_waits) 367 { 368 va_list ap; 369 370 va_start (ap, num_waits); 371 goacc_wait (async, num_waits, &ap); 372 va_end (ap); 373 } 374 375 /* Determine whether "finalize" semantics apply to all mappings of this 376 OpenACC directive. */ 377 bool finalize = false; 378 if (mapnum > 0) 379 { 380 unsigned char kind = kinds[0] & 0xff; 381 if (kind == GOMP_MAP_DELETE 382 || kind == GOMP_MAP_FORCE_FROM) 383 finalize = true; 384 } 385 386 acc_dev->openacc.async_set_async_func (async); 387 388 /* Determine if this is an "acc enter data". */ 389 for (i = 0; i < mapnum; ++i) 390 { 391 unsigned char kind = kinds[i] & 0xff; 392 393 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) 394 continue; 395 396 if (kind == GOMP_MAP_FORCE_ALLOC 397 || kind == GOMP_MAP_FORCE_PRESENT 398 || kind == GOMP_MAP_FORCE_TO 399 || kind == GOMP_MAP_TO 400 || kind == GOMP_MAP_ALLOC) 401 { 402 data_enter = true; 403 break; 404 } 405 406 if (kind == GOMP_MAP_RELEASE 407 || kind == GOMP_MAP_DELETE 408 || kind == GOMP_MAP_FROM 409 || kind == GOMP_MAP_FORCE_FROM) 410 break; 411 412 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 413 kind); 414 } 415 416 /* In c, non-pointers and arrays are represented by a single data clause. 417 Dynamically allocated arrays and subarrays are represented by a data 418 clause followed by an internal GOMP_MAP_POINTER. 419 420 In fortran, scalars and not allocated arrays are represented by a 421 single data clause. Allocated arrays and subarrays have three mappings: 422 1) the original data clause, 2) a PSET 3) a pointer to the array data. 423 */ 424 425 if (data_enter) 426 { 427 for (i = 0; i < mapnum; i++) 428 { 429 unsigned char kind = kinds[i] & 0xff; 430 431 /* Scan for pointers and PSETs. */ 432 int pointer = find_pointer (i, mapnum, kinds); 433 434 if (!pointer) 435 { 436 switch (kind) 437 { 438 case GOMP_MAP_ALLOC: 439 case GOMP_MAP_FORCE_ALLOC: 440 acc_create (hostaddrs[i], sizes[i]); 441 break; 442 case GOMP_MAP_TO: 443 case GOMP_MAP_FORCE_TO: 444 acc_copyin (hostaddrs[i], sizes[i]); 445 break; 446 default: 447 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 448 kind); 449 break; 450 } 451 } 452 else 453 { 454 gomp_acc_insert_pointer (pointer, &hostaddrs[i], 455 &sizes[i], &kinds[i]); 456 /* Increment 'i' by two because OpenACC requires fortran 457 arrays to be contiguous, so each PSET is associated with 458 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and 459 one MAP_POINTER. */ 460 i += pointer - 1; 461 } 462 } 463 } 464 else 465 for (i = 0; i < mapnum; ++i) 466 { 467 unsigned char kind = kinds[i] & 0xff; 468 469 int pointer = find_pointer (i, mapnum, kinds); 470 471 if (!pointer) 472 { 473 switch (kind) 474 { 475 case GOMP_MAP_RELEASE: 476 case GOMP_MAP_DELETE: 477 if (acc_is_present (hostaddrs[i], sizes[i])) 478 { 479 if (finalize) 480 acc_delete_finalize (hostaddrs[i], sizes[i]); 481 else 482 acc_delete (hostaddrs[i], sizes[i]); 483 } 484 break; 485 case GOMP_MAP_FROM: 486 case GOMP_MAP_FORCE_FROM: 487 if (finalize) 488 acc_copyout_finalize (hostaddrs[i], sizes[i]); 489 else 490 acc_copyout (hostaddrs[i], sizes[i]); 491 break; 492 default: 493 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", 494 kind); 495 break; 496 } 497 } 498 else 499 { 500 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM 501 || kind == GOMP_MAP_FROM); 502 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, 503 finalize, pointer); 504 /* See the above comment. */ 505 i += pointer - 1; 506 } 507 } 508 509 acc_dev->openacc.async_set_async_func (acc_async_sync); 510 } 511 512 static void 513 goacc_wait (int async, int num_waits, va_list *ap) 514 { 515 while (num_waits--) 516 { 517 int qid = va_arg (*ap, int); 518 519 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */ 520 if (qid == acc_async_noval) 521 { 522 if (async == acc_async_sync) 523 acc_wait_all (); 524 else 525 acc_wait_all_async (async); 526 break; 527 } 528 529 if (acc_async_test (qid)) 530 continue; 531 532 if (async == acc_async_sync) 533 acc_wait (qid); 534 else if (qid == async) 535 ;/* If we're waiting on the same asynchronous queue as we're 536 launching on, the queue itself will order work as 537 required, so there's no need to wait explicitly. */ 538 else 539 acc_wait_async (qid, async); 540 } 541 } 542 543 void 544 GOACC_update (int flags_m, size_t mapnum, 545 void **hostaddrs, size_t *sizes, unsigned short *kinds, 546 int async, int num_waits, ...) 547 { 548 int flags = GOACC_FLAGS_UNMARSHAL (flags_m); 549 550 size_t i; 551 552 goacc_lazy_initialize (); 553 554 struct goacc_thread *thr = goacc_thread (); 555 struct gomp_device_descr *acc_dev = thr->dev; 556 557 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 558 || (flags & GOACC_FLAG_HOST_FALLBACK)) 559 return; 560 561 if (num_waits) 562 { 563 va_list ap; 564 565 va_start (ap, num_waits); 566 goacc_wait (async, num_waits, &ap); 567 va_end (ap); 568 } 569 570 acc_dev->openacc.async_set_async_func (async); 571 572 bool update_device = false; 573 for (i = 0; i < mapnum; ++i) 574 { 575 unsigned char kind = kinds[i] & 0xff; 576 577 switch (kind) 578 { 579 case GOMP_MAP_POINTER: 580 case GOMP_MAP_TO_PSET: 581 break; 582 583 case GOMP_MAP_ALWAYS_POINTER: 584 if (update_device) 585 { 586 /* Save the contents of the host pointer. */ 587 void *dptr = acc_deviceptr (hostaddrs[i-1]); 588 uintptr_t t = *(uintptr_t *) hostaddrs[i]; 589 590 /* Update the contents of the host pointer to reflect 591 the value of the allocated device memory in the 592 previous pointer. */ 593 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr; 594 acc_update_device (hostaddrs[i], sizeof (uintptr_t)); 595 596 /* Restore the host pointer. */ 597 *(uintptr_t *) hostaddrs[i] = t; 598 update_device = false; 599 } 600 break; 601 602 case GOMP_MAP_TO: 603 if (!acc_is_present (hostaddrs[i], sizes[i])) 604 { 605 update_device = false; 606 break; 607 } 608 /* Fallthru */ 609 case GOMP_MAP_FORCE_TO: 610 update_device = true; 611 acc_update_device (hostaddrs[i], sizes[i]); 612 break; 613 614 case GOMP_MAP_FROM: 615 if (!acc_is_present (hostaddrs[i], sizes[i])) 616 { 617 update_device = false; 618 break; 619 } 620 /* Fallthru */ 621 case GOMP_MAP_FORCE_FROM: 622 update_device = false; 623 acc_update_self (hostaddrs[i], sizes[i]); 624 break; 625 626 default: 627 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind); 628 break; 629 } 630 } 631 632 acc_dev->openacc.async_set_async_func (acc_async_sync); 633 } 634 635 void 636 GOACC_wait (int async, int num_waits, ...) 637 { 638 if (num_waits) 639 { 640 va_list ap; 641 642 va_start (ap, num_waits); 643 goacc_wait (async, num_waits, &ap); 644 va_end (ap); 645 } 646 else if (async == acc_async_sync) 647 acc_wait_all (); 648 else 649 acc_wait_all_async (async); 650 } 651 652 int 653 GOACC_get_num_threads (void) 654 { 655 return 1; 656 } 657 658 int 659 GOACC_get_thread_num (void) 660 { 661 return 0; 662 } 663 664 void 665 GOACC_declare (int flags_m, size_t mapnum, 666 void **hostaddrs, size_t *sizes, unsigned short *kinds) 667 { 668 int i; 669 670 for (i = 0; i < mapnum; i++) 671 { 672 unsigned char kind = kinds[i] & 0xff; 673 674 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET) 675 continue; 676 677 switch (kind) 678 { 679 case GOMP_MAP_FORCE_ALLOC: 680 case GOMP_MAP_FORCE_FROM: 681 case GOMP_MAP_FORCE_TO: 682 case GOMP_MAP_POINTER: 683 case GOMP_MAP_RELEASE: 684 case GOMP_MAP_DELETE: 685 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], 686 &kinds[i], GOMP_ASYNC_SYNC, 0); 687 break; 688 689 case GOMP_MAP_FORCE_DEVICEPTR: 690 break; 691 692 case GOMP_MAP_ALLOC: 693 if (!acc_is_present (hostaddrs[i], sizes[i])) 694 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], 695 &kinds[i], GOMP_ASYNC_SYNC, 0); 696 break; 697 698 case GOMP_MAP_TO: 699 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], 700 &kinds[i], GOMP_ASYNC_SYNC, 0); 701 702 break; 703 704 case GOMP_MAP_FROM: 705 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i], 706 &kinds[i], GOMP_ASYNC_SYNC, 0); 707 break; 708 709 case GOMP_MAP_FORCE_PRESENT: 710 if (!acc_is_present (hostaddrs[i], sizes[i])) 711 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i], 712 (unsigned long) sizes[i]); 713 break; 714 715 default: 716 assert (0); 717 break; 718 } 719 } 720 } 721