/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shut down the runtime library. This is also optional, and even if called it
 * will not do anything unless the `KMP_IGNORE_MPPEND` environment variable is
 * set to zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
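
/* Illustrative sketch (not part of this file): a compiler or user harness that
   wants explicit runtime startup/shutdown could bracket the program roughly as
   below. Both calls are optional; the runtime initializes itself lazily on
   first use. The ident_t initializer shown here is a simplified assumption,
   not the exact layout a compiler emits.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};

     int main() {
       __kmpc_begin(&loc, 0); // optional explicit initialization
       // ... program body, possibly containing OpenMP constructs ...
       __kmpc_end(&loc); // honored only when KMP_IGNORE_MPPEND=0
       return 0;
     }
*/
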
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active
parallel construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
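
/* Illustrative sketch (an assumption, not generated by any particular
   compiler): code outside an OpenMP construct can still query the runtime
   through these entry points, e.g. from a non-OpenMP thread that has
   previously entered the runtime. The ident_t value is a simplified
   placeholder.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};

     void report_runtime_state(void) {
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);  // global id
       kmp_int32 all = __kmpc_global_num_threads(&loc);  // all runtime threads
       kmp_int32 tid = __kmpc_bound_thread_num(&loc);    // id within the team
       kmp_int32 nth = __kmpc_bound_num_threads(&loc);   // size of the team
       printf("gtid=%d all=%d tid=%d nth=%d\n", gtid, all, tid, nth);
     }
*/
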
/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe saving thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments for the microtask
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param cond condition for running in parallel
@param args struct of pointers to shared variables that aren't global

Perform a fork only if the condition is true.
*/
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                         kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();
  if (cond) {
    if (args)
      __kmpc_fork_call(loc, argc, microtask, args);
    else
      __kmpc_fork_call(loc, argc, microtask);
  } else {
    __kmpc_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    void *exit_frame_ptr;
#endif

    if (args)
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0,
                             /*argc=*/1, &args
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );
    else
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0,
                             /*argc=*/0,
                             /*args=*/nullptr
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );

    __kmpc_end_serialized_parallel(loc, gtid);
  }
}
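
/* Illustrative sketch (an assumption about typical codegen, not a verbatim
   dump from any compiler): a directive such as

     #pragma omp parallel num_threads(4) shared(a)
     { a[omp_get_thread_num()] = 1; }

   is commonly lowered to an outlined "microtask" plus calls into the entry
   points above, roughly:

     void outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *a) {
       a[*bound_tid] = 1; // body of the parallel region
     }

     void user_func(int *a) {
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;user_func;0;0;;"};
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_threads(&loc, gtid, 4); // num_threads(4) clause
       __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, a);
     }
*/
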
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task

Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target`
construct.
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
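
/* Illustrative sketch (an assumption about typical codegen): a directive like

     #pragma omp teams num_teams(4) thread_limit(8)
     { work(); }

   is usually lowered to a push of the clause values followed by the teams
   fork, roughly:

     void teams_outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid) {
       work(); // body of the teams region, run by each team's initial thread
     }

     void launch_teams(void) {
       static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;launch;0;0;;"};
       kmp_int32 gtid = __kmpc_global_thread_num(&loc);
       __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/4, /*num_threads=*/8);
       __kmpc_fork_teams(&loc, /*argc=*/0, (kmpc_micro)teams_outlined);
     }
*/
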
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  // Not autopar code
  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset/clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);
    }
#endif
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
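
/* Illustrative sketch (assumption about typical codegen): when a compiler
   chooses not to use __kmpc_fork_call_if, a parallel region with a false
   if-clause can be emitted as an explicit serialized bracket around an inline
   call of the outlined body:

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     if (condition) {
       __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &shared);
     } else {
       __kmpc_serialized_parallel(&loc, gtid);
       kmp_int32 zero = 0;
       outlined(&gtid, &zero, &shared); // body runs on the encountering thread
       __kmpc_end_serialized_parallel(&loc, gtid);
     }
*/
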
/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MFENCE(); /* Flush all pending memory write invalidates. */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
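
/* Illustrative sketch (assumption about typical codegen): stand-alone
   synchronization directives map directly onto the entry points above.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;func;0;0;;"};
     kmp_int32 gtid = __kmpc_global_thread_num(&loc);

     // #pragma omp flush
     __kmpc_flush(&loc);

     // #pragma omp barrier
     __kmpc_barrier(&loc, gtid);
*/
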
/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
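
/* Illustrative sketch (assumption about typical codegen): a
   `#pragma omp master` block is guarded by the return value of __kmpc_master,
   and the end call is made only by the thread that entered the block:

     if (__kmpc_master(&loc, gtid)) {
       // ... body of the master region ...
       __kmpc_end_master(&loc, gtid);
     }
     // note: no implied barrier here unless the compiler emits __kmpc_barrier
*/
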
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}
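
/* Illustrative sketch (assumption about typical codegen): for
   `#pragma omp masked filter(expr)` the compiler evaluates the filter
   expression and passes it to __kmpc_masked:

     kmp_int32 filter_val = expr; // 0 when no filter clause is present
     if (__kmpc_masked(&loc, gtid, filter_val)) {
       // ... body of the masked region ...
       __kmpc_end_masked(&loc, gtid);
     }
*/
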
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
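
/* Illustrative sketch (assumption about typical codegen): inside an
   `ordered` loop, the per-iteration ordered block is bracketed by the calls
   above; the loop itself is scheduled through the dispatch interface.

     // #pragma omp for ordered
     for (...) {
       // ... unordered part of the iteration ...
       __kmpc_ordered(&loc, gtid);
       // ... code inside #pragma omp ordered, run in iteration order ...
       __kmpc_end_ordered(&loc, gtid);
     }
*/
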
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}

// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }

#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {  \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX

#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it.
// There is a chance the lock will be destroyed with no usage, but that is not
// a problem, because this is not a real event seen by the user but rather a
// way of setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be
// reused for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  // since the critical directive binds to all threads, not just the current
  // team we have to check this even if we are in a serialized team.
  // also, even if we are the uber thread, we still have to conduct the lock,
  // as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}

#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
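
/* Illustrative sketch (assumption about typical codegen): a named critical
   with a hint clause, e.g.

     #pragma omp critical (update) hint(omp_sync_hint_contended)
     { counter++; }

   passes a file-scope kmp_critical_name object (one per critical name) plus
   the hint value:

     static kmp_critical_name crit_update; // zero-initialized lock slot
     __kmpc_critical_with_hint(&loc, gtid, &crit_update,
                               omp_sync_hint_contended);
     counter++;
     __kmpc_end_critical(&loc, gtid, &crit_update);
*/
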
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
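
/* Illustrative sketch (assumption about typical codegen): an unnamed critical
   construct uses a shared, zero-initialized kmp_critical_name as the lock
   identity, and every thread brackets the protected code with the enter/exit
   calls:

     static kmp_critical_name crit_unnamed;
     // #pragma omp critical
     __kmpc_critical(&loc, gtid, &crit_unnamed);
     // ... protected code ...
     __kmpc_end_critical(&loc, gtid, &crit_unnamed);
*/
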
1742 */ 1743 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1744 int status; 1745 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); 1746 __kmp_assert_valid_gtid(global_tid); 1747 1748 if (!TCR_4(__kmp_init_parallel)) 1749 __kmp_parallel_initialize(); 1750 1751 __kmp_resume_if_soft_paused(); 1752 1753 if (__kmp_env_consistency_check) 1754 __kmp_check_barrier(global_tid, ct_barrier, loc); 1755 1756 #if OMPT_SUPPORT 1757 ompt_frame_t *ompt_frame; 1758 if (ompt_enabled.enabled) { 1759 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1760 if (ompt_frame->enter_frame.ptr == NULL) 1761 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1762 } 1763 OMPT_STORE_RETURN_ADDRESS(global_tid); 1764 #endif 1765 #if USE_ITT_NOTIFY 1766 __kmp_threads[global_tid]->th.th_ident = loc; 1767 #endif 1768 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); 1769 #if OMPT_SUPPORT && OMPT_OPTIONAL 1770 if (ompt_enabled.enabled) { 1771 ompt_frame->enter_frame = ompt_data_none; 1772 } 1773 #endif 1774 1775 return (status != 0) ? 0 : 1; 1776 } 1777 1778 /*! 1779 @ingroup SYNCHRONIZATION 1780 @param loc source location information 1781 @param global_tid thread id. 1782 1783 Complete the execution of a combined barrier and master. This function should 1784 only be called at the completion of the <tt>master</tt> code. Other threads will 1785 still be waiting at the barrier and this call releases them. 1786 */ 1787 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { 1788 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); 1789 __kmp_assert_valid_gtid(global_tid); 1790 __kmp_end_split_barrier(bs_plain_barrier, global_tid); 1791 } 1792 1793 /*! 1794 @ingroup SYNCHRONIZATION 1795 @param loc source location information 1796 @param global_tid thread id. 1797 @return one if the thread should execute the master block, zero otherwise 1798 1799 Start execution of a combined barrier and master(nowait) construct. 1800 The barrier is executed inside this function. 1801 There is no equivalent "end" function, since the construct has no closing barrier; any end-of-master bookkeeping that would otherwise be done by an "end" call is performed inside this function. 1802 */ 1803 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { 1804 kmp_int32 ret; 1805 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); 1806 __kmp_assert_valid_gtid(global_tid); 1807 1808 if (!TCR_4(__kmp_init_parallel)) 1809 __kmp_parallel_initialize(); 1810 1811 __kmp_resume_if_soft_paused(); 1812 1813 if (__kmp_env_consistency_check) { 1814 if (loc == 0) { 1815 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1816 } 1817 __kmp_check_barrier(global_tid, ct_barrier, loc); 1818 } 1819 1820 #if OMPT_SUPPORT 1821 ompt_frame_t *ompt_frame; 1822 if (ompt_enabled.enabled) { 1823 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 1824 if (ompt_frame->enter_frame.ptr == NULL) 1825 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 1826 } 1827 OMPT_STORE_RETURN_ADDRESS(global_tid); 1828 #endif 1829 #if USE_ITT_NOTIFY 1830 __kmp_threads[global_tid]->th.th_ident = loc; 1831 #endif 1832 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 1833 #if OMPT_SUPPORT && OMPT_OPTIONAL 1834 if (ompt_enabled.enabled) { 1835 ompt_frame->enter_frame = ompt_data_none; 1836 } 1837 #endif 1838 1839 ret = __kmpc_master(loc, global_tid); 1840 1841 if (__kmp_env_consistency_check) { 1842 /* there's no __kmpc_end_master called; so the (stats) */ 1843 /* actions of __kmpc_end_master are done here */ 1844 if (ret) { 1845 /* only one thread should do the pop since only */ 1846 /* one did the push (see __kmpc_master()) */ 1847 __kmp_pop_sync(global_tid, ct_master, loc); 1848 } 1849 } 1850 1851 return (ret); 1852 } 1853 1854 /* The BARRIER for a SINGLE process section is always explicit */ 1855 /*! 1856 @ingroup WORK_SHARING 1857 @param loc source location information 1858 @param global_tid global thread number 1859 @return One if this thread should execute the single construct, zero otherwise. 1860 1861 Test whether to execute a <tt>single</tt> construct. 1862 There are no implicit barriers in the two "single" calls, rather the compiler 1863 should introduce an explicit barrier if it is required. 1864 */ 1865 1866 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { 1867 __kmp_assert_valid_gtid(global_tid); 1868 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); 1869 1870 if (rc) { 1871 // We are going to execute the single statement, so we should count it. 1872 KMP_COUNT_BLOCK(OMP_SINGLE); 1873 KMP_PUSH_PARTITIONED_TIMER(OMP_single); 1874 } 1875 1876 #if OMPT_SUPPORT && OMPT_OPTIONAL 1877 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1878 kmp_team_t *team = this_thr->th.th_team; 1879 int tid = __kmp_tid_from_gtid(global_tid); 1880 1881 if (ompt_enabled.enabled) { 1882 if (rc) { 1883 if (ompt_enabled.ompt_callback_work) { 1884 ompt_callbacks.ompt_callback(ompt_callback_work)( 1885 ompt_work_single_executor, ompt_scope_begin, 1886 &(team->t.ompt_team_info.parallel_data), 1887 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1888 1, OMPT_GET_RETURN_ADDRESS(0)); 1889 } 1890 } else { 1891 if (ompt_enabled.ompt_callback_work) { 1892 ompt_callbacks.ompt_callback(ompt_callback_work)( 1893 ompt_work_single_other, ompt_scope_begin, 1894 &(team->t.ompt_team_info.parallel_data), 1895 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1896 1, OMPT_GET_RETURN_ADDRESS(0)); 1897 ompt_callbacks.ompt_callback(ompt_callback_work)( 1898 ompt_work_single_other, ompt_scope_end, 1899 &(team->t.ompt_team_info.parallel_data), 1900 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1901 1, OMPT_GET_RETURN_ADDRESS(0)); 1902 } 1903 } 1904 } 1905 #endif 1906 1907 return rc; 1908 } 1909 1910 /*! 1911 @ingroup WORK_SHARING 1912 @param loc source location information 1913 @param global_tid global thread number 1914 1915 Mark the end of a <tt>single</tt> construct. This function should 1916 only be called by the thread that executed the block of code protected 1917 by the `single` construct. 
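As a hedged sketch of the expected code generation (illustrative only, not literal compiler output), a <tt>single</tt> construct without a <tt>nowait</tt> clause could be lowered as:
@code
if (__kmpc_single(&loc, gtid)) {
  /* ... user code of the single block ... */
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); /* explicit barrier emitted by the compiler */
@endcode
This mirrors the note above for @ref __kmpc_single: neither call contains an implicit barrier, so the closing barrier, when required, is generated separately.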
1918 */ 1919 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { 1920 __kmp_assert_valid_gtid(global_tid); 1921 __kmp_exit_single(global_tid); 1922 KMP_POP_PARTITIONED_TIMER(); 1923 1924 #if OMPT_SUPPORT && OMPT_OPTIONAL 1925 kmp_info_t *this_thr = __kmp_threads[global_tid]; 1926 kmp_team_t *team = this_thr->th.th_team; 1927 int tid = __kmp_tid_from_gtid(global_tid); 1928 1929 if (ompt_enabled.ompt_callback_work) { 1930 ompt_callbacks.ompt_callback(ompt_callback_work)( 1931 ompt_work_single_executor, ompt_scope_end, 1932 &(team->t.ompt_team_info.parallel_data), 1933 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 1934 OMPT_GET_RETURN_ADDRESS(0)); 1935 } 1936 #endif 1937 } 1938 1939 /*! 1940 @ingroup WORK_SHARING 1941 @param loc Source location 1942 @param global_tid Global thread id 1943 1944 Mark the end of a statically scheduled loop. 1945 */ 1946 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { 1947 KMP_POP_PARTITIONED_TIMER(); 1948 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); 1949 1950 #if OMPT_SUPPORT && OMPT_OPTIONAL 1951 if (ompt_enabled.ompt_callback_work) { 1952 ompt_work_t ompt_work_type = ompt_work_loop; 1953 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); 1954 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); 1955 // Determine workshare type 1956 if (loc != NULL) { 1957 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) { 1958 ompt_work_type = ompt_work_loop; 1959 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) { 1960 ompt_work_type = ompt_work_sections; 1961 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) { 1962 ompt_work_type = ompt_work_distribute; 1963 } else { 1964 // use default set above. 1965 // a warning about this case is provided in __kmpc_for_static_init 1966 } 1967 KMP_DEBUG_ASSERT(ompt_work_type); 1968 } 1969 ompt_callbacks.ompt_callback(ompt_callback_work)( 1970 ompt_work_type, ompt_scope_end, &(team_info->parallel_data), 1971 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0)); 1972 } 1973 #endif 1974 if (__kmp_env_consistency_check) 1975 __kmp_pop_workshare(global_tid, ct_pdo, loc); 1976 } 1977 1978 // User routines which take C-style arguments (call by value) 1979 // different from the Fortran equivalent routines 1980 1981 void ompc_set_num_threads(int arg) { 1982 // !!!!! TODO: check the per-task binding 1983 __kmp_set_num_threads(arg, __kmp_entry_gtid()); 1984 } 1985 1986 void ompc_set_dynamic(int flag) { 1987 kmp_info_t *thread; 1988 1989 /* For the thread-private implementation of the internal controls */ 1990 thread = __kmp_entry_thread(); 1991 1992 __kmp_save_internal_controls(thread); 1993 1994 set__dynamic(thread, flag ? true : false); 1995 } 1996 1997 void ompc_set_nested(int flag) { 1998 kmp_info_t *thread; 1999 2000 /* For the thread-private internal controls implementation */ 2001 thread = __kmp_entry_thread(); 2002 2003 __kmp_save_internal_controls(thread); 2004 2005 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); 2006 } 2007 2008 void ompc_set_max_active_levels(int max_active_levels) { 2009 /* TO DO */ 2010 /* we want per-task implementation of this internal control */ 2011 2012 /* For the per-thread internal controls implementation */ 2013 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); 2014 } 2015 2016 void ompc_set_schedule(omp_sched_t kind, int modifier) { 2017 // !!!!! 
TODO: check the per-task binding 2018 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); 2019 } 2020 2021 int ompc_get_ancestor_thread_num(int level) { 2022 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); 2023 } 2024 2025 int ompc_get_team_size(int level) { 2026 return __kmp_get_team_size(__kmp_entry_gtid(), level); 2027 } 2028 2029 /* OpenMP 5.0 Affinity Format API */ 2030 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) { 2031 if (!__kmp_init_serial) { 2032 __kmp_serial_initialize(); 2033 } 2034 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, 2035 format, KMP_STRLEN(format) + 1); 2036 } 2037 2038 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) { 2039 size_t format_size; 2040 if (!__kmp_init_serial) { 2041 __kmp_serial_initialize(); 2042 } 2043 format_size = KMP_STRLEN(__kmp_affinity_format); 2044 if (buffer && size) { 2045 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, 2046 format_size + 1); 2047 } 2048 return format_size; 2049 } 2050 2051 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) { 2052 int gtid; 2053 if (!TCR_4(__kmp_init_middle)) { 2054 __kmp_middle_initialize(); 2055 } 2056 __kmp_assign_root_init_mask(); 2057 gtid = __kmp_get_gtid(); 2058 #if KMP_AFFINITY_SUPPORTED 2059 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2060 __kmp_affinity.flags.reset) { 2061 __kmp_reset_root_init_mask(gtid); 2062 } 2063 #endif 2064 __kmp_aux_display_affinity(gtid, format); 2065 } 2066 2067 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size, 2068 char const *format) { 2069 int gtid; 2070 size_t num_required; 2071 kmp_str_buf_t capture_buf; 2072 if (!TCR_4(__kmp_init_middle)) { 2073 __kmp_middle_initialize(); 2074 } 2075 __kmp_assign_root_init_mask(); 2076 gtid = __kmp_get_gtid(); 2077 #if KMP_AFFINITY_SUPPORTED 2078 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && 2079 __kmp_affinity.flags.reset) { 2080 __kmp_reset_root_init_mask(gtid); 2081 } 2082 #endif 2083 __kmp_str_buf_init(&capture_buf); 2084 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); 2085 if (buffer && buf_size) { 2086 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, 2087 capture_buf.used + 1); 2088 } 2089 __kmp_str_buf_free(&capture_buf); 2090 return num_required; 2091 } 2092 2093 void kmpc_set_stacksize(int arg) { 2094 // __kmp_aux_set_stacksize initializes the library if needed 2095 __kmp_aux_set_stacksize(arg); 2096 } 2097 2098 void kmpc_set_stacksize_s(size_t arg) { 2099 // __kmp_aux_set_stacksize initializes the library if needed 2100 __kmp_aux_set_stacksize(arg); 2101 } 2102 2103 void kmpc_set_blocktime(int arg) { 2104 int gtid, tid, bt = arg; 2105 kmp_info_t *thread; 2106 2107 gtid = __kmp_entry_gtid(); 2108 tid = __kmp_tid_from_gtid(gtid); 2109 thread = __kmp_thread_from_gtid(gtid); 2110 2111 __kmp_aux_convert_blocktime(&bt); 2112 __kmp_aux_set_blocktime(bt, thread, tid); 2113 } 2114 2115 void kmpc_set_library(int arg) { 2116 // __kmp_user_set_library initializes the library if needed 2117 __kmp_user_set_library((enum library_type)arg); 2118 } 2119 2120 void kmpc_set_defaults(char const *str) { 2121 // __kmp_aux_set_defaults initializes the library if needed 2122 __kmp_aux_set_defaults(str, KMP_STRLEN(str)); 2123 } 2124 2125 void kmpc_set_disp_num_buffers(int arg) { 2126 // ignore after initialization because some teams have already 2127 // allocated dispatch buffers 2128 if (__kmp_init_serial == FALSE && arg >= 
KMP_MIN_DISP_NUM_BUFF && 2129 arg <= KMP_MAX_DISP_NUM_BUFF) { 2130 __kmp_dispatch_num_buffers = arg; 2131 } 2132 } 2133 2134 int kmpc_set_affinity_mask_proc(int proc, void **mask) { 2135 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2136 return -1; 2137 #else 2138 if (!TCR_4(__kmp_init_middle)) { 2139 __kmp_middle_initialize(); 2140 } 2141 __kmp_assign_root_init_mask(); 2142 return __kmp_aux_set_affinity_mask_proc(proc, mask); 2143 #endif 2144 } 2145 2146 int kmpc_unset_affinity_mask_proc(int proc, void **mask) { 2147 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2148 return -1; 2149 #else 2150 if (!TCR_4(__kmp_init_middle)) { 2151 __kmp_middle_initialize(); 2152 } 2153 __kmp_assign_root_init_mask(); 2154 return __kmp_aux_unset_affinity_mask_proc(proc, mask); 2155 #endif 2156 } 2157 2158 int kmpc_get_affinity_mask_proc(int proc, void **mask) { 2159 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED 2160 return -1; 2161 #else 2162 if (!TCR_4(__kmp_init_middle)) { 2163 __kmp_middle_initialize(); 2164 } 2165 __kmp_assign_root_init_mask(); 2166 return __kmp_aux_get_affinity_mask_proc(proc, mask); 2167 #endif 2168 } 2169 2170 /* -------------------------------------------------------------------------- */ 2171 /*! 2172 @ingroup THREADPRIVATE 2173 @param loc source location information 2174 @param gtid global thread number 2175 @param cpy_size size of the cpy_data buffer 2176 @param cpy_data pointer to data to be copied 2177 @param cpy_func helper function to call for copying data 2178 @param didit flag variable: 1=single thread; 0=not single thread 2179 2180 __kmpc_copyprivate implements the interface for the private data broadcast 2181 needed for the copyprivate clause associated with a single region in an 2182 OpenMP<sup>*</sup> program (both C and Fortran). 2183 All threads participating in the parallel region call this routine. 2184 One of the threads (called the single thread) should have the <tt>didit</tt> 2185 variable set to 1 and all other threads should have that variable set to 0. 2186 All threads pass a pointer to a data buffer (cpy_data) that they have built. 2187 2188 The OpenMP specification forbids the use of nowait on the single region when a 2189 copyprivate clause is present. However, @ref __kmpc_copyprivate implements a 2190 barrier internally to avoid race conditions, so the code generation for the 2191 single region should avoid generating a barrier after the call to @ref 2192 __kmpc_copyprivate. 2193 2194 The <tt>gtid</tt> parameter is the global thread id for the current thread. 2195 The <tt>loc</tt> parameter is a pointer to source location information. 2196 2197 Internal implementation: The single thread will first copy its descriptor 2198 address (cpy_data) to a team-private location, then the other threads will each 2199 call the function pointed to by the parameter cpy_func, which carries out the 2200 copy by copying the data using the cpy_data buffer. 2201 2202 The cpy_func routine used for the copy and the contents of the data area defined 2203 by cpy_data and cpy_size may be built in any fashion that will allow the copy 2204 to be done. For instance, the cpy_data buffer can hold the actual data to be 2205 copied or it may hold a list of pointers to the data. The cpy_func routine must 2206 interpret the cpy_data buffer appropriately. 
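As a purely illustrative sketch (the names <tt>copy_x</tt>, <tt>x</tt>, <tt>didit</tt> and <tt>compute_value</tt> are hypothetical, and a real compiler may lower the construct differently), a <tt>copyprivate(x)</tt> clause on a scalar <tt>x</tt> could pass the address of each thread's <tt>x</tt> as <tt>cpy_data</tt> together with a trivial copy helper:
@code
/* hypothetical helper generated for copyprivate(x) */
static void copy_x(void *destination, void *source) {
  *(int *)destination = *(int *)source;
}

/* in the outlined parallel region: */
kmp_int32 didit = __kmpc_single(&loc, gtid);
if (didit)
  x = compute_value(); /* only the single thread produces the value */
__kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
@endcode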
2207 2208 The interface to cpy_func is as follows: 2209 @code 2210 void cpy_func( void *destination, void *source ) 2211 @endcode 2212 where void *destination is the cpy_data pointer for the thread being copied to 2213 and void *source is the cpy_data pointer for the thread being copied from. 2214 */ 2215 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, 2216 void *cpy_data, void (*cpy_func)(void *, void *), 2217 kmp_int32 didit) { 2218 void **data_ptr; 2219 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); 2220 __kmp_assert_valid_gtid(gtid); 2221 2222 KMP_MB(); 2223 2224 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2225 2226 if (__kmp_env_consistency_check) { 2227 if (loc == 0) { 2228 KMP_WARNING(ConstructIdentInvalid); 2229 } 2230 } 2231 2232 // ToDo: Optimize the following two barriers into some kind of split barrier 2233 2234 if (didit) 2235 *data_ptr = cpy_data; 2236 2237 #if OMPT_SUPPORT 2238 ompt_frame_t *ompt_frame; 2239 if (ompt_enabled.enabled) { 2240 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2241 if (ompt_frame->enter_frame.ptr == NULL) 2242 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2243 } 2244 OMPT_STORE_RETURN_ADDRESS(gtid); 2245 #endif 2246 /* This barrier is not a barrier region boundary */ 2247 #if USE_ITT_NOTIFY 2248 __kmp_threads[gtid]->th.th_ident = loc; 2249 #endif 2250 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2251 2252 if (!didit) 2253 (*cpy_func)(cpy_data, *data_ptr); 2254 2255 // Consider next barrier a user-visible barrier for barrier region boundaries 2256 // Nesting checks are already handled by the single construct checks 2257 { 2258 #if OMPT_SUPPORT 2259 OMPT_STORE_RETURN_ADDRESS(gtid); 2260 #endif 2261 #if USE_ITT_NOTIFY 2262 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 2263 // tasks can overwrite the location) 2264 #endif 2265 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2266 #if OMPT_SUPPORT && OMPT_OPTIONAL 2267 if (ompt_enabled.enabled) { 2268 ompt_frame->enter_frame = ompt_data_none; 2269 } 2270 #endif 2271 } 2272 } 2273 2274 /* --------------------------------------------------------------------------*/ 2275 /*! 2276 @ingroup THREADPRIVATE 2277 @param loc source location information 2278 @param gtid global thread number 2279 @param cpy_data pointer to the data to be saved/copied or 0 2280 @return the saved pointer to the data 2281 2282 __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: 2283 __kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so 2284 coming from single), and returns that pointer in all calls (for single thread 2285 it's not needed). This version doesn't do any actual data copying. Data copying 2286 has to be done somewhere else, e.g. inline in the generated code. Due to this, 2287 this function doesn't have any barrier at the end of the function, like 2288 __kmpc_copyprivate does, so generated code needs barrier after copying of all 2289 data was done. 
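A hedged sketch of the expected lowering (illustrative only; <tt>x</tt> and <tt>didit</tt> are hypothetical and carry the same meaning as in @ref __kmpc_copyprivate):
@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? (void *)&x : NULL);
if (!didit)
  x = *(int *)src;          /* inline copy generated by the compiler */
__kmpc_barrier(&loc, gtid); /* barrier required after all copying is done */
@endcode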
2290 */ 2291 void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) { 2292 void **data_ptr; 2293 2294 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid)); 2295 2296 KMP_MB(); 2297 2298 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; 2299 2300 if (__kmp_env_consistency_check) { 2301 if (loc == 0) { 2302 KMP_WARNING(ConstructIdentInvalid); 2303 } 2304 } 2305 2306 // ToDo: Optimize the following barrier 2307 2308 if (cpy_data) 2309 *data_ptr = cpy_data; 2310 2311 #if OMPT_SUPPORT 2312 ompt_frame_t *ompt_frame; 2313 if (ompt_enabled.enabled) { 2314 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 2315 if (ompt_frame->enter_frame.ptr == NULL) 2316 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 2317 OMPT_STORE_RETURN_ADDRESS(gtid); 2318 } 2319 #endif 2320 /* This barrier is not a barrier region boundary */ 2321 #if USE_ITT_NOTIFY 2322 __kmp_threads[gtid]->th.th_ident = loc; 2323 #endif 2324 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); 2325 2326 return *data_ptr; 2327 } 2328 2329 /* -------------------------------------------------------------------------- */ 2330 2331 #define INIT_LOCK __kmp_init_user_lock_with_checks 2332 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks 2333 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks 2334 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed 2335 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks 2336 #define ACQUIRE_NESTED_LOCK_TIMED \ 2337 __kmp_acquire_nested_user_lock_with_checks_timed 2338 #define RELEASE_LOCK __kmp_release_user_lock_with_checks 2339 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks 2340 #define TEST_LOCK __kmp_test_user_lock_with_checks 2341 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks 2342 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks 2343 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks 2344 2345 // TODO: Make check abort messages use location info & pass it into 2346 // with_checks routines 2347 2348 #if KMP_USE_DYNAMIC_LOCK 2349 2350 // internal lock initializer 2351 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, 2352 kmp_dyna_lockseq_t seq) { 2353 if (KMP_IS_D_LOCK(seq)) { 2354 KMP_INIT_D_LOCK(lock, seq); 2355 #if USE_ITT_BUILD 2356 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); 2357 #endif 2358 } else { 2359 KMP_INIT_I_LOCK(lock, seq); 2360 #if USE_ITT_BUILD 2361 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2362 __kmp_itt_lock_creating(ilk->lock, loc); 2363 #endif 2364 } 2365 } 2366 2367 // internal nest lock initializer 2368 static __forceinline void 2369 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, 2370 kmp_dyna_lockseq_t seq) { 2371 #if KMP_USE_TSX 2372 // Don't have nested lock implementation for speculative locks 2373 if (seq == lockseq_hle || seq == lockseq_rtm_queuing || 2374 seq == lockseq_rtm_spin || seq == lockseq_adaptive) 2375 seq = __kmp_user_lock_seq; 2376 #endif 2377 switch (seq) { 2378 case lockseq_tas: 2379 seq = lockseq_nested_tas; 2380 break; 2381 #if KMP_USE_FUTEX 2382 case lockseq_futex: 2383 seq = lockseq_nested_futex; 2384 break; 2385 #endif 2386 case lockseq_ticket: 2387 seq = lockseq_nested_ticket; 2388 break; 2389 case lockseq_queuing: 2390 seq = lockseq_nested_queuing; 2391 break; 2392 case lockseq_drdpa: 2393 seq = lockseq_nested_drdpa; 2394 break; 2395 default: 2396 seq = lockseq_nested_queuing; 2397 } 2398 
KMP_INIT_I_LOCK(lock, seq); 2399 #if USE_ITT_BUILD 2400 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 2401 __kmp_itt_lock_creating(ilk->lock, loc); 2402 #endif 2403 } 2404 2405 /* initialize the lock with a hint */ 2406 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, 2407 uintptr_t hint) { 2408 KMP_DEBUG_ASSERT(__kmp_init_serial); 2409 if (__kmp_env_consistency_check && user_lock == NULL) { 2410 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); 2411 } 2412 2413 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2414 2415 #if OMPT_SUPPORT && OMPT_OPTIONAL 2416 // This is the case, if called from omp_init_lock_with_hint: 2417 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2418 if (!codeptr) 2419 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2420 if (ompt_enabled.ompt_callback_lock_init) { 2421 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2422 ompt_mutex_lock, (omp_lock_hint_t)hint, 2423 __ompt_get_mutex_impl_type(user_lock), 2424 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2425 } 2426 #endif 2427 } 2428 2429 /* initialize the lock with a hint */ 2430 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, 2431 void **user_lock, uintptr_t hint) { 2432 KMP_DEBUG_ASSERT(__kmp_init_serial); 2433 if (__kmp_env_consistency_check && user_lock == NULL) { 2434 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); 2435 } 2436 2437 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); 2438 2439 #if OMPT_SUPPORT && OMPT_OPTIONAL 2440 // This is the case, if called from omp_init_lock_with_hint: 2441 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2442 if (!codeptr) 2443 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2444 if (ompt_enabled.ompt_callback_lock_init) { 2445 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2446 ompt_mutex_nest_lock, (omp_lock_hint_t)hint, 2447 __ompt_get_mutex_impl_type(user_lock), 2448 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2449 } 2450 #endif 2451 } 2452 2453 #endif // KMP_USE_DYNAMIC_LOCK 2454 2455 /* initialize the lock */ 2456 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2457 #if KMP_USE_DYNAMIC_LOCK 2458 2459 KMP_DEBUG_ASSERT(__kmp_init_serial); 2460 if (__kmp_env_consistency_check && user_lock == NULL) { 2461 KMP_FATAL(LockIsUninitialized, "omp_init_lock"); 2462 } 2463 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2464 2465 #if OMPT_SUPPORT && OMPT_OPTIONAL 2466 // This is the case, if called from omp_init_lock_with_hint: 2467 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2468 if (!codeptr) 2469 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2470 if (ompt_enabled.ompt_callback_lock_init) { 2471 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2472 ompt_mutex_lock, omp_lock_hint_none, 2473 __ompt_get_mutex_impl_type(user_lock), 2474 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2475 } 2476 #endif 2477 2478 #else // KMP_USE_DYNAMIC_LOCK 2479 2480 static char const *const func = "omp_init_lock"; 2481 kmp_user_lock_p lck; 2482 KMP_DEBUG_ASSERT(__kmp_init_serial); 2483 2484 if (__kmp_env_consistency_check) { 2485 if (user_lock == NULL) { 2486 KMP_FATAL(LockIsUninitialized, func); 2487 } 2488 } 2489 2490 KMP_CHECK_USER_LOCK_INIT(); 2491 2492 if ((__kmp_user_lock_kind == lk_tas) && 2493 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2494 lck = (kmp_user_lock_p)user_lock; 2495 } 2496 #if KMP_USE_FUTEX 2497 else if ((__kmp_user_lock_kind == lk_futex) && 2498 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 
2499 lck = (kmp_user_lock_p)user_lock; 2500 } 2501 #endif 2502 else { 2503 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2504 } 2505 INIT_LOCK(lck); 2506 __kmp_set_user_lock_location(lck, loc); 2507 2508 #if OMPT_SUPPORT && OMPT_OPTIONAL 2509 // This is the case, if called from omp_init_lock_with_hint: 2510 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2511 if (!codeptr) 2512 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2513 if (ompt_enabled.ompt_callback_lock_init) { 2514 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2515 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2516 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2517 } 2518 #endif 2519 2520 #if USE_ITT_BUILD 2521 __kmp_itt_lock_creating(lck); 2522 #endif /* USE_ITT_BUILD */ 2523 2524 #endif // KMP_USE_DYNAMIC_LOCK 2525 } // __kmpc_init_lock 2526 2527 /* initialize the lock */ 2528 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2529 #if KMP_USE_DYNAMIC_LOCK 2530 2531 KMP_DEBUG_ASSERT(__kmp_init_serial); 2532 if (__kmp_env_consistency_check && user_lock == NULL) { 2533 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); 2534 } 2535 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); 2536 2537 #if OMPT_SUPPORT && OMPT_OPTIONAL 2538 // This is the case, if called from omp_init_lock_with_hint: 2539 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2540 if (!codeptr) 2541 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2542 if (ompt_enabled.ompt_callback_lock_init) { 2543 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2544 ompt_mutex_nest_lock, omp_lock_hint_none, 2545 __ompt_get_mutex_impl_type(user_lock), 2546 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2547 } 2548 #endif 2549 2550 #else // KMP_USE_DYNAMIC_LOCK 2551 2552 static char const *const func = "omp_init_nest_lock"; 2553 kmp_user_lock_p lck; 2554 KMP_DEBUG_ASSERT(__kmp_init_serial); 2555 2556 if (__kmp_env_consistency_check) { 2557 if (user_lock == NULL) { 2558 KMP_FATAL(LockIsUninitialized, func); 2559 } 2560 } 2561 2562 KMP_CHECK_USER_LOCK_INIT(); 2563 2564 if ((__kmp_user_lock_kind == lk_tas) && 2565 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2566 OMP_NEST_LOCK_T_SIZE)) { 2567 lck = (kmp_user_lock_p)user_lock; 2568 } 2569 #if KMP_USE_FUTEX 2570 else if ((__kmp_user_lock_kind == lk_futex) && 2571 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2572 OMP_NEST_LOCK_T_SIZE)) { 2573 lck = (kmp_user_lock_p)user_lock; 2574 } 2575 #endif 2576 else { 2577 lck = __kmp_user_lock_allocate(user_lock, gtid, 0); 2578 } 2579 2580 INIT_NESTED_LOCK(lck); 2581 __kmp_set_user_lock_location(lck, loc); 2582 2583 #if OMPT_SUPPORT && OMPT_OPTIONAL 2584 // This is the case, if called from omp_init_lock_with_hint: 2585 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2586 if (!codeptr) 2587 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2588 if (ompt_enabled.ompt_callback_lock_init) { 2589 ompt_callbacks.ompt_callback(ompt_callback_lock_init)( 2590 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), 2591 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2592 } 2593 #endif 2594 2595 #if USE_ITT_BUILD 2596 __kmp_itt_lock_creating(lck); 2597 #endif /* USE_ITT_BUILD */ 2598 2599 #endif // KMP_USE_DYNAMIC_LOCK 2600 } // __kmpc_init_nest_lock 2601 2602 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2603 #if KMP_USE_DYNAMIC_LOCK 2604 2605 #if USE_ITT_BUILD 2606 kmp_user_lock_p lck; 2607 if (KMP_EXTRACT_D_TAG(user_lock) == 0) { 2608 lck = 
((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; 2609 } else { 2610 lck = (kmp_user_lock_p)user_lock; 2611 } 2612 __kmp_itt_lock_destroyed(lck); 2613 #endif 2614 #if OMPT_SUPPORT && OMPT_OPTIONAL 2615 // This is the case, if called from omp_init_lock_with_hint: 2616 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2617 if (!codeptr) 2618 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2619 if (ompt_enabled.ompt_callback_lock_destroy) { 2620 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2621 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2622 } 2623 #endif 2624 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2625 #else 2626 kmp_user_lock_p lck; 2627 2628 if ((__kmp_user_lock_kind == lk_tas) && 2629 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2630 lck = (kmp_user_lock_p)user_lock; 2631 } 2632 #if KMP_USE_FUTEX 2633 else if ((__kmp_user_lock_kind == lk_futex) && 2634 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2635 lck = (kmp_user_lock_p)user_lock; 2636 } 2637 #endif 2638 else { 2639 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); 2640 } 2641 2642 #if OMPT_SUPPORT && OMPT_OPTIONAL 2643 // This is the case, if called from omp_init_lock_with_hint: 2644 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2645 if (!codeptr) 2646 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2647 if (ompt_enabled.ompt_callback_lock_destroy) { 2648 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2649 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2650 } 2651 #endif 2652 2653 #if USE_ITT_BUILD 2654 __kmp_itt_lock_destroyed(lck); 2655 #endif /* USE_ITT_BUILD */ 2656 DESTROY_LOCK(lck); 2657 2658 if ((__kmp_user_lock_kind == lk_tas) && 2659 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2660 ; 2661 } 2662 #if KMP_USE_FUTEX 2663 else if ((__kmp_user_lock_kind == lk_futex) && 2664 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2665 ; 2666 } 2667 #endif 2668 else { 2669 __kmp_user_lock_free(user_lock, gtid, lck); 2670 } 2671 #endif // KMP_USE_DYNAMIC_LOCK 2672 } // __kmpc_destroy_lock 2673 2674 /* destroy the lock */ 2675 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2676 #if KMP_USE_DYNAMIC_LOCK 2677 2678 #if USE_ITT_BUILD 2679 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); 2680 __kmp_itt_lock_destroyed(ilk->lock); 2681 #endif 2682 #if OMPT_SUPPORT && OMPT_OPTIONAL 2683 // This is the case, if called from omp_init_lock_with_hint: 2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2685 if (!codeptr) 2686 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2687 if (ompt_enabled.ompt_callback_lock_destroy) { 2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2690 } 2691 #endif 2692 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); 2693 2694 #else // KMP_USE_DYNAMIC_LOCK 2695 2696 kmp_user_lock_p lck; 2697 2698 if ((__kmp_user_lock_kind == lk_tas) && 2699 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2700 OMP_NEST_LOCK_T_SIZE)) { 2701 lck = (kmp_user_lock_p)user_lock; 2702 } 2703 #if KMP_USE_FUTEX 2704 else if ((__kmp_user_lock_kind == lk_futex) && 2705 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2706 OMP_NEST_LOCK_T_SIZE)) { 2707 lck = (kmp_user_lock_p)user_lock; 2708 } 2709 #endif 2710 else { 2711 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); 2712 } 2713 2714 #if OMPT_SUPPORT && OMPT_OPTIONAL 2715 // This is the case, if called from 
omp_init_lock_with_hint: 2716 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2717 if (!codeptr) 2718 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2719 if (ompt_enabled.ompt_callback_lock_destroy) { 2720 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( 2721 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2722 } 2723 #endif 2724 2725 #if USE_ITT_BUILD 2726 __kmp_itt_lock_destroyed(lck); 2727 #endif /* USE_ITT_BUILD */ 2728 2729 DESTROY_NESTED_LOCK(lck); 2730 2731 if ((__kmp_user_lock_kind == lk_tas) && 2732 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2733 OMP_NEST_LOCK_T_SIZE)) { 2734 ; 2735 } 2736 #if KMP_USE_FUTEX 2737 else if ((__kmp_user_lock_kind == lk_futex) && 2738 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2739 OMP_NEST_LOCK_T_SIZE)) { 2740 ; 2741 } 2742 #endif 2743 else { 2744 __kmp_user_lock_free(user_lock, gtid, lck); 2745 } 2746 #endif // KMP_USE_DYNAMIC_LOCK 2747 } // __kmpc_destroy_nest_lock 2748 2749 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2750 KMP_COUNT_BLOCK(OMP_set_lock); 2751 #if KMP_USE_DYNAMIC_LOCK 2752 int tag = KMP_EXTRACT_D_TAG(user_lock); 2753 #if USE_ITT_BUILD 2754 __kmp_itt_lock_acquiring( 2755 (kmp_user_lock_p) 2756 user_lock); // itt function will get to the right lock object. 2757 #endif 2758 #if OMPT_SUPPORT && OMPT_OPTIONAL 2759 // This is the case, if called from omp_init_lock_with_hint: 2760 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2761 if (!codeptr) 2762 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2763 if (ompt_enabled.ompt_callback_mutex_acquire) { 2764 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2765 ompt_mutex_lock, omp_lock_hint_none, 2766 __ompt_get_mutex_impl_type(user_lock), 2767 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2768 } 2769 #endif 2770 #if KMP_USE_INLINED_TAS 2771 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2772 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); 2773 } else 2774 #elif KMP_USE_INLINED_FUTEX 2775 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2776 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); 2777 } else 2778 #endif 2779 { 2780 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2781 } 2782 #if USE_ITT_BUILD 2783 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2784 #endif 2785 #if OMPT_SUPPORT && OMPT_OPTIONAL 2786 if (ompt_enabled.ompt_callback_mutex_acquired) { 2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2788 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2789 } 2790 #endif 2791 2792 #else // KMP_USE_DYNAMIC_LOCK 2793 2794 kmp_user_lock_p lck; 2795 2796 if ((__kmp_user_lock_kind == lk_tas) && 2797 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2798 lck = (kmp_user_lock_p)user_lock; 2799 } 2800 #if KMP_USE_FUTEX 2801 else if ((__kmp_user_lock_kind == lk_futex) && 2802 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 2803 lck = (kmp_user_lock_p)user_lock; 2804 } 2805 #endif 2806 else { 2807 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); 2808 } 2809 2810 #if USE_ITT_BUILD 2811 __kmp_itt_lock_acquiring(lck); 2812 #endif /* USE_ITT_BUILD */ 2813 #if OMPT_SUPPORT && OMPT_OPTIONAL 2814 // This is the case, if called from omp_init_lock_with_hint: 2815 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2816 if (!codeptr) 2817 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2818 if (ompt_enabled.ompt_callback_mutex_acquire) { 2819 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2820 ompt_mutex_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 2821 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2822 } 2823 #endif 2824 2825 ACQUIRE_LOCK(lck, gtid); 2826 2827 #if USE_ITT_BUILD 2828 __kmp_itt_lock_acquired(lck); 2829 #endif /* USE_ITT_BUILD */ 2830 2831 #if OMPT_SUPPORT && OMPT_OPTIONAL 2832 if (ompt_enabled.ompt_callback_mutex_acquired) { 2833 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2834 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2835 } 2836 #endif 2837 2838 #endif // KMP_USE_DYNAMIC_LOCK 2839 } 2840 2841 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2842 #if KMP_USE_DYNAMIC_LOCK 2843 2844 #if USE_ITT_BUILD 2845 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 2846 #endif 2847 #if OMPT_SUPPORT && OMPT_OPTIONAL 2848 // This is the case, if called from omp_init_lock_with_hint: 2849 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2850 if (!codeptr) 2851 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2852 if (ompt_enabled.enabled) { 2853 if (ompt_enabled.ompt_callback_mutex_acquire) { 2854 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2855 ompt_mutex_nest_lock, omp_lock_hint_none, 2856 __ompt_get_mutex_impl_type(user_lock), 2857 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2858 } 2859 } 2860 #endif 2861 int acquire_status = 2862 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); 2863 (void)acquire_status; 2864 #if USE_ITT_BUILD 2865 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 2866 #endif 2867 2868 #if OMPT_SUPPORT && OMPT_OPTIONAL 2869 if (ompt_enabled.enabled) { 2870 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2871 if (ompt_enabled.ompt_callback_mutex_acquired) { 2872 // lock_first 2873 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2874 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 2875 codeptr); 2876 } 2877 } else { 2878 if (ompt_enabled.ompt_callback_nest_lock) { 2879 // lock_next 2880 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2881 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2882 } 2883 } 2884 } 2885 #endif 2886 2887 #else // KMP_USE_DYNAMIC_LOCK 2888 int acquire_status; 2889 kmp_user_lock_p lck; 2890 2891 if ((__kmp_user_lock_kind == lk_tas) && 2892 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 2893 OMP_NEST_LOCK_T_SIZE)) { 2894 lck = (kmp_user_lock_p)user_lock; 2895 } 2896 #if KMP_USE_FUTEX 2897 else if ((__kmp_user_lock_kind == lk_futex) && 2898 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 2899 OMP_NEST_LOCK_T_SIZE)) { 2900 lck = (kmp_user_lock_p)user_lock; 2901 } 2902 #endif 2903 else { 2904 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); 2905 } 2906 2907 #if USE_ITT_BUILD 2908 __kmp_itt_lock_acquiring(lck); 2909 #endif /* USE_ITT_BUILD */ 2910 #if OMPT_SUPPORT && OMPT_OPTIONAL 2911 // This is the case, if called from omp_init_lock_with_hint: 2912 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2913 if (!codeptr) 2914 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2915 if (ompt_enabled.enabled) { 2916 if (ompt_enabled.ompt_callback_mutex_acquire) { 2917 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 2918 ompt_mutex_nest_lock, omp_lock_hint_none, 2919 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 2920 codeptr); 2921 } 2922 } 2923 #endif 2924 2925 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); 2926 2927 #if USE_ITT_BUILD 2928 __kmp_itt_lock_acquired(lck); 2929 #endif /* USE_ITT_BUILD */ 2930 2931 #if OMPT_SUPPORT && OMPT_OPTIONAL 2932 if (ompt_enabled.enabled) 
{ 2933 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { 2934 if (ompt_enabled.ompt_callback_mutex_acquired) { 2935 // lock_first 2936 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 2937 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2938 } 2939 } else { 2940 if (ompt_enabled.ompt_callback_nest_lock) { 2941 // lock_next 2942 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 2943 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 2944 } 2945 } 2946 } 2947 #endif 2948 2949 #endif // KMP_USE_DYNAMIC_LOCK 2950 } 2951 2952 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 2953 #if KMP_USE_DYNAMIC_LOCK 2954 2955 int tag = KMP_EXTRACT_D_TAG(user_lock); 2956 #if USE_ITT_BUILD 2957 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2958 #endif 2959 #if KMP_USE_INLINED_TAS 2960 if (tag == locktag_tas && !__kmp_env_consistency_check) { 2961 KMP_RELEASE_TAS_LOCK(user_lock, gtid); 2962 } else 2963 #elif KMP_USE_INLINED_FUTEX 2964 if (tag == locktag_futex && !__kmp_env_consistency_check) { 2965 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); 2966 } else 2967 #endif 2968 { 2969 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); 2970 } 2971 2972 #if OMPT_SUPPORT && OMPT_OPTIONAL 2973 // This is the case, if called from omp_init_lock_with_hint: 2974 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 2975 if (!codeptr) 2976 codeptr = OMPT_GET_RETURN_ADDRESS(0); 2977 if (ompt_enabled.ompt_callback_mutex_released) { 2978 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 2979 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 2980 } 2981 #endif 2982 2983 #else // KMP_USE_DYNAMIC_LOCK 2984 2985 kmp_user_lock_p lck; 2986 2987 /* Can't use serial interval since not block structured */ 2988 /* release the lock */ 2989 2990 if ((__kmp_user_lock_kind == lk_tas) && 2991 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 2992 #if KMP_OS_LINUX && \ 2993 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 2994 // "fast" path implemented to fix customer performance issue 2995 #if USE_ITT_BUILD 2996 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 2997 #endif /* USE_ITT_BUILD */ 2998 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); 2999 KMP_MB(); 3000 3001 #if OMPT_SUPPORT && OMPT_OPTIONAL 3002 // This is the case, if called from omp_init_lock_with_hint: 3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3004 if (!codeptr) 3005 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3006 if (ompt_enabled.ompt_callback_mutex_released) { 3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3008 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3009 } 3010 #endif 3011 3012 return; 3013 #else 3014 lck = (kmp_user_lock_p)user_lock; 3015 #endif 3016 } 3017 #if KMP_USE_FUTEX 3018 else if ((__kmp_user_lock_kind == lk_futex) && 3019 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3020 lck = (kmp_user_lock_p)user_lock; 3021 } 3022 #endif 3023 else { 3024 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); 3025 } 3026 3027 #if USE_ITT_BUILD 3028 __kmp_itt_lock_releasing(lck); 3029 #endif /* USE_ITT_BUILD */ 3030 3031 RELEASE_LOCK(lck, gtid); 3032 3033 #if OMPT_SUPPORT && OMPT_OPTIONAL 3034 // This is the case, if called from omp_init_lock_with_hint: 3035 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3036 if (!codeptr) 3037 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3038 if (ompt_enabled.ompt_callback_mutex_released) { 3039 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3040 ompt_mutex_lock, 
(ompt_wait_id_t)(uintptr_t)lck, codeptr); 3041 } 3042 #endif 3043 3044 #endif // KMP_USE_DYNAMIC_LOCK 3045 } 3046 3047 /* release the lock */ 3048 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3049 #if KMP_USE_DYNAMIC_LOCK 3050 3051 #if USE_ITT_BUILD 3052 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3053 #endif 3054 int release_status = 3055 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); 3056 (void)release_status; 3057 3058 #if OMPT_SUPPORT && OMPT_OPTIONAL 3059 // This is the case, if called from omp_init_lock_with_hint: 3060 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3061 if (!codeptr) 3062 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3063 if (ompt_enabled.enabled) { 3064 if (release_status == KMP_LOCK_RELEASED) { 3065 if (ompt_enabled.ompt_callback_mutex_released) { 3066 // release_lock_last 3067 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3068 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3069 codeptr); 3070 } 3071 } else if (ompt_enabled.ompt_callback_nest_lock) { 3072 // release_lock_prev 3073 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3074 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3075 } 3076 } 3077 #endif 3078 3079 #else // KMP_USE_DYNAMIC_LOCK 3080 3081 kmp_user_lock_p lck; 3082 3083 /* Can't use serial interval since not block structured */ 3084 3085 if ((__kmp_user_lock_kind == lk_tas) && 3086 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3087 OMP_NEST_LOCK_T_SIZE)) { 3088 #if KMP_OS_LINUX && \ 3089 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) 3090 // "fast" path implemented to fix customer performance issue 3091 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; 3092 #if USE_ITT_BUILD 3093 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); 3094 #endif /* USE_ITT_BUILD */ 3095 3096 #if OMPT_SUPPORT && OMPT_OPTIONAL 3097 int release_status = KMP_LOCK_STILL_HELD; 3098 #endif 3099 3100 if (--(tl->lk.depth_locked) == 0) { 3101 TCW_4(tl->lk.poll, 0); 3102 #if OMPT_SUPPORT && OMPT_OPTIONAL 3103 release_status = KMP_LOCK_RELEASED; 3104 #endif 3105 } 3106 KMP_MB(); 3107 3108 #if OMPT_SUPPORT && OMPT_OPTIONAL 3109 // This is the case, if called from omp_init_lock_with_hint: 3110 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3111 if (!codeptr) 3112 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3113 if (ompt_enabled.enabled) { 3114 if (release_status == KMP_LOCK_RELEASED) { 3115 if (ompt_enabled.ompt_callback_mutex_released) { 3116 // release_lock_last 3117 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3118 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3119 } 3120 } else if (ompt_enabled.ompt_callback_nest_lock) { 3121 // release_lock_previous 3122 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3123 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3124 } 3125 } 3126 #endif 3127 3128 return; 3129 #else 3130 lck = (kmp_user_lock_p)user_lock; 3131 #endif 3132 } 3133 #if KMP_USE_FUTEX 3134 else if ((__kmp_user_lock_kind == lk_futex) && 3135 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3136 OMP_NEST_LOCK_T_SIZE)) { 3137 lck = (kmp_user_lock_p)user_lock; 3138 } 3139 #endif 3140 else { 3141 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); 3142 } 3143 3144 #if USE_ITT_BUILD 3145 __kmp_itt_lock_releasing(lck); 3146 #endif /* USE_ITT_BUILD */ 3147 3148 int release_status; 3149 release_status = RELEASE_NESTED_LOCK(lck, gtid); 3150 #if 
OMPT_SUPPORT && OMPT_OPTIONAL 3151 // This is the case, if called from omp_init_lock_with_hint: 3152 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3153 if (!codeptr) 3154 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3155 if (ompt_enabled.enabled) { 3156 if (release_status == KMP_LOCK_RELEASED) { 3157 if (ompt_enabled.ompt_callback_mutex_released) { 3158 // release_lock_last 3159 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( 3160 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3161 } 3162 } else if (ompt_enabled.ompt_callback_nest_lock) { 3163 // release_lock_previous 3164 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3165 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3166 } 3167 } 3168 #endif 3169 3170 #endif // KMP_USE_DYNAMIC_LOCK 3171 } 3172 3173 /* try to acquire the lock */ 3174 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3175 KMP_COUNT_BLOCK(OMP_test_lock); 3176 3177 #if KMP_USE_DYNAMIC_LOCK 3178 int rc; 3179 int tag = KMP_EXTRACT_D_TAG(user_lock); 3180 #if USE_ITT_BUILD 3181 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3182 #endif 3183 #if OMPT_SUPPORT && OMPT_OPTIONAL 3184 // This is the case, if called from omp_init_lock_with_hint: 3185 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3186 if (!codeptr) 3187 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3188 if (ompt_enabled.ompt_callback_mutex_acquire) { 3189 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3190 ompt_mutex_test_lock, omp_lock_hint_none, 3191 __ompt_get_mutex_impl_type(user_lock), 3192 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3193 } 3194 #endif 3195 #if KMP_USE_INLINED_TAS 3196 if (tag == locktag_tas && !__kmp_env_consistency_check) { 3197 KMP_TEST_TAS_LOCK(user_lock, gtid, rc); 3198 } else 3199 #elif KMP_USE_INLINED_FUTEX 3200 if (tag == locktag_futex && !__kmp_env_consistency_check) { 3201 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); 3202 } else 3203 #endif 3204 { 3205 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); 3206 } 3207 if (rc) { 3208 #if USE_ITT_BUILD 3209 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3210 #endif 3211 #if OMPT_SUPPORT && OMPT_OPTIONAL 3212 if (ompt_enabled.ompt_callback_mutex_acquired) { 3213 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3214 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3215 } 3216 #endif 3217 return FTN_TRUE; 3218 } else { 3219 #if USE_ITT_BUILD 3220 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3221 #endif 3222 return FTN_FALSE; 3223 } 3224 3225 #else // KMP_USE_DYNAMIC_LOCK 3226 3227 kmp_user_lock_p lck; 3228 int rc; 3229 3230 if ((__kmp_user_lock_kind == lk_tas) && 3231 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { 3232 lck = (kmp_user_lock_p)user_lock; 3233 } 3234 #if KMP_USE_FUTEX 3235 else if ((__kmp_user_lock_kind == lk_futex) && 3236 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { 3237 lck = (kmp_user_lock_p)user_lock; 3238 } 3239 #endif 3240 else { 3241 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); 3242 } 3243 3244 #if USE_ITT_BUILD 3245 __kmp_itt_lock_acquiring(lck); 3246 #endif /* USE_ITT_BUILD */ 3247 #if OMPT_SUPPORT && OMPT_OPTIONAL 3248 // This is the case, if called from omp_init_lock_with_hint: 3249 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3250 if (!codeptr) 3251 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3252 if (ompt_enabled.ompt_callback_mutex_acquire) { 3253 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3254 ompt_mutex_test_lock, omp_lock_hint_none, 
__ompt_get_mutex_impl_type(), 3255 (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3256 } 3257 #endif 3258 3259 rc = TEST_LOCK(lck, gtid); 3260 #if USE_ITT_BUILD 3261 if (rc) { 3262 __kmp_itt_lock_acquired(lck); 3263 } else { 3264 __kmp_itt_lock_cancelled(lck); 3265 } 3266 #endif /* USE_ITT_BUILD */ 3267 #if OMPT_SUPPORT && OMPT_OPTIONAL 3268 if (rc && ompt_enabled.ompt_callback_mutex_acquired) { 3269 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3270 ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3271 } 3272 #endif 3273 3274 return (rc ? FTN_TRUE : FTN_FALSE); 3275 3276 /* Can't use serial interval since not block structured */ 3277 3278 #endif // KMP_USE_DYNAMIC_LOCK 3279 } 3280 3281 /* try to acquire the lock */ 3282 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { 3283 #if KMP_USE_DYNAMIC_LOCK 3284 int rc; 3285 #if USE_ITT_BUILD 3286 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); 3287 #endif 3288 #if OMPT_SUPPORT && OMPT_OPTIONAL 3289 // This is the case, if called from omp_init_lock_with_hint: 3290 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3291 if (!codeptr) 3292 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3293 if (ompt_enabled.ompt_callback_mutex_acquire) { 3294 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3295 ompt_mutex_test_nest_lock, omp_lock_hint_none, 3296 __ompt_get_mutex_impl_type(user_lock), 3297 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3298 } 3299 #endif 3300 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); 3301 #if USE_ITT_BUILD 3302 if (rc) { 3303 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); 3304 } else { 3305 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); 3306 } 3307 #endif 3308 #if OMPT_SUPPORT && OMPT_OPTIONAL 3309 if (ompt_enabled.enabled && rc) { 3310 if (rc == 1) { 3311 if (ompt_enabled.ompt_callback_mutex_acquired) { 3312 // lock_first 3313 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3314 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, 3315 codeptr); 3316 } 3317 } else { 3318 if (ompt_enabled.ompt_callback_nest_lock) { 3319 // lock_next 3320 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3321 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr); 3322 } 3323 } 3324 } 3325 #endif 3326 return rc; 3327 3328 #else // KMP_USE_DYNAMIC_LOCK 3329 3330 kmp_user_lock_p lck; 3331 int rc; 3332 3333 if ((__kmp_user_lock_kind == lk_tas) && 3334 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= 3335 OMP_NEST_LOCK_T_SIZE)) { 3336 lck = (kmp_user_lock_p)user_lock; 3337 } 3338 #if KMP_USE_FUTEX 3339 else if ((__kmp_user_lock_kind == lk_futex) && 3340 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= 3341 OMP_NEST_LOCK_T_SIZE)) { 3342 lck = (kmp_user_lock_p)user_lock; 3343 } 3344 #endif 3345 else { 3346 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); 3347 } 3348 3349 #if USE_ITT_BUILD 3350 __kmp_itt_lock_acquiring(lck); 3351 #endif /* USE_ITT_BUILD */ 3352 3353 #if OMPT_SUPPORT && OMPT_OPTIONAL 3354 // This is the case, if called from omp_init_lock_with_hint: 3355 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); 3356 if (!codeptr) 3357 codeptr = OMPT_GET_RETURN_ADDRESS(0); 3358 if (ompt_enabled.enabled && 3359 ompt_enabled.ompt_callback_mutex_acquire) { 3360 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( 3361 ompt_mutex_test_nest_lock, omp_lock_hint_none, 3362 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck, 3363 codeptr); 3364 } 3365 #endif
3366 3367 rc = TEST_NESTED_LOCK(lck, gtid); 3368 #if USE_ITT_BUILD 3369 if (rc) { 3370 __kmp_itt_lock_acquired(lck); 3371 } else { 3372 __kmp_itt_lock_cancelled(lck); 3373 } 3374 #endif /* USE_ITT_BUILD */ 3375 #if OMPT_SUPPORT && OMPT_OPTIONAL 3376 if (ompt_enabled.enabled && rc) { 3377 if (rc == 1) { 3378 if (ompt_enabled.ompt_callback_mutex_acquired) { 3379 // lock_first 3380 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( 3381 ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3382 } 3383 } else { 3384 if (ompt_enabled.ompt_callback_nest_lock) { 3385 // lock_next 3386 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( 3387 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr); 3388 } 3389 } 3390 } 3391 #endif 3392 return rc; 3393 3394 /* Can't use serial interval since not block structured */ 3395 3396 #endif // KMP_USE_DYNAMIC_LOCK 3397 } 3398 3399 // Interface to fast scalable reduce methods routines 3400 3401 // keep the selected method in a thread local structure for cross-function 3402 // usage: will be used in __kmpc_end_reduce* functions; 3403 // another solution: to re-determine the method one more time in 3404 // __kmpc_end_reduce* functions (new prototype required then) 3405 // AT: which solution is better? 3406 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ 3407 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) 3408 3409 #define __KMP_GET_REDUCTION_METHOD(gtid) \ 3410 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) 3411 3412 // description of the packed_reduction_method variable: look at the macros in 3413 // kmp.h 3414 3415 // used in a critical section reduce block 3416 static __forceinline void 3417 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3418 kmp_critical_name *crit) { 3419 3420 // this lock was visible to a customer and to the threading profile tool as a 3421 // serial overhead span (although it's used for an internal purpose only) 3422 // why was it visible in previous implementation? 3423 // should we keep it visible in new reduce block? 3424 kmp_user_lock_p lck; 3425 3426 #if KMP_USE_DYNAMIC_LOCK 3427 3428 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; 3429 // Check if it is initialized. 3430 if (*lk == 0) { 3431 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3432 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, 3433 KMP_GET_D_TAG(__kmp_user_lock_seq)); 3434 } else { 3435 __kmp_init_indirect_csptr(crit, loc, global_tid, 3436 KMP_GET_I_TAG(__kmp_user_lock_seq)); 3437 } 3438 } 3439 // Branch for accessing the actual lock object and set operation. This 3440 // branching is inevitable since this lock initialization does not follow the 3441 // normal dispatch path (lock table is not used). 3442 if (KMP_EXTRACT_D_TAG(lk) != 0) { 3443 lck = (kmp_user_lock_p)lk; 3444 KMP_DEBUG_ASSERT(lck != NULL); 3445 if (__kmp_env_consistency_check) { 3446 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3447 } 3448 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); 3449 } else { 3450 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); 3451 lck = ilk->lock; 3452 KMP_DEBUG_ASSERT(lck != NULL); 3453 if (__kmp_env_consistency_check) { 3454 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); 3455 } 3456 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); 3457 } 3458 3459 #else // KMP_USE_DYNAMIC_LOCK 3460 3461 // We know that the fast reduction code is only emitted by Intel compilers 3462 // with 32 byte critical sections. 
If there isn't enough space, then we 3463 // have to use a pointer. 3464 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { 3465 lck = (kmp_user_lock_p)crit; 3466 } else { 3467 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); 3468 } 3469 KMP_DEBUG_ASSERT(lck != NULL); 3470 3471 if (__kmp_env_consistency_check) 3472 __kmp_push_sync(global_tid, ct_critical, loc, lck); 3473 3474 __kmp_acquire_user_lock_with_checks(lck, global_tid); 3475 3476 #endif // KMP_USE_DYNAMIC_LOCK 3477 } 3478 3479 // used in a critical section reduce block 3480 static __forceinline void 3481 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, 3482 kmp_critical_name *crit) { 3483 3484 kmp_user_lock_p lck; 3485 3486 #if KMP_USE_DYNAMIC_LOCK 3487 3488 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { 3489 lck = (kmp_user_lock_p)crit; 3490 if (__kmp_env_consistency_check) 3491 __kmp_pop_sync(global_tid, ct_critical, loc); 3492 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); 3493 } else { 3494 kmp_indirect_lock_t *ilk = 3495 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); 3496 if (__kmp_env_consistency_check) 3497 __kmp_pop_sync(global_tid, ct_critical, loc); 3498 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); 3499 } 3500 3501 #else // KMP_USE_DYNAMIC_LOCK 3502 3503 // We know that the fast reduction code is only emitted by Intel compilers 3504 // with 32 byte critical sections. If there isn't enough space, then we have 3505 // to use a pointer. 3506 if (__kmp_base_user_lock_size > 32) { 3507 lck = *((kmp_user_lock_p *)crit); 3508 KMP_ASSERT(lck != NULL); 3509 } else { 3510 lck = (kmp_user_lock_p)crit; 3511 } 3512 3513 if (__kmp_env_consistency_check) 3514 __kmp_pop_sync(global_tid, ct_critical, loc); 3515 3516 __kmp_release_user_lock_with_checks(lck, global_tid); 3517 3518 #endif // KMP_USE_DYNAMIC_LOCK 3519 } // __kmp_end_critical_section_reduce_block 3520 3521 static __forceinline int 3522 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p, 3523 int *task_state) { 3524 kmp_team_t *team; 3525 3526 // Check if we are inside the teams construct? 3527 if (th->th.th_teams_microtask) { 3528 *team_p = team = th->th.th_team; 3529 if (team->t.t_level == th->th.th_teams_level) { 3530 // This is reduction at teams construct. 3531 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 3532 // Let's swap teams temporarily for the reduction. 3533 th->th.th_info.ds.ds_tid = team->t.t_master_tid; 3534 th->th.th_team = team->t.t_parent; 3535 th->th.th_team_nproc = th->th.th_team->t.t_nproc; 3536 th->th.th_task_team = th->th.th_team->t.t_task_team[0]; 3537 *task_state = th->th.th_task_state; 3538 th->th.th_task_state = 0; 3539 3540 return 1; 3541 } 3542 } 3543 return 0; 3544 } 3545 3546 static __forceinline void 3547 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) { 3548 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction. 3549 th->th.th_info.ds.ds_tid = 0; 3550 th->th.th_team = team; 3551 th->th.th_team_nproc = team->t.t_nproc; 3552 th->th.th_task_team = team->t.t_task_team[task_state]; 3553 __kmp_type_convert(task_state, &(th->th.th_task_state)); 3554 } 3555 3556 /* 2.a.i. Reduce Block without a terminating barrier */ 3557 /*! 
3558 @ingroup SYNCHRONIZATION 3559 @param loc source location information 3560 @param global_tid global thread number 3561 @param num_vars number of items (variables) to be reduced 3562 @param reduce_size size of data in bytes to be reduced 3563 @param reduce_data pointer to data to be reduced 3564 @param reduce_func callback function providing reduction operation on two 3565 operands and returning result of reduction in lhs_data 3566 @param lck pointer to the unique lock data structure 3567 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3568 threads if atomic reduction needed 3569 3570 The nowait version is used for a reduce clause with the nowait argument. 3571 */ 3572 kmp_int32 3573 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3574 size_t reduce_size, void *reduce_data, 3575 void (*reduce_func)(void *lhs_data, void *rhs_data), 3576 kmp_critical_name *lck) { 3577 3578 KMP_COUNT_BLOCK(REDUCE_nowait); 3579 int retval = 0; 3580 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3581 kmp_info_t *th; 3582 kmp_team_t *team; 3583 int teams_swapped = 0, task_state; 3584 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); 3585 __kmp_assert_valid_gtid(global_tid); 3586 3587 // why do we need this initialization here at all? 3588 // Reduction clause can not be used as a stand-alone directive. 3589 3590 // do not call __kmp_serial_initialize(), it will be called by 3591 // __kmp_parallel_initialize() if needed 3592 // possible detection of false-positive race by the threadchecker ??? 3593 if (!TCR_4(__kmp_init_parallel)) 3594 __kmp_parallel_initialize(); 3595 3596 __kmp_resume_if_soft_paused(); 3597 3598 // check correctness of reduce block nesting 3599 #if KMP_USE_DYNAMIC_LOCK 3600 if (__kmp_env_consistency_check) 3601 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3602 #else 3603 if (__kmp_env_consistency_check) 3604 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3605 #endif 3606 3607 th = __kmp_thread_from_gtid(global_tid); 3608 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3609 3610 // packed_reduction_method value will be reused by __kmp_end_reduce* function, 3611 // the value should be kept in a variable 3612 // the variable should be either a construct-specific or thread-specific 3613 // property, not a team specific property 3614 // (a thread can reach the next reduce block on the next construct, reduce 3615 // method may differ on the next construct) 3616 // an ident_t "loc" parameter could be used as a construct-specific property 3617 // (what if loc == 0?) 
3618 // (if both construct-specific and team-specific variables were shared, 3619 // then unnecessary extra syncs would be needed) 3620 // a thread-specific variable is better regarding two issues above (next 3621 // construct and extra syncs) 3622 // a thread-specific "th_local.reduction_method" variable is used currently 3623 // each thread executes 'determine' and 'set' lines (no need to execute by one 3624 // thread, to avoid unnecessary extra syncs) 3625 3626 packed_reduction_method = __kmp_determine_reduction_method( 3627 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3628 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3629 3630 OMPT_REDUCTION_DECL(th, global_tid); 3631 if (packed_reduction_method == critical_reduce_block) { 3632 3633 OMPT_REDUCTION_BEGIN; 3634 3635 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3636 retval = 1; 3637 3638 } else if (packed_reduction_method == empty_reduce_block) { 3639 3640 OMPT_REDUCTION_BEGIN; 3641 3642 // usage: if team size == 1, no synchronization is required (Intel 3643 // platforms only) 3644 retval = 1; 3645 3646 } else if (packed_reduction_method == atomic_reduce_block) { 3647 3648 retval = 2; 3649 3650 // all threads should do this pop here (because __kmpc_end_reduce_nowait() 3651 // won't be called by the code gen) 3652 // (this is not ideal, because the checking block has been closed by 3653 // this 'pop', 3654 // but the atomic operation has not been executed yet; it will be executed 3655 // slightly later, literally on the next instruction) 3656 if (__kmp_env_consistency_check) 3657 __kmp_pop_sync(global_tid, ct_reduce, loc); 3658 3659 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3660 tree_reduce_block)) { 3661 3662 // AT: performance issue: a real barrier here 3663 // AT: (if primary thread is slow, other threads are blocked here waiting for 3664 // the primary thread to come and release them) 3665 // AT: (it's not what a customer might expect when specifying the NOWAIT clause) 3666 // AT: (specifying NOWAIT won't result in an improvement in performance; it'll 3667 // be confusing to a customer) 3668 // AT: another implementation of *barrier_gather*nowait() (or some other design) 3669 // might go faster and be more in line with the sense of NOWAIT 3670 // AT: TO DO: do epcc test and compare times 3671 3672 // this barrier should be invisible to a customer and to the threading profile 3673 // tool (it's neither a terminating barrier nor customer's code, it's 3674 // used for an internal purpose) 3675 #if OMPT_SUPPORT 3676 // JP: can this barrier potentially lead to task scheduling? 3677 // JP: as long as there is a barrier in the implementation, OMPT should and 3678 // will provide the barrier events 3679 // so we set up the necessary frame/return addresses. 3680 ompt_frame_t *ompt_frame; 3681 if (ompt_enabled.enabled) { 3682 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3683 if (ompt_frame->enter_frame.ptr == NULL) 3684 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3685 } 3686 OMPT_STORE_RETURN_ADDRESS(global_tid); 3687 #endif 3688 #if USE_ITT_NOTIFY 3689 __kmp_threads[global_tid]->th.th_ident = loc; 3690 #endif 3691 retval = 3692 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3693 global_tid, FALSE, reduce_size, reduce_data, reduce_func); 3694 retval = (retval != 0) ?
(0) : (1); 3695 #if OMPT_SUPPORT && OMPT_OPTIONAL 3696 if (ompt_enabled.enabled) { 3697 ompt_frame->enter_frame = ompt_data_none; 3698 } 3699 #endif 3700 3701 // all other workers except primary thread should do this pop here 3702 // (none of the other workers will get to __kmpc_end_reduce_nowait()) 3703 if (__kmp_env_consistency_check) { 3704 if (retval == 0) { 3705 __kmp_pop_sync(global_tid, ct_reduce, loc); 3706 } 3707 } 3708 3709 } else { 3710 3711 // should never reach this block 3712 KMP_ASSERT(0); // "unexpected method" 3713 } 3714 if (teams_swapped) { 3715 __kmp_restore_swapped_teams(th, team, task_state); 3716 } 3717 KA_TRACE( 3718 10, 3719 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", 3720 global_tid, packed_reduction_method, retval)); 3721 3722 return retval; 3723 } 3724 3725 /*! 3726 @ingroup SYNCHRONIZATION 3727 @param loc source location information 3728 @param global_tid global thread id. 3729 @param lck pointer to the unique lock data structure 3730 3731 Finish the execution of a reduce nowait. 3732 */ 3733 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 3734 kmp_critical_name *lck) { 3735 3736 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3737 3738 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); 3739 __kmp_assert_valid_gtid(global_tid); 3740 3741 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3742 3743 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); 3744 3745 if (packed_reduction_method == critical_reduce_block) { 3746 3747 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3748 OMPT_REDUCTION_END; 3749 3750 } else if (packed_reduction_method == empty_reduce_block) { 3751 3752 // usage: if team size == 1, no synchronization is required (on Intel 3753 // platforms only) 3754 3755 OMPT_REDUCTION_END; 3756 3757 } else if (packed_reduction_method == atomic_reduce_block) { 3758 3759 // neither primary thread nor other workers should get here 3760 // (code gen does not generate this call in case 2: atomic reduce block) 3761 // actually it would be better to remove this else-if entirely; 3762 // after removal this value will be checked by the 'else' branch and will assert 3763 3764 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3765 tree_reduce_block)) { 3766 3767 // only primary thread gets here 3768 // OMPT: tree reduction is annotated in the barrier code 3769 3770 } else { 3771 3772 // should never reach this block 3773 KMP_ASSERT(0); // "unexpected method" 3774 } 3775 3776 if (__kmp_env_consistency_check) 3777 __kmp_pop_sync(global_tid, ct_reduce, loc); 3778 3779 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", 3780 global_tid, packed_reduction_method)); 3781 3782 return; 3783 } 3784 3785 /* 2.a.ii. Reduce Block with a terminating barrier */ 3786 3787 /*! 3788 @ingroup SYNCHRONIZATION 3789 @param loc source location information 3790 @param global_tid global thread number 3791 @param num_vars number of items (variables) to be reduced 3792 @param reduce_size size of data in bytes to be reduced 3793 @param reduce_data pointer to data to be reduced 3794 @param reduce_func callback function providing reduction operation on two 3795 operands and returning result of reduction in lhs_data 3796 @param lck pointer to the unique lock data structure 3797 @result 1 for the primary thread, 0 for all other team threads, 2 for all team 3798 threads if atomic reduction needed 3799 3800 A blocking reduce that includes an implicit barrier.
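As an illustrative sketch only (not a codegen contract: the names red_add,
red_lock, priv_sum, sum, loc and gtid below are hypothetical and assumed to
exist in the outlined region), a compiler could lower
<tt>reduction(+ : sum)</tt> onto this entry point by switching on the return
value:

@code
// Hypothetical combiner generated by the compiler: adds rhs into lhs.
static void red_add(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data;
}
static kmp_critical_name red_lock; // unique lock variable for this reduction

// ... in the outlined region, once the private partial result priv_sum
// has been computed ...
switch (__kmpc_reduce(loc, gtid, 1, sizeof(int), &priv_sum, red_add,
                      &red_lock)) {
case 1: // combine under runtime protection, then close the reduce block
  sum += priv_sum;
  __kmpc_end_reduce(loc, gtid, &red_lock);
  break;
case 2: // atomic path: update the shared variable atomically, then close
        // the block so the terminating barrier is still executed
  // ... atomic update of sum with priv_sum ...
  __kmpc_end_reduce(loc, gtid, &red_lock);
  break;
default: // 0: this thread's data has already been combined by the runtime
  break;
}
@endcode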
3801 */ 3802 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, 3803 size_t reduce_size, void *reduce_data, 3804 void (*reduce_func)(void *lhs_data, void *rhs_data), 3805 kmp_critical_name *lck) { 3806 KMP_COUNT_BLOCK(REDUCE_wait); 3807 int retval = 0; 3808 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3809 kmp_info_t *th; 3810 kmp_team_t *team; 3811 int teams_swapped = 0, task_state; 3812 3813 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); 3814 __kmp_assert_valid_gtid(global_tid); 3815 3816 // why do we need this initialization here at all? 3817 // Reduction clause can not be a stand-alone directive. 3818 3819 // do not call __kmp_serial_initialize(), it will be called by 3820 // __kmp_parallel_initialize() if needed 3821 // possible detection of false-positive race by the threadchecker ??? 3822 if (!TCR_4(__kmp_init_parallel)) 3823 __kmp_parallel_initialize(); 3824 3825 __kmp_resume_if_soft_paused(); 3826 3827 // check correctness of reduce block nesting 3828 #if KMP_USE_DYNAMIC_LOCK 3829 if (__kmp_env_consistency_check) 3830 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); 3831 #else 3832 if (__kmp_env_consistency_check) 3833 __kmp_push_sync(global_tid, ct_reduce, loc, NULL); 3834 #endif 3835 3836 th = __kmp_thread_from_gtid(global_tid); 3837 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3838 3839 packed_reduction_method = __kmp_determine_reduction_method( 3840 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); 3841 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); 3842 3843 OMPT_REDUCTION_DECL(th, global_tid); 3844 3845 if (packed_reduction_method == critical_reduce_block) { 3846 3847 OMPT_REDUCTION_BEGIN; 3848 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); 3849 retval = 1; 3850 3851 } else if (packed_reduction_method == empty_reduce_block) { 3852 3853 OMPT_REDUCTION_BEGIN; 3854 // usage: if team size == 1, no synchronization is required ( Intel 3855 // platforms only ) 3856 retval = 1; 3857 3858 } else if (packed_reduction_method == atomic_reduce_block) { 3859 3860 retval = 2; 3861 3862 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 3863 tree_reduce_block)) { 3864 3865 // case tree_reduce_block: 3866 // this barrier should be visible to a customer and to the threading profile 3867 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3868 #if OMPT_SUPPORT 3869 ompt_frame_t *ompt_frame; 3870 if (ompt_enabled.enabled) { 3871 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3872 if (ompt_frame->enter_frame.ptr == NULL) 3873 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3874 } 3875 OMPT_STORE_RETURN_ADDRESS(global_tid); 3876 #endif 3877 #if USE_ITT_NOTIFY 3878 __kmp_threads[global_tid]->th.th_ident = 3879 loc; // needed for correct notification of frames 3880 #endif 3881 retval = 3882 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 3883 global_tid, TRUE, reduce_size, reduce_data, reduce_func); 3884 retval = (retval != 0) ? 
(0) : (1); 3885 #if OMPT_SUPPORT && OMPT_OPTIONAL 3886 if (ompt_enabled.enabled) { 3887 ompt_frame->enter_frame = ompt_data_none; 3888 } 3889 #endif 3890 3891 // all other workers except primary thread should do this pop here 3892 // (none of other workers except primary will enter __kmpc_end_reduce()) 3893 if (__kmp_env_consistency_check) { 3894 if (retval == 0) { // 0: all other workers; 1: primary thread 3895 __kmp_pop_sync(global_tid, ct_reduce, loc); 3896 } 3897 } 3898 3899 } else { 3900 3901 // should never reach this block 3902 KMP_ASSERT(0); // "unexpected method" 3903 } 3904 if (teams_swapped) { 3905 __kmp_restore_swapped_teams(th, team, task_state); 3906 } 3907 3908 KA_TRACE(10, 3909 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", 3910 global_tid, packed_reduction_method, retval)); 3911 return retval; 3912 } 3913 3914 /*! 3915 @ingroup SYNCHRONIZATION 3916 @param loc source location information 3917 @param global_tid global thread id. 3918 @param lck pointer to the unique lock data structure 3919 3920 Finish the execution of a blocking reduce. 3921 The <tt>lck</tt> pointer must be the same as that used in the corresponding 3922 start function. 3923 */ 3924 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 3925 kmp_critical_name *lck) { 3926 3927 PACKED_REDUCTION_METHOD_T packed_reduction_method; 3928 kmp_info_t *th; 3929 kmp_team_t *team; 3930 int teams_swapped = 0, task_state; 3931 3932 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); 3933 __kmp_assert_valid_gtid(global_tid); 3934 3935 th = __kmp_thread_from_gtid(global_tid); 3936 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state); 3937 3938 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); 3939 3940 // this barrier should be visible to a customer and to the threading profile 3941 // tool (it's a terminating barrier on constructs if NOWAIT not specified) 3942 OMPT_REDUCTION_DECL(th, global_tid); 3943 3944 if (packed_reduction_method == critical_reduce_block) { 3945 __kmp_end_critical_section_reduce_block(loc, global_tid, lck); 3946 3947 OMPT_REDUCTION_END; 3948 3949 // TODO: implicit barrier: should be exposed 3950 #if OMPT_SUPPORT 3951 ompt_frame_t *ompt_frame; 3952 if (ompt_enabled.enabled) { 3953 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3954 if (ompt_frame->enter_frame.ptr == NULL) 3955 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3956 } 3957 OMPT_STORE_RETURN_ADDRESS(global_tid); 3958 #endif 3959 #if USE_ITT_NOTIFY 3960 __kmp_threads[global_tid]->th.th_ident = loc; 3961 #endif 3962 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 3963 #if OMPT_SUPPORT && OMPT_OPTIONAL 3964 if (ompt_enabled.enabled) { 3965 ompt_frame->enter_frame = ompt_data_none; 3966 } 3967 #endif 3968 3969 } else if (packed_reduction_method == empty_reduce_block) { 3970 3971 OMPT_REDUCTION_END; 3972 3973 // usage: if team size==1, no synchronization is required (Intel platforms only) 3974 3975 // TODO: implicit barrier: should be exposed 3976 #if OMPT_SUPPORT 3977 ompt_frame_t *ompt_frame; 3978 if (ompt_enabled.enabled) { 3979 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 3980 if (ompt_frame->enter_frame.ptr == NULL) 3981 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 3982 } 3983 OMPT_STORE_RETURN_ADDRESS(global_tid); 3984 #endif 3985 #if USE_ITT_NOTIFY 3986 __kmp_threads[global_tid]->th.th_ident = loc; 3987 #endif 3988 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 
0, NULL, NULL); 3989 #if OMPT_SUPPORT && OMPT_OPTIONAL 3990 if (ompt_enabled.enabled) { 3991 ompt_frame->enter_frame = ompt_data_none; 3992 } 3993 #endif 3994 3995 } else if (packed_reduction_method == atomic_reduce_block) { 3996 3997 #if OMPT_SUPPORT 3998 ompt_frame_t *ompt_frame; 3999 if (ompt_enabled.enabled) { 4000 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); 4001 if (ompt_frame->enter_frame.ptr == NULL) 4002 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 4003 } 4004 OMPT_STORE_RETURN_ADDRESS(global_tid); 4005 #endif 4006 // TODO: implicit barrier: should be exposed 4007 #if USE_ITT_NOTIFY 4008 __kmp_threads[global_tid]->th.th_ident = loc; 4009 #endif 4010 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); 4011 #if OMPT_SUPPORT && OMPT_OPTIONAL 4012 if (ompt_enabled.enabled) { 4013 ompt_frame->enter_frame = ompt_data_none; 4014 } 4015 #endif 4016 4017 } else if (TEST_REDUCTION_METHOD(packed_reduction_method, 4018 tree_reduce_block)) { 4019 4020 // only primary thread executes here (primary releases all other workers) 4021 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), 4022 global_tid); 4023 4024 } else { 4025 4026 // should never reach this block 4027 KMP_ASSERT(0); // "unexpected method" 4028 } 4029 if (teams_swapped) { 4030 __kmp_restore_swapped_teams(th, team, task_state); 4031 } 4032 4033 if (__kmp_env_consistency_check) 4034 __kmp_pop_sync(global_tid, ct_reduce, loc); 4035 4036 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", 4037 global_tid, packed_reduction_method)); 4038 4039 return; 4040 } 4041 4042 #undef __KMP_GET_REDUCTION_METHOD 4043 #undef __KMP_SET_REDUCTION_METHOD 4044 4045 /* end of interface to fast scalable reduce routines */ 4046 4047 kmp_uint64 __kmpc_get_taskid() { 4048 4049 kmp_int32 gtid; 4050 kmp_info_t *thread; 4051 4052 gtid = __kmp_get_gtid(); 4053 if (gtid < 0) { 4054 return 0; 4055 } 4056 thread = __kmp_thread_from_gtid(gtid); 4057 return thread->th.th_current_task->td_task_id; 4058 4059 } // __kmpc_get_taskid 4060 4061 kmp_uint64 __kmpc_get_parent_taskid() { 4062 4063 kmp_int32 gtid; 4064 kmp_info_t *thread; 4065 kmp_taskdata_t *parent_task; 4066 4067 gtid = __kmp_get_gtid(); 4068 if (gtid < 0) { 4069 return 0; 4070 } 4071 thread = __kmp_thread_from_gtid(gtid); 4072 parent_task = thread->th.th_current_task->td_parent; 4073 return (parent_task == NULL ? 0 : parent_task->td_task_id); 4074 4075 } // __kmpc_get_parent_taskid 4076 4077 /*! 4078 @ingroup WORK_SHARING 4079 @param loc source location information. 4080 @param gtid global thread number. 4081 @param num_dims number of associated doacross loops. 4082 @param dims info on loop bounds. 4083 4084 Initialize doacross loop information. 4085 The compiler is expected to send us inclusive bounds, 4086 e.g. for (i=2; i<9; i+=2): lo=2, up=8, st=2.
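As an illustrative sketch only (not a codegen contract: loc, gtid and the
per-thread chunk bounds lb/ub are assumed to be available already), that
example loop with <tt>ordered(1)</tt> and cross-iteration depend clauses
could map onto the doacross entry points roughly as follows:

@code
struct kmp_dim dim;
dim.lo = 2; // inclusive lower bound
dim.up = 8; // inclusive upper bound
dim.st = 2; // stride
__kmpc_doacross_init(loc, gtid, 1, &dim);
for (kmp_int64 i = lb; i <= ub; i += 2) { // this thread's chunk
  kmp_int64 sink_vec[1] = {i - 2};
  __kmpc_doacross_wait(loc, gtid, sink_vec); // ordered depend(sink : i - 2)
  // ... loop body ...
  kmp_int64 src_vec[1] = {i};
  __kmpc_doacross_post(loc, gtid, src_vec); // ordered depend(source)
}
__kmpc_doacross_fini(loc, gtid);
@endcode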
4087 */ 4088 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, 4089 const struct kmp_dim *dims) { 4090 __kmp_assert_valid_gtid(gtid); 4091 int j, idx; 4092 kmp_int64 last, trace_count; 4093 kmp_info_t *th = __kmp_threads[gtid]; 4094 kmp_team_t *team = th->th.th_team; 4095 kmp_uint32 *flags; 4096 kmp_disp_t *pr_buf = th->th.th_dispatch; 4097 dispatch_shared_info_t *sh_buf; 4098 4099 KA_TRACE( 4100 20, 4101 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", 4102 gtid, num_dims, !team->t.t_serialized)); 4103 KMP_DEBUG_ASSERT(dims != NULL); 4104 KMP_DEBUG_ASSERT(num_dims > 0); 4105 4106 if (team->t.t_serialized) { 4107 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); 4108 return; // no dependencies if team is serialized 4109 } 4110 KMP_DEBUG_ASSERT(team->t.t_nproc > 1); 4111 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for 4112 // the next loop 4113 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4114 4115 // Save bounds info into allocated private buffer 4116 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); 4117 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( 4118 th, sizeof(kmp_int64) * (4 * num_dims + 1)); 4119 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4120 pr_buf->th_doacross_info[0] = 4121 (kmp_int64)num_dims; // first element is number of dimensions 4122 // Save also address of num_done in order to access it later without knowing 4123 // the buffer index 4124 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; 4125 pr_buf->th_doacross_info[2] = dims[0].lo; 4126 pr_buf->th_doacross_info[3] = dims[0].up; 4127 pr_buf->th_doacross_info[4] = dims[0].st; 4128 last = 5; 4129 for (j = 1; j < num_dims; ++j) { 4130 kmp_int64 4131 range_length; // To keep ranges of all dimensions but the first dims[0] 4132 if (dims[j].st == 1) { // most common case 4133 // AC: should we care of ranges bigger than LLONG_MAX? (not for now) 4134 range_length = dims[j].up - dims[j].lo + 1; 4135 } else { 4136 if (dims[j].st > 0) { 4137 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); 4138 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; 4139 } else { // negative increment 4140 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); 4141 range_length = 4142 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; 4143 } 4144 } 4145 pr_buf->th_doacross_info[last++] = range_length; 4146 pr_buf->th_doacross_info[last++] = dims[j].lo; 4147 pr_buf->th_doacross_info[last++] = dims[j].up; 4148 pr_buf->th_doacross_info[last++] = dims[j].st; 4149 } 4150 4151 // Compute total trip count. 4152 // Start with range of dims[0] which we don't need to keep in the buffer. 
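  // Each dimension contributes |up - lo| / |st| + 1 iterations; the product
  // over all dimensions is the iteration count of the collapsed loop nest,
  // and the shared flags array allocated below reserves one bit per iteration.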
4153 if (dims[0].st == 1) { // most common case 4154 trace_count = dims[0].up - dims[0].lo + 1; 4155 } else if (dims[0].st > 0) { 4156 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); 4157 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; 4158 } else { // negative increment 4159 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); 4160 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; 4161 } 4162 for (j = 1; j < num_dims; ++j) { 4163 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges 4164 } 4165 KMP_DEBUG_ASSERT(trace_count > 0); 4166 4167 // Check if shared buffer is not occupied by other loop (idx - 4168 // __kmp_dispatch_num_buffers) 4169 if (idx != sh_buf->doacross_buf_idx) { 4170 // Shared buffer is occupied, wait for it to be free 4171 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, 4172 __kmp_eq_4, NULL); 4173 } 4174 #if KMP_32_BIT_ARCH 4175 // Check if we are the first thread. After the CAS the first thread gets 0, 4176 // others get 1 if initialization is in progress, allocated pointer otherwise. 4177 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated. 4178 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32( 4179 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1); 4180 #else 4181 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( 4182 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL); 4183 #endif 4184 if (flags == NULL) { 4185 // we are the first thread, allocate the array of flags 4186 size_t size = 4187 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration 4188 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); 4189 KMP_MB(); 4190 sh_buf->doacross_flags = flags; 4191 } else if (flags == (kmp_uint32 *)1) { 4192 #if KMP_32_BIT_ARCH 4193 // initialization is still in progress, need to wait 4194 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1) 4195 #else 4196 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL) 4197 #endif 4198 KMP_YIELD(TRUE); 4199 KMP_MB(); 4200 } else { 4201 KMP_MB(); 4202 } 4203 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value 4204 pr_buf->th_doacross_flags = 4205 sh_buf->doacross_flags; // save private copy in order to not 4206 // touch shared buffer on each iteration 4207 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); 4208 } 4209 4210 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) { 4211 __kmp_assert_valid_gtid(gtid); 4212 kmp_int64 shft; 4213 size_t num_dims, i; 4214 kmp_uint32 flag; 4215 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4216 kmp_info_t *th = __kmp_threads[gtid]; 4217 kmp_team_t *team = th->th.th_team; 4218 kmp_disp_t *pr_buf; 4219 kmp_int64 lo, up, st; 4220 4221 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); 4222 if (team->t.t_serialized) { 4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); 4224 return; // no dependencies if team is serialized 4225 } 4226 4227 // calculate sequential iteration number and check out-of-bounds condition 4228 pr_buf = th->th.th_dispatch; 4229 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4230 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4231 lo = pr_buf->th_doacross_info[2]; 4232 up = pr_buf->th_doacross_info[3]; 4233 st = pr_buf->th_doacross_info[4]; 4234 #if OMPT_SUPPORT && OMPT_OPTIONAL 4235 ompt_dependence_t deps[num_dims]; 4236 #endif 4237 if (st == 1) { // most common case 4238 if (vec[0] < lo || vec[0] > up) { 4239 
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4240 "bounds [%lld,%lld]\n", 4241 gtid, vec[0], lo, up)); 4242 return; 4243 } 4244 iter_number = vec[0] - lo; 4245 } else if (st > 0) { 4246 if (vec[0] < lo || vec[0] > up) { 4247 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4248 "bounds [%lld,%lld]\n", 4249 gtid, vec[0], lo, up)); 4250 return; 4251 } 4252 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4253 } else { // negative increment 4254 if (vec[0] > lo || vec[0] < up) { 4255 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4256 "bounds [%lld,%lld]\n", 4257 gtid, vec[0], lo, up)); 4258 return; 4259 } 4260 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4261 } 4262 #if OMPT_SUPPORT && OMPT_OPTIONAL 4263 deps[0].variable.value = iter_number; 4264 deps[0].dependence_type = ompt_dependence_type_sink; 4265 #endif 4266 for (i = 1; i < num_dims; ++i) { 4267 kmp_int64 iter, ln; 4268 size_t j = i * 4; 4269 ln = pr_buf->th_doacross_info[j + 1]; 4270 lo = pr_buf->th_doacross_info[j + 2]; 4271 up = pr_buf->th_doacross_info[j + 3]; 4272 st = pr_buf->th_doacross_info[j + 4]; 4273 if (st == 1) { 4274 if (vec[i] < lo || vec[i] > up) { 4275 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4276 "bounds [%lld,%lld]\n", 4277 gtid, vec[i], lo, up)); 4278 return; 4279 } 4280 iter = vec[i] - lo; 4281 } else if (st > 0) { 4282 if (vec[i] < lo || vec[i] > up) { 4283 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4284 "bounds [%lld,%lld]\n", 4285 gtid, vec[i], lo, up)); 4286 return; 4287 } 4288 iter = (kmp_uint64)(vec[i] - lo) / st; 4289 } else { // st < 0 4290 if (vec[i] > lo || vec[i] < up) { 4291 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " 4292 "bounds [%lld,%lld]\n", 4293 gtid, vec[i], lo, up)); 4294 return; 4295 } 4296 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4297 } 4298 iter_number = iter + ln * iter_number; 4299 #if OMPT_SUPPORT && OMPT_OPTIONAL 4300 deps[i].variable.value = iter; 4301 deps[i].dependence_type = ompt_dependence_type_sink; 4302 #endif 4303 } 4304 shft = iter_number % 32; // use 32-bit granularity 4305 iter_number >>= 5; // divided by 32 4306 flag = 1 << shft; 4307 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { 4308 KMP_YIELD(TRUE); 4309 } 4310 KMP_MB(); 4311 #if OMPT_SUPPORT && OMPT_OPTIONAL 4312 if (ompt_enabled.ompt_callback_dependences) { 4313 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4314 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4315 } 4316 #endif 4317 KA_TRACE(20, 4318 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", 4319 gtid, (iter_number << 5) + shft)); 4320 } 4321 4322 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) { 4323 __kmp_assert_valid_gtid(gtid); 4324 kmp_int64 shft; 4325 size_t num_dims, i; 4326 kmp_uint32 flag; 4327 kmp_int64 iter_number; // iteration number of "collapsed" loop nest 4328 kmp_info_t *th = __kmp_threads[gtid]; 4329 kmp_team_t *team = th->th.th_team; 4330 kmp_disp_t *pr_buf; 4331 kmp_int64 lo, st; 4332 4333 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); 4334 if (team->t.t_serialized) { 4335 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n")); 4336 return; // no dependencies if team is serialized 4337 } 4338 4339 // calculate sequential iteration number (same as in "wait" but no 4340 // out-of-bounds checks) 4341 pr_buf = th->th.th_dispatch; 4342 
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); 4343 num_dims = (size_t)pr_buf->th_doacross_info[0]; 4344 lo = pr_buf->th_doacross_info[2]; 4345 st = pr_buf->th_doacross_info[4]; 4346 #if OMPT_SUPPORT && OMPT_OPTIONAL 4347 ompt_dependence_t deps[num_dims]; 4348 #endif 4349 if (st == 1) { // most common case 4350 iter_number = vec[0] - lo; 4351 } else if (st > 0) { 4352 iter_number = (kmp_uint64)(vec[0] - lo) / st; 4353 } else { // negative increment 4354 iter_number = (kmp_uint64)(lo - vec[0]) / (-st); 4355 } 4356 #if OMPT_SUPPORT && OMPT_OPTIONAL 4357 deps[0].variable.value = iter_number; 4358 deps[0].dependence_type = ompt_dependence_type_source; 4359 #endif 4360 for (i = 1; i < num_dims; ++i) { 4361 kmp_int64 iter, ln; 4362 size_t j = i * 4; 4363 ln = pr_buf->th_doacross_info[j + 1]; 4364 lo = pr_buf->th_doacross_info[j + 2]; 4365 st = pr_buf->th_doacross_info[j + 4]; 4366 if (st == 1) { 4367 iter = vec[i] - lo; 4368 } else if (st > 0) { 4369 iter = (kmp_uint64)(vec[i] - lo) / st; 4370 } else { // st < 0 4371 iter = (kmp_uint64)(lo - vec[i]) / (-st); 4372 } 4373 iter_number = iter + ln * iter_number; 4374 #if OMPT_SUPPORT && OMPT_OPTIONAL 4375 deps[i].variable.value = iter; 4376 deps[i].dependence_type = ompt_dependence_type_source; 4377 #endif 4378 } 4379 #if OMPT_SUPPORT && OMPT_OPTIONAL 4380 if (ompt_enabled.ompt_callback_dependences) { 4381 ompt_callbacks.ompt_callback(ompt_callback_dependences)( 4382 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims); 4383 } 4384 #endif 4385 shft = iter_number % 32; // use 32-bit granularity 4386 iter_number >>= 5; // divided by 32 4387 flag = 1 << shft; 4388 KMP_MB(); 4389 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) 4390 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); 4391 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, 4392 (iter_number << 5) + shft)); 4393 } 4394 4395 void __kmpc_doacross_fini(ident_t *loc, int gtid) { 4396 __kmp_assert_valid_gtid(gtid); 4397 kmp_int32 num_done; 4398 kmp_info_t *th = __kmp_threads[gtid]; 4399 kmp_team_t *team = th->th.th_team; 4400 kmp_disp_t *pr_buf = th->th.th_dispatch; 4401 4402 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); 4403 if (team->t.t_serialized) { 4404 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); 4405 return; // nothing to do 4406 } 4407 num_done = 4408 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1; 4409 if (num_done == th->th.th_team_nproc) { 4410 // we are the last thread, need to free shared resources 4411 int idx = pr_buf->th_doacross_buf_idx - 1; 4412 dispatch_shared_info_t *sh_buf = 4413 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; 4414 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == 4415 (kmp_int64)&sh_buf->doacross_num_done); 4416 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done); 4417 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); 4418 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags)); 4419 sh_buf->doacross_flags = NULL; 4420 sh_buf->doacross_num_done = 0; 4421 sh_buf->doacross_buf_idx += 4422 __kmp_dispatch_num_buffers; // free buffer for future re-use 4423 } 4424 // free private resources (need to keep buffer index forever) 4425 pr_buf->th_doacross_flags = NULL; 4426 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); 4427 pr_buf->th_doacross_info = NULL; 4428 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); 4429 } 4430 4431 /* OpenMP 5.1 Memory Management routines */ 4432 void 
*omp_alloc(size_t size, omp_allocator_handle_t allocator) { 4433 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator); 4434 } 4435 4436 void *omp_aligned_alloc(size_t align, size_t size, 4437 omp_allocator_handle_t allocator) { 4438 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator); 4439 } 4440 4441 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) { 4442 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator); 4443 } 4444 4445 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size, 4446 omp_allocator_handle_t allocator) { 4447 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator); 4448 } 4449 4450 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator, 4451 omp_allocator_handle_t free_allocator) { 4452 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator, 4453 free_allocator); 4454 } 4455 4456 void omp_free(void *ptr, omp_allocator_handle_t allocator) { 4457 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator); 4458 } 4459 /* end of OpenMP 5.1 Memory Management routines */ 4460 4461 int __kmpc_get_target_offload(void) { 4462 if (!__kmp_init_serial) { 4463 __kmp_serial_initialize(); 4464 } 4465 return __kmp_target_offload; 4466 } 4467 4468 int __kmpc_pause_resource(kmp_pause_status_t level) { 4469 if (!__kmp_init_serial) { 4470 return 1; // Can't pause if runtime is not initialized 4471 } 4472 return __kmp_pause_resource(level); 4473 } 4474 4475 void __kmpc_error(ident_t *loc, int severity, const char *message) { 4476 if (!__kmp_init_serial) 4477 __kmp_serial_initialize(); 4478 4479 KMP_ASSERT(severity == severity_warning || severity == severity_fatal); 4480 4481 #if OMPT_SUPPORT 4482 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) { 4483 ompt_callbacks.ompt_callback(ompt_callback_error)( 4484 (ompt_severity_t)severity, message, KMP_STRLEN(message), 4485 OMPT_GET_RETURN_ADDRESS(0)); 4486 } 4487 #endif // OMPT_SUPPORT 4488 4489 char *src_loc; 4490 if (loc && loc->psource) { 4491 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false); 4492 src_loc = 4493 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col); 4494 __kmp_str_loc_free(&str_loc); 4495 } else { 4496 src_loc = __kmp_str_format("unknown"); 4497 } 4498 4499 if (severity == severity_warning) 4500 KMP_WARNING(UserDirectedWarning, src_loc, message); 4501 else 4502 KMP_FATAL(UserDirectedError, src_loc, message); 4503 4504 __kmp_str_free(&src_loc); 4505 } 4506 4507 // Mark begin of scope directive. 4508 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4509 // reserved is for extension of scope directive and not used. 4510 #if OMPT_SUPPORT && OMPT_OPTIONAL 4511 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4512 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4513 int tid = __kmp_tid_from_gtid(gtid); 4514 ompt_callbacks.ompt_callback(ompt_callback_work)( 4515 ompt_work_scope, ompt_scope_begin, 4516 &(team->t.ompt_team_info.parallel_data), 4517 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4518 OMPT_GET_RETURN_ADDRESS(0)); 4519 } 4520 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4521 } 4522 4523 // Mark end of scope directive 4524 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) { 4525 // reserved is for extension of scope directive and not used. 
4526 #if OMPT_SUPPORT && OMPT_OPTIONAL 4527 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) { 4528 kmp_team_t *team = __kmp_threads[gtid]->th.th_team; 4529 int tid = __kmp_tid_from_gtid(gtid); 4530 ompt_callbacks.ompt_callback(ompt_callback_work)( 4531 ompt_work_scope, ompt_scope_end, 4532 &(team->t.ompt_team_info.parallel_data), 4533 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1, 4534 OMPT_GET_RETURN_ADDRESS(0)); 4535 } 4536 #endif // OMPT_SUPPORT && OMPT_OPTIONAL 4537 } 4538 4539 #ifdef KMP_USE_VERSION_SYMBOLS 4540 // For GOMP compatibility there are two versions of each omp_* API. 4541 // One is the plain C symbol and one is the Fortran symbol with an appended 4542 // underscore. When we implement a specific ompc_* version of an omp_* 4543 // function, we want the plain GOMP versioned symbol to alias the ompc_* version 4544 // instead of the Fortran versions in kmp_ftn_entry.h 4545 extern "C" { 4546 // Have to undef these from omp.h so they aren't translated into 4547 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below 4548 #ifdef omp_set_affinity_format 4549 #undef omp_set_affinity_format 4550 #endif 4551 #ifdef omp_get_affinity_format 4552 #undef omp_get_affinity_format 4553 #endif 4554 #ifdef omp_display_affinity 4555 #undef omp_display_affinity 4556 #endif 4557 #ifdef omp_capture_affinity 4558 #undef omp_capture_affinity 4559 #endif 4560 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50, 4561 "OMP_5.0"); 4562 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50, 4563 "OMP_5.0"); 4564 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50, 4565 "OMP_5.0"); 4566 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50, 4567 "OMP_5.0"); 4568 } // extern "C" 4569 #endif 4570