//===-------- Interface.h - OpenMP interface ---------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef OMPTARGET_DEVICERTL_INTERFACE_H
#define OMPTARGET_DEVICERTL_INTERFACE_H

#include "Shared/Environment.h"

#include "DeviceTypes.h"

/// External API
///
///{

extern "C" {

/// ICV: dyn-var, constant 0
///
/// setter: ignored.
/// getter: returns 0.
///
///{
void omp_set_dynamic(int);
int omp_get_dynamic(void);
///}

/// ICV: nthreads-var, integer
///
/// scope: data environment
///
/// setter: ignored.
/// getter: returns false.
///
/// implementation notes:
///
///
///{
void omp_set_num_threads(int);
int omp_get_max_threads(void);
///}

/// ICV: thread-limit-var, computed
///
/// getter: returns the thread limit defined during launch.
///
///{
int omp_get_thread_limit(void);
///}

/// ICV: max-active-level-var, constant 1
///
/// setter: ignored.
/// getter: returns 1.
///
///{
void omp_set_max_active_levels(int);
int omp_get_max_active_levels(void);
///}

/// ICV: places-partition-var
///
///
///{
///}

/// ICV: active-level-var, 0 or 1
///
/// getter: returns 0 or 1.
///
///{
int omp_get_active_level(void);
///}

/// ICV: level-var
///
/// getter: returns the parallel region nesting level.
///
///{
int omp_get_level(void);
///}

/// ICV: run-sched-var
///
///
///{
void omp_set_schedule(omp_sched_t, int);
void omp_get_schedule(omp_sched_t *, int *);
///}
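
/// Illustrative behavior (a minimal sketch, not part of the interface): on the
/// device most of the ICVs above are fixed, so the setters are no-ops and the
/// getters return the documented constants, e.g.
///
/// \code
///   omp_set_dynamic(1);                    // ignored on the device
///   int Dyn = omp_get_dynamic();           // always 0
///   omp_set_max_active_levels(4);          // ignored on the device
///   int Max = omp_get_max_active_levels(); // always 1
/// \endcode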

/// TODO this is incomplete.
int omp_get_num_threads(void);
int omp_get_thread_num(void);
void omp_set_nested(int);

int omp_get_nested(void);

void omp_set_max_active_levels(int Level);

int omp_get_max_active_levels(void);

omp_proc_bind_t omp_get_proc_bind(void);

int omp_get_num_places(void);

int omp_get_place_num_procs(int place_num);

void omp_get_place_proc_ids(int place_num, int *ids);

int omp_get_place_num(void);

int omp_get_partition_num_places(void);

void omp_get_partition_place_nums(int *place_nums);

int omp_get_cancellation(void);

void omp_set_default_device(int deviceId);

int omp_get_default_device(void);

int omp_get_num_devices(void);

int omp_get_device_num(void);

int omp_get_num_teams(void);

int omp_get_team_num();

int omp_get_initial_device(void);

void *llvm_omp_target_dynamic_shared_alloc();

/// Synchronization
///
///{
void omp_init_lock(omp_lock_t *Lock);

void omp_destroy_lock(omp_lock_t *Lock);

void omp_set_lock(omp_lock_t *Lock);

void omp_unset_lock(omp_lock_t *Lock);

int omp_test_lock(omp_lock_t *Lock);
///}

/// Tasking
///
///{
int omp_in_final(void);

int omp_get_max_task_priority(void);
///}

/// Misc
///
///{
double omp_get_wtick(void);

double omp_get_wtime(void);
///}
}

extern "C" {
/// Allocate \p Bytes in "shareable" memory and return the address. Needs to be
/// called balanced with __kmpc_free_shared like a stack (push/pop). Can be
/// called by any thread, allocation happens *per thread*.
void *__kmpc_alloc_shared(uint64_t Bytes);

/// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like
/// a stack (push/pop). Can be called by any thread. \p Ptr has to be allocated
/// by __kmpc_alloc_shared by the same thread.
void __kmpc_free_shared(void *Ptr, uint64_t Bytes);

/// Get a pointer to the memory buffer containing dynamically allocated shared
/// memory configured at launch.
void *__kmpc_get_dynamic_shared();

/// Allocate sufficient space for \p NumArgs sequential `void*` and store the
/// allocation address in \p GlobalArgs.
///
/// Called by the main thread prior to a parallel region.
///
/// We also remember it in GlobalArgsPtr to ensure the worker threads and
/// deallocation function know the allocation address too.
void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs);

/// Deallocate the memory allocated by __kmpc_begin_sharing_variables.
///
/// Called by the main thread after a parallel region.
void __kmpc_end_sharing_variables();

/// Store the allocation address obtained via __kmpc_begin_sharing_variables in
/// \p GlobalArgs.
///
/// Called by the worker threads in the parallel region (function).
void __kmpc_get_shared_variables(void ***GlobalArgs);

/// External interface to get the thread ID.
uint32_t __kmpc_get_hardware_thread_id_in_block();

/// External interface to get the number of threads.
uint32_t __kmpc_get_hardware_num_threads_in_block();

/// External interface to get the warp size.
uint32_t __kmpc_get_warp_size();
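
/// Illustrative usage (a minimal sketch of the calling convention documented
/// above; `X` and `Y` are hypothetical shared values): shared-stack
/// allocations are released in LIFO order by the allocating thread, and
/// variable sharing is bracketed by the main thread while worker threads only
/// query the pointer.
///
/// \code
///   // Per-thread, stack-like allocation in shareable memory.
///   void *Buf = __kmpc_alloc_shared(/*Bytes=*/32);
///   // ... use Buf ...
///   __kmpc_free_shared(Buf, /*Bytes=*/32);
///
///   // Main thread, around a parallel region:
///   void **Args;
///   __kmpc_begin_sharing_variables(&Args, /*NumArgs=*/2);
///   Args[0] = &X;
///   Args[1] = &Y;
///   // ... workers execute the parallel region ...
///   __kmpc_end_sharing_variables();
///
///   // Worker thread, inside the outlined parallel function:
///   void **SharedArgs;
///   __kmpc_get_shared_variables(&SharedArgs);
/// \endcode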

/// Kernel
///
///{
// Forward declaration
struct KernelEnvironmentTy;

int8_t __kmpc_is_spmd_exec_mode();

int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
                           KernelLaunchEnvironmentTy &KernelLaunchEnvironment);

void __kmpc_target_deinit();

///}

/// Reduction
///
///{
void *__kmpc_reduction_get_fixed_buffer();

int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc,
                                               uint64_t reduce_data_size,
                                               void *reduce_data,
                                               ShuffleReductFnTy shflFct,
                                               InterWarpCopyFnTy cpyFct);

int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
    IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records,
    uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct,
    InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct,
    ListGlobalFnTy glcpyFct, ListGlobalFnTy glredFct);
///}

/// Synchronization
///
///{
void __kmpc_ordered(IdentTy *Loc, int32_t TId);

void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);

int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);

void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);

void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);

void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);

int32_t __kmpc_master(IdentTy *Loc, int32_t TId);

void __kmpc_end_master(IdentTy *Loc, int32_t TId);

int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);

void __kmpc_end_masked(IdentTy *Loc, int32_t TId);

int32_t __kmpc_single(IdentTy *Loc, int32_t TId);

void __kmpc_end_single(IdentTy *Loc, int32_t TId);

void __kmpc_flush(IdentTy *Loc);

uint64_t __kmpc_warp_active_thread_mask(void);

void __kmpc_syncwarp(uint64_t Mask);

void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);

void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
///}
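
/// Illustrative usage (a sketch assuming the usual libomp convention that
/// __kmpc_master returns non-zero on the thread that executes the region, and
/// that `Loc` and `TId` are provided by the caller):
///
/// \code
///   if (__kmpc_master(Loc, TId)) {
///     // ... code executed by the master thread only ...
///     __kmpc_end_master(Loc, TId);
///   }
///   __kmpc_barrier(Loc, TId); // join all threads in the team
/// \endcode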

/// Parallelism
///
///{
/// TODO
void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);

/// TODO
bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);

/// TODO
void __kmpc_kernel_end_parallel();

/// TODO
void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);

/// TODO
void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId, int32_t NumTeams,
                           int32_t ThreadLimit);

/// TODO
uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);

///}

/// Tasking
///
///{
TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
                                        size_t TaskSizeInclPrivateValues,
                                        size_t SharedValuesSize,
                                        TaskFnTy TaskFn);

int32_t __kmpc_omp_task(IdentTy *Loc, uint32_t TId,
                        TaskDescriptorTy *TaskDescriptor);

int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
                                  TaskDescriptorTy *TaskDescriptor, int32_t,
                                  void *, int32_t, void *);

void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId,
                               TaskDescriptorTy *TaskDescriptor);

void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,
                                  TaskDescriptorTy *TaskDescriptor);

void __kmpc_omp_wait_deps(IdentTy *Loc, uint32_t TId, int32_t, void *, int32_t,
                          void *);

void __kmpc_taskgroup(IdentTy *Loc, uint32_t TId);

void __kmpc_end_taskgroup(IdentTy *Loc, uint32_t TId);

int32_t __kmpc_omp_taskyield(IdentTy *Loc, uint32_t TId, int);

int32_t __kmpc_omp_taskwait(IdentTy *Loc, uint32_t TId);

void __kmpc_taskloop(IdentTy *Loc, uint32_t TId,
                     TaskDescriptorTy *TaskDescriptor, int,
                     uint64_t *LowerBound, uint64_t *UpperBound, int64_t, int,
                     int32_t, uint64_t, void *);
///}

/// Misc
///
///{
int32_t __kmpc_cancellationpoint(IdentTy *Loc, int32_t TId, int32_t CancelVal);

int32_t __kmpc_cancel(IdentTy *Loc, int32_t TId, int32_t CancelVal);
///}

/// Shuffle
///
///{
int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);

///}
}

#endif