1 //===---------------- Implementation of GPU utils ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 10 #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 11 12 #include "src/__support/macros/attributes.h" 13 #include "src/__support/macros/config.h" 14 #include "src/__support/macros/properties/architectures.h" 15 16 #if !__has_include(<gpuintrin.h>) 17 #error "Unsupported compiler" 18 #endif 19 20 #include <gpuintrin.h> 21 22 namespace LIBC_NAMESPACE_DECL { 23 namespace gpu { 24 25 template <typename T> using Private = __gpu_private T; 26 template <typename T> using Constant = __gpu_constant T; 27 template <typename T> using Local = __gpu_local T; 28 template <typename T> using Global = __gpu_local T; 29 30 LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); } 31 32 LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); } 33 34 LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); } 35 36 LIBC_INLINE uint64_t get_num_blocks() { 37 return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); 38 } 39 40 LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); } 41 42 LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); } 43 44 LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); } 45 46 LIBC_INLINE uint64_t get_block_id() { 47 return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + 48 get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); 49 } 50 51 LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); } 52 53 LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); } 54 55 LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); } 56 57 LIBC_INLINE uint64_t get_num_threads() { 58 return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); 59 } 60 61 LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); } 62 63 LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); } 64 65 LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); } 66 67 LIBC_INLINE uint64_t get_thread_id() { 68 return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + 69 get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); 70 } 71 72 LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); } 73 74 LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); } 75 76 LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); } 77 78 LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { 79 return __gpu_read_first_lane_u32(lane_mask, x); 80 } 81 82 LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { 83 return __gpu_ballot(lane_mask, x); 84 } 85 86 LIBC_INLINE void sync_threads() { __gpu_sync_threads(); } 87 88 LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); } 89 90 LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x) { 91 return __gpu_shuffle_idx_u32(lane_mask, idx, x); 92 } 93 94 [[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); } 95 96 LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { 97 return __gpu_is_first_in_lane(lane_mask); 98 } 99 100 LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) { 101 return __gpu_lane_sum_u32(lane_mask, x); 102 } 103 104 LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) { 105 return __gpu_lane_scan_u32(lane_mask, x); 106 } 107 108 LIBC_INLINE uint64_t fixed_frequency_clock() { 109 return __builtin_readsteadycounter(); 110 } 111 112 LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } 113 114 } // namespace gpu 115 } // namespace LIBC_NAMESPACE_DECL 116 117 #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H 118