111cc826cSJoseph Huber // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 211cc826cSJoseph Huber // RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ 311cc826cSJoseph Huber // RUN: -internal-isystem %S/../../lib/Headers/ \ 411cc826cSJoseph Huber // RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ 511cc826cSJoseph Huber // RUN: | FileCheck %s --check-prefix=AMDGPU 611cc826cSJoseph Huber // 711cc826cSJoseph Huber // RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ 811cc826cSJoseph Huber // RUN: -internal-isystem %S/../../lib/Headers/ \ 911cc826cSJoseph Huber // RUN: -target-feature +ptx62 \ 1011cc826cSJoseph Huber // RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \ 1111cc826cSJoseph Huber // RUN: | FileCheck %s --check-prefix=NVPTX 1211cc826cSJoseph Huber 1311cc826cSJoseph Huber #include <gpuintrin.h> 1411cc826cSJoseph Huber 1511cc826cSJoseph Huber // AMDGPU-LABEL: define protected amdgpu_kernel void @foo( 1611cc826cSJoseph Huber // AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] { 1711cc826cSJoseph Huber // AMDGPU-NEXT: [[ENTRY:.*:]] 1811cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR7:[0-9]+]] 1911cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR7]] 2011cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR7]] 2111cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) #[[ATTR7]] 2211cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR7]] 2311cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR7]] 2411cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR7]] 2511cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) #[[ATTR7]] 2611cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR7]] 2711cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR7]] 2811cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR7]] 2911cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) #[[ATTR7]] 3011cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR7]] 3111cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR7]] 3211cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR7]] 3311cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) #[[ATTR7]] 3411cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR7]] 3511cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR7]] 3611cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR7]] 3711cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL19:%.*]] = call i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) #[[ATTR7]] 3811cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR7]] 3911cc826cSJoseph Huber // AMDGPU-NEXT: call void @__gpu_sync_threads() #[[ATTR7]] 4011cc826cSJoseph Huber // AMDGPU-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR7]] 4111cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR7]] 4211cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR7]] 4311cc826cSJoseph Huber // AMDGPU-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR7]] 4411cc826cSJoseph Huber // AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]] 4511cc826cSJoseph Huber // AMDGPU-NEXT: unreachable 4611cc826cSJoseph Huber // 47*4583f6d3SAlex MacLean // NVPTX-LABEL: define protected ptx_kernel void @foo( 4811cc826cSJoseph Huber // NVPTX-SAME: ) #[[ATTR0:[0-9]+]] { 4911cc826cSJoseph Huber // NVPTX-NEXT: [[ENTRY:.*:]] 5011cc826cSJoseph Huber // NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]] 5111cc826cSJoseph Huber // NVPTX-NEXT: [[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR6]] 5211cc826cSJoseph Huber // NVPTX-NEXT: [[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR6]] 5311cc826cSJoseph Huber // NVPTX-NEXT: [[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) #[[ATTR6]] 5411cc826cSJoseph Huber // NVPTX-NEXT: [[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR6]] 5511cc826cSJoseph Huber // NVPTX-NEXT: [[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR6]] 5611cc826cSJoseph Huber // NVPTX-NEXT: [[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR6]] 5711cc826cSJoseph Huber // NVPTX-NEXT: [[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) #[[ATTR6]] 5811cc826cSJoseph Huber // NVPTX-NEXT: [[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR6]] 5911cc826cSJoseph Huber // NVPTX-NEXT: [[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR6]] 6011cc826cSJoseph Huber // NVPTX-NEXT: [[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR6]] 6111cc826cSJoseph Huber // NVPTX-NEXT: [[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) #[[ATTR6]] 6211cc826cSJoseph Huber // NVPTX-NEXT: [[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]] 6311cc826cSJoseph Huber // NVPTX-NEXT: [[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]] 6411cc826cSJoseph Huber // NVPTX-NEXT: [[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]] 6511cc826cSJoseph Huber // NVPTX-NEXT: [[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) #[[ATTR6]] 6611cc826cSJoseph Huber // NVPTX-NEXT: [[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR6]] 6711cc826cSJoseph Huber // NVPTX-NEXT: [[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR6]] 6811cc826cSJoseph Huber // NVPTX-NEXT: [[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR6]] 6911cc826cSJoseph Huber // NVPTX-NEXT: [[CALL19:%.*]] = call i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) #[[ATTR6]] 7011cc826cSJoseph Huber // NVPTX-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR6]] 7111cc826cSJoseph Huber // NVPTX-NEXT: call void @__gpu_sync_threads() #[[ATTR6]] 7211cc826cSJoseph Huber // NVPTX-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]] 7311cc826cSJoseph Huber // NVPTX-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR6]] 7411cc826cSJoseph Huber // NVPTX-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR6]] 7511cc826cSJoseph Huber // NVPTX-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR6]] 7611cc826cSJoseph Huber // NVPTX-NEXT: call void @__gpu_exit() #[[ATTR7:[0-9]+]] 7711cc826cSJoseph Huber // NVPTX-NEXT: unreachable 7811cc826cSJoseph Huber // 7911cc826cSJoseph Huber __gpu_kernel void foo() { 8011cc826cSJoseph Huber __gpu_num_blocks_x(); 8111cc826cSJoseph Huber __gpu_num_blocks_y(); 8211cc826cSJoseph Huber __gpu_num_blocks_z(); 8311cc826cSJoseph Huber __gpu_num_blocks(0); 8411cc826cSJoseph Huber __gpu_block_id_x(); 8511cc826cSJoseph Huber __gpu_block_id_y(); 8611cc826cSJoseph Huber __gpu_block_id_z(); 8711cc826cSJoseph Huber __gpu_block_id(0); 8811cc826cSJoseph Huber __gpu_num_threads_x(); 8911cc826cSJoseph Huber __gpu_num_threads_y(); 9011cc826cSJoseph Huber __gpu_num_threads_z(); 9111cc826cSJoseph Huber __gpu_num_threads(0); 9211cc826cSJoseph Huber __gpu_thread_id_x(); 9311cc826cSJoseph Huber __gpu_thread_id_y(); 9411cc826cSJoseph Huber __gpu_thread_id_z(); 9511cc826cSJoseph Huber __gpu_thread_id(0); 9611cc826cSJoseph Huber __gpu_num_lanes(); 9711cc826cSJoseph Huber __gpu_lane_id(); 9811cc826cSJoseph Huber __gpu_lane_mask(); 9911cc826cSJoseph Huber __gpu_read_first_lane_u32(-1, -1); 10011cc826cSJoseph Huber __gpu_ballot(-1, 1); 10111cc826cSJoseph Huber __gpu_sync_threads(); 10211cc826cSJoseph Huber __gpu_sync_lane(-1); 10311cc826cSJoseph Huber __gpu_shuffle_idx_u32(-1, -1, -1); 10411cc826cSJoseph Huber __gpu_first_lane_id(-1); 10511cc826cSJoseph Huber __gpu_is_first_in_lane(-1); 10611cc826cSJoseph Huber __gpu_exit(); 10711cc826cSJoseph Huber } 108