// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" "-target-feature" "+ptx80" "-target-cpu" "sm_90" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

// Device-side codegen test: compiles this file for nvptx64 with the sm_90
// target CPU and the ptx80 feature, emits LLVM IR, and verifies that every
// __nvvm_* builtin called below is lowered to a call of the matching
// llvm.nvvm.* intrinsic. The directives are matched in order, so the order of
// the statements in the kernel body must stay in sync with them.

// The definition directive stops right after the opening parenthesis of the
// mangled name, so it does not depend on how the parameters are printed.
// CHECK: define{{.*}} void @_Z6kernelPlPvj(
__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) {
  // Running index into `out`; each value-returning builtin stores its result
  // in the next slot.
  int i = 0;
  // CHECK: call i1 @llvm.nvvm.isspacep.shared.cluster
  out[i++] = __nvvm_isspacep_shared_cluster(ptr);

  // Special-register reads: clusterid.{x,y,z,w} and nclusterid.{x,y,z,w}.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.x()
  out[i++] = __nvvm_read_ptx_sreg_clusterid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.y()
  out[i++] = __nvvm_read_ptx_sreg_clusterid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.z()
  out[i++] = __nvvm_read_ptx_sreg_clusterid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clusterid.w()
  out[i++] = __nvvm_read_ptx_sreg_clusterid_w();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.x()
  out[i++] = __nvvm_read_ptx_sreg_nclusterid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.y()
  out[i++] = __nvvm_read_ptx_sreg_nclusterid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.z()
  out[i++] = __nvvm_read_ptx_sreg_nclusterid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nclusterid.w()
  out[i++] = __nvvm_read_ptx_sreg_nclusterid_w();

  // Special-register reads: cluster.ctaid.{x,y,z,w} and
  // cluster.nctaid.{x,y,z,w}.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.x()
  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.y()
  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.z()
  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctaid.w()
  out[i++] = __nvvm_read_ptx_sreg_cluster_ctaid_w();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.x()
  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_x();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.y()
  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_y();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.z()
  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_z();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctaid.w()
  out[i++] = __nvvm_read_ptx_sreg_cluster_nctaid_w();

  // Special-register reads: cluster.ctarank / cluster.nctarank, followed by
  // the is_explicit_cluster query.
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.ctarank()
  out[i++] = __nvvm_read_ptx_sreg_cluster_ctarank();
  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.cluster.nctarank()
  out[i++] = __nvvm_read_ptx_sreg_cluster_nctarank();
  // CHECK: call i1 @llvm.nvvm.is_explicit_cluster()
  out[i++] = __nvvm_is_explicit_cluster();

  // `sptr` is `ptr` cast to address space 3, so the *_shared_cluster builtins
  // below are exercised with a `ptr addrspace(3)` operand while the plain
  // variants receive the original `ptr`.
  auto * sptr = (__attribute__((address_space(3))) void *)ptr;
  // CHECK: call ptr @llvm.nvvm.mapa(ptr %{{.*}}, i32 %{{.*}})
  out[i++] = (long) __nvvm_mapa(ptr, u);
  // CHECK: call ptr addrspace(3) @llvm.nvvm.mapa.shared.cluster(ptr addrspace(3) %{{.*}}, i32 %{{.*}})
  out[i++] = (long) __nvvm_mapa_shared_cluster(sptr, u);
  // CHECK: call i32 @llvm.nvvm.getctarank(ptr {{.*}})
  out[i++] = __nvvm_getctarank(ptr);
  // CHECK: call i32 @llvm.nvvm.getctarank.shared.cluster(ptr addrspace(3) {{.*}})
  out[i++] = __nvvm_getctarank_shared_cluster(sptr);

  // Builtins that return void: cluster barrier arrive / arrive.relaxed / wait
  // and the fence.sc.cluster fence.
  // CHECK: call void @llvm.nvvm.barrier.cluster.arrive()
  __nvvm_barrier_cluster_arrive();
  // CHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
  __nvvm_barrier_cluster_arrive_relaxed();
  // CHECK: call void @llvm.nvvm.barrier.cluster.wait()
  __nvvm_barrier_cluster_wait();
  // CHECK: call void @llvm.nvvm.fence.sc.cluster()
  __nvvm_fence_sc_cluster();

  // CHECK: ret void
}