// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s

// Code-generation test for the PTX/NVVM builtins. The file is compiled to
// LLVM IR for both the nvptx and nvptx64 triples, and the FileCheck
// directives below verify that each builtin call is emitted as a call to the
// matching LLVM intrinsic.
//
// FileCheck matches its directives in the order they are written, so the
// builtin calls inside each function must stay in the same order as the
// directives that precede them.

// __builtin_ptx_read_tid_{x,y,z,w} -> llvm.ptx.read.tid.{x,y,z,w}.
// The four results are summed and returned.
int read_tid(void) {

// CHECK: call i32 @llvm.ptx.read.tid.x()
// CHECK: call i32 @llvm.ptx.read.tid.y()
// CHECK: call i32 @llvm.ptx.read.tid.z()
// CHECK: call i32 @llvm.ptx.read.tid.w()

  int x = __builtin_ptx_read_tid_x();
  int y = __builtin_ptx_read_tid_y();
  int z = __builtin_ptx_read_tid_z();
  int w = __builtin_ptx_read_tid_w();

  return x + y + z + w;

}

// __builtin_ptx_read_ntid_{x,y,z,w} -> llvm.ptx.read.ntid.{x,y,z,w}.
// The four results are summed and returned.
int read_ntid(void) {

// CHECK: call i32 @llvm.ptx.read.ntid.x()
// CHECK: call i32 @llvm.ptx.read.ntid.y()
// CHECK: call i32 @llvm.ptx.read.ntid.z()
// CHECK: call i32 @llvm.ptx.read.ntid.w()

  int x = __builtin_ptx_read_ntid_x();
  int y = __builtin_ptx_read_ntid_y();
  int z = __builtin_ptx_read_ntid_z();
  int w = __builtin_ptx_read_ntid_w();

  return x + y + z + w;

}

// __builtin_ptx_read_ctaid_{x,y,z,w} -> llvm.ptx.read.ctaid.{x,y,z,w}.
// The four results are summed and returned.
int read_ctaid(void) {

// CHECK: call i32 @llvm.ptx.read.ctaid.x()
// CHECK: call i32 @llvm.ptx.read.ctaid.y()
// CHECK: call i32 @llvm.ptx.read.ctaid.z()
// CHECK: call i32 @llvm.ptx.read.ctaid.w()

  int x = __builtin_ptx_read_ctaid_x();
  int y = __builtin_ptx_read_ctaid_y();
  int z = __builtin_ptx_read_ctaid_z();
  int w = __builtin_ptx_read_ctaid_w();

  return x + y + z + w;

}

// __builtin_ptx_read_nctaid_{x,y,z,w} -> llvm.ptx.read.nctaid.{x,y,z,w}.
// The four results are summed and returned.
int read_nctaid(void) {

// CHECK: call i32 @llvm.ptx.read.nctaid.x()
// CHECK: call i32 @llvm.ptx.read.nctaid.y()
// CHECK: call i32 @llvm.ptx.read.nctaid.z()
// CHECK: call i32 @llvm.ptx.read.nctaid.w()

  int x = __builtin_ptx_read_nctaid_x();
  int y = __builtin_ptx_read_nctaid_y();
  int z = __builtin_ptx_read_nctaid_z();
  int w = __builtin_ptx_read_nctaid_w();

  return x + y + z + w;

}

// laneid, warpid, nwarpid, smid, nsmid and gridid builtins -> the
// llvm.ptx.read.* intrinsics of the same names.
// The six results are summed and returned.
int read_ids(void) {

// CHECK: call i32 @llvm.ptx.read.laneid()
// CHECK: call i32 @llvm.ptx.read.warpid()
// CHECK: call i32 @llvm.ptx.read.nwarpid()
// CHECK: call i32 @llvm.ptx.read.smid()
// CHECK: call i32 @llvm.ptx.read.nsmid()
// CHECK: call i32 @llvm.ptx.read.gridid()

  int a = __builtin_ptx_read_laneid();
  int b = __builtin_ptx_read_warpid();
  int c = __builtin_ptx_read_nwarpid();
  int d = __builtin_ptx_read_smid();
  int e = __builtin_ptx_read_nsmid();
  int f = __builtin_ptx_read_gridid();

  return a + b + c + d + e + f;

}

// __builtin_ptx_read_lanemask_{eq,le,lt,ge,gt} ->
// llvm.ptx.read.lanemask.{eq,le,lt,ge,gt}.
// The five results are summed and returned.
int read_lanemasks(void) {

// CHECK: call i32 @llvm.ptx.read.lanemask.eq()
// CHECK: call i32 @llvm.ptx.read.lanemask.le()
// CHECK: call i32 @llvm.ptx.read.lanemask.lt()
// CHECK: call i32 @llvm.ptx.read.lanemask.ge()
// CHECK: call i32 @llvm.ptx.read.lanemask.gt()

  int a = __builtin_ptx_read_lanemask_eq();
  int b = __builtin_ptx_read_lanemask_le();
  int c = __builtin_ptx_read_lanemask_lt();
  int d = __builtin_ptx_read_lanemask_ge();
  int e = __builtin_ptx_read_lanemask_gt();

  return a + b + c + d + e;

}


// __builtin_ptx_read_clock -> llvm.ptx.read.clock (expected to return i32) and
// __builtin_ptx_read_clock64 -> llvm.ptx.read.clock64 (expected to return i64).
// Returns the sum of both readings as a long.
long read_clocks(void) {

// CHECK: call i32 @llvm.ptx.read.clock()
// CHECK: call i64 @llvm.ptx.read.clock64()

  int a = __builtin_ptx_read_clock();
  long b = __builtin_ptx_read_clock64();

  // Convert the int reading to long before the addition.
  return (long)a + b;

}

// __builtin_ptx_read_pm{0,1,2,3} -> llvm.ptx.read.pm{0,1,2,3}.
// The four results are summed and returned.
int read_pms(void) {

// CHECK: call i32 @llvm.ptx.read.pm0()
// CHECK: call i32 @llvm.ptx.read.pm1()
// CHECK: call i32 @llvm.ptx.read.pm2()
// CHECK: call i32 @llvm.ptx.read.pm3()

  int a = __builtin_ptx_read_pm0();
  int b = __builtin_ptx_read_pm1();
  int c = __builtin_ptx_read_pm2();
  int d = __builtin_ptx_read_pm3();

  return a + b + c + d;

}

// __builtin_ptx_bar_sync(0) -> llvm.ptx.bar.sync with the constant argument
// passed through as i32 0.
void sync(void) {

// CHECK: call void @llvm.ptx.bar.sync(i32 0)

  __builtin_ptx_bar_sync(0);

}


// NVVM intrinsics

// The idea is not to test all intrinsics, just that Clang is recognizing the
// builtins defined in BuiltinsNVPTX.def
//
// f1/f2 feed the float builtins and d1/d2 the double builtins. The results
// are stored in locals that are never read; only the emitted intrinsic calls
// are checked.
void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
// CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);
// CHECK: call float @llvm.nvvm.add.rn.f
  float t5 = __nvvm_add_rn_f(f1, f2);

// CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
// CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
// CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

// CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
// CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
// CHECK: call void @llvm.nvvm.barrier0()
  __nvvm_bar0();
}