; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}

; Lowering test for a set of generic LLVM and NVVM intrinsics on sm_60. The
; llc invocations above cover both the nvptx and nvptx64 triples; when the
; ptxas feature is present the generated PTX is also piped to %ptxas-verify
; (the 32-bit variant is skipped when the ptxas-12.0 feature is set).
;
; Every function label below ends with '(' so that it anchors on the function
; signature and cannot match a longer name that shares the same prefix
; (for example test_popc64 vs. test_popc64_trunc).

; llvm.fabs.f32 is expected to lower to abs.f32.
; CHECK-LABEL: test_fabsf(
define float @test_fabsf(float %f) {
; CHECK: abs.f32
  %x = call float @llvm.fabs.f32(float %f)
  ret float %x
}

; llvm.fabs.f64 is expected to lower to abs.f64.
; CHECK-LABEL: test_fabs(
define double @test_fabs(double %d) {
; CHECK: abs.f64
  %x = call double @llvm.fabs.f64(double %d)
  ret double %x
}

; The NVVM-specific sqrt intrinsic is expected to lower to sqrt.rn.f32.
; CHECK-LABEL: test_nvvm_sqrt(
define float @test_nvvm_sqrt(float %a) {
; CHECK: sqrt.rn.f32
  %val = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %val
}

; The generic llvm.sqrt.f32 is expected to lower to the same sqrt.rn.f32.
; CHECK-LABEL: test_llvm_sqrt(
define float @test_llvm_sqrt(float %a) {
; CHECK: sqrt.rn.f32
  %val = call float @llvm.sqrt.f32(float %a)
  ret float %val
}

; llvm.bitreverse.i32 is expected to lower to brev.b32.
; CHECK-LABEL: test_bitreverse32(
define i32 @test_bitreverse32(i32 %a) {
; CHECK: brev.b32
  %val = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %val
}

; llvm.bitreverse.i64 is expected to lower to brev.b64.
; CHECK-LABEL: test_bitreverse64(
define i64 @test_bitreverse64(i64 %a) {
; CHECK: brev.b64
  %val = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %val
}

; llvm.ctpop.i32 is expected to lower to popc.b32.
; CHECK-LABEL: test_popc32(
define i32 @test_popc32(i32 %a) {
; CHECK: popc.b32
  %val = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %val
}

; llvm.ctpop.i64 is expected to lower to popc.b64 followed by a cvt.u64.u32
; that widens the 32-bit popc result to the i64 return type.
; CHECK-LABEL: test_popc64(
define i64 @test_popc64(i64 %a) {
; CHECK: popc.b64
; CHECK: cvt.u64.u32
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %val
}

; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
; if this function returns an i32, there's no need to do any type conversions
; in the ptx.
; CHECK-LABEL: test_popc64_trunc(
define i32 @test_popc64_trunc(i64 %a) {
; CHECK: popc.b64
; CHECK-NOT: cvt.
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  %trunc = trunc i64 %val to i32
  ret i32 %trunc
}

; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
; then converting back to i16.
; CHECK-LABEL: test_popc16(
define void @test_popc16(i16 %a, ptr %b) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK: cvt.u16.u32
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  store i16 %val, ptr %b
  ret void
}

; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
; to do any conversions after calling popc.b32, because that returns an i32.
; CHECK-LABEL: test_popc16_to_32(
define i32 @test_popc16_to_32(i16 %a) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK-NOT: cvt.
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  %zext = zext i16 %val to i32
  ret i32 %zext
}

; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and
; may be CSE'd: only one read of %tid.x is expected for the two calls.
; CHECK-LABEL: test_tid(
define i32 @test_tid() {
; CHECK: mov.u32 %r{{.*}}, %tid.x;
  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %ret = add i32 %a, %b
; CHECK: ret
  ret i32 %ret
}

; Reading clock() or clock64() should not be CSE'd, as each read may return a
; different value: two separate reads of %clock are expected.
; CHECK-LABEL: test_clock(
define i32 @test_clock() {
; CHECK: mov.u32 %r{{.*}}, %clock;
  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
; CHECK: mov.u32 %r{{.*}}, %clock;
  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
  %ret = add i32 %a, %b
; CHECK: ret
  ret i32 %ret
}

; Same as test_clock, for the 64-bit %clock64 register: two reads are expected.
; CHECK-LABEL: test_clock64(
define i64 @test_clock64() {
; CHECK: mov.u64 %r{{.*}}, %clock64;
  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; CHECK: mov.u64 %r{{.*}}, %clock64;
  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; llvm.nvvm.exit is expected to lower to the PTX exit instruction.
; CHECK-LABEL: test_exit(
define void @test_exit() {
; CHECK: exit;
  call void @llvm.nvvm.exit()
  ret void
}

; Two reads of %globaltimer are expected for the two intrinsic calls.
; CHECK-LABEL: test_globaltimer(
define i64 @test_globaltimer() {
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; The generic llvm.readcyclecounter is expected to read %clock64, once per
; call.
; CHECK-LABEL: test_cyclecounter(
define i64 @test_cyclecounter() {
; CHECK: mov.u64 %r{{.*}}, %clock64;
  %a = tail call i64 @llvm.readcyclecounter()
; CHECK: mov.u64 %r{{.*}}, %clock64;
  %b = tail call i64 @llvm.readcyclecounter()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; The generic llvm.readsteadycounter is expected to read %globaltimer, once
; per call.
; CHECK-LABEL: test_steadycounter(
define i64 @test_steadycounter() {
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
  %a = tail call i64 @llvm.readsteadycounter()
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
  %b = tail call i64 @llvm.readsteadycounter()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
declare float @llvm.sqrt.f32(float)
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
declare i64 @llvm.readcyclecounter()
declare i64 @llvm.readsteadycounter()