; RUN: llc < %s -mtriple=nvptx -mcpu=sm_60 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_60 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_60 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_60 | %ptxas-verify %}

; CHECK-LABEL: test_fabsf(
define float @test_fabsf(float %f) {
; CHECK: abs.f32
  %x = call float @llvm.fabs.f32(float %f)
  ret float %x
}

; CHECK-LABEL: test_fabs(
define double @test_fabs(double %d) {
; CHECK: abs.f64
  %x = call double @llvm.fabs.f64(double %d)
  ret double %x
}

; CHECK-LABEL: test_nvvm_sqrt(
define float @test_nvvm_sqrt(float %a) {
; CHECK: sqrt.rn.f32
  %val = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %val
}

; CHECK-LABEL: test_llvm_sqrt(
define float @test_llvm_sqrt(float %a) {
; CHECK: sqrt.rn.f32
  %val = call float @llvm.sqrt.f32(float %a)
  ret float %val
}

; CHECK-LABEL: test_bitreverse32(
define i32 @test_bitreverse32(i32 %a) {
; CHECK: brev.b32
  %val = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %val
}

; CHECK-LABEL: test_bitreverse64(
define i64 @test_bitreverse64(i64 %a) {
; CHECK: brev.b64
  %val = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %val
}

; CHECK-LABEL: test_popc32(
define i32 @test_popc32(i32 %a) {
; CHECK: popc.b32
  %val = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %val
}

; CHECK-LABEL: test_popc64
define i64 @test_popc64(i64 %a) {
; CHECK: popc.b64
; CHECK: cvt.u64.u32
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %val
}

; NVPTX popc.b64 returns an i32 even though @llvm.ctpop.i64 returns an i64, so
; if this function returns an i32, there's no need to do any type conversions
; in the PTX.
; CHECK-LABEL: test_popc64_trunc
define i32 @test_popc64_trunc(i64 %a) {
; CHECK: popc.b64
; CHECK-NOT: cvt.
  %val = call i64 @llvm.ctpop.i64(i64 %a)
  %trunc = trunc i64 %val to i32
  ret i32 %trunc
}
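
; As a rough illustration of the point above, the PTX for test_popc64_trunc
; should look something like this (register numbers are illustrative, not
; copied from actual llc output):
;   popc.b64  %r1, %rd1;   // 64-bit source, 32-bit result register
; with %r1 returned directly, so no cvt is needed.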

; llvm.ctpop.i16 is implemented by converting to i32, running popc.b32, and
; then converting back to i16.
; CHECK-LABEL: test_popc16
define void @test_popc16(i16 %a, ptr %b) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK: cvt.u16.u32
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  store i16 %val, ptr %b
  ret void
}
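
; A minimal sketch of the widen/popcount/narrow sequence the checks above
; describe (register numbers and the final store form are illustrative only):
;   cvt.u32.u16  %r1, %rs1;
;   popc.b32     %r2, %r1;
;   cvt.u16.u32  %rs2, %r2;
;   st.u16       [%rd1], %rs2;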

; If we call llvm.ctpop.i16 and then zext the result to i32, we shouldn't need
; to do any conversions after calling popc.b32, because that returns an i32.
; CHECK-LABEL: test_popc16_to_32
define i32 @test_popc16_to_32(i16 %a) {
; CHECK: cvt.u32.u16
; CHECK: popc.b32
; CHECK-NOT: cvt.
  %val = call i16 @llvm.ctpop.i16(i16 %a)
  %zext = zext i16 %val to i32
  ret i32 %zext
}
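
; For comparison, a sketch of the expected code when the result is zext'd to
; i32 (again, register numbers are illustrative):
;   cvt.u32.u16  %r1, %rs1;
;   popc.b32     %r2, %r1;
; with %r2 returned as-is, so no narrowing cvt is emitted.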

; Most of the nvvm.read.ptx.sreg.* intrinsics always return the same value and
; may be CSE'd.
; CHECK-LABEL: test_tid
define i32 @test_tid() {
; CHECK: mov.u32         %r{{.*}}, %tid.x;
  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; CHECK-NOT: mov.u32         %r{{.*}}, %tid.x;
  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %ret = add i32 %a, %b
; CHECK: ret
  ret i32 %ret
}
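
; A sketch of the CSE'd form checked for above (illustrative registers; the
; second read of %tid.x is expected to fold into the first):
;   mov.u32  %r1, %tid.x;
;   add.s32  %r2, %r1, %r1;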

; Reading clock() or clock64() should not be CSE'd, as each read may return a
; different value.
; CHECK-LABEL: test_clock
define i32 @test_clock() {
; CHECK: mov.u32         %r{{.*}}, %clock;
  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
; CHECK: mov.u32         %r{{.*}}, %clock;
  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
  %ret = add i32 %a, %b
; CHECK: ret
  ret i32 %ret
}
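
; In contrast, a sketch of the expected un-CSE'd form for test_clock
; (illustrative registers; note the two separate reads of %clock):
;   mov.u32  %r1, %clock;
;   mov.u32  %r2, %clock;
;   add.s32  %r3, %r1, %r2;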

; CHECK-LABEL: test_clock64
define i64 @test_clock64() {
; CHECK: mov.u64         %r{{.*}}, %clock64;
  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
; CHECK: mov.u64         %r{{.*}}, %clock64;
  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; CHECK-LABEL: test_exit
define void @test_exit() {
; CHECK: exit;
  call void @llvm.nvvm.exit()
  ret void
}

; CHECK-LABEL: test_globaltimer
define i64 @test_globaltimer() {
; CHECK: mov.u64         %r{{.*}}, %globaltimer;
  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
; CHECK: mov.u64         %r{{.*}}, %globaltimer;
  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; CHECK-LABEL: test_cyclecounter
define i64 @test_cyclecounter() {
; CHECK: mov.u64         %r{{.*}}, %clock64;
  %a = tail call i64 @llvm.readcyclecounter()
; CHECK: mov.u64         %r{{.*}}, %clock64;
  %b = tail call i64 @llvm.readcyclecounter()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

; CHECK-LABEL: test_steadycounter
define i64 @test_steadycounter() {
; CHECK: mov.u64         %r{{.*}}, %globaltimer;
  %a = tail call i64 @llvm.readsteadycounter()
; CHECK: mov.u64         %r{{.*}}, %globaltimer;
  %b = tail call i64 @llvm.readsteadycounter()
  %ret = add i64 %a, %b
; CHECK: ret
  ret i64 %ret
}

declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
declare float @llvm.sqrt.f32(float)
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)

declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
declare i64 @llvm.readcyclecounter()
declare i64 @llvm.readsteadycounter()