xref: /llvm-project/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll (revision fb33af08e4c105a05855f8beeb972d493410e72f)
; Test to make sure NVVM intrinsics are automatically upgraded.
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
4
; Legacy NVVM intrinsics under test. None of these names is expected to
; survive the llvm-as | llvm-dis round trip; each group below is exercised
; by the function named in its header comment.

; Bit manipulation and half-to-float conversion (see @simple_upgrade).
declare i32 @llvm.nvvm.brev32(i32)
declare i64 @llvm.nvvm.brev64(i64)
declare i32 @llvm.nvvm.clz.i(i32)
declare i32 @llvm.nvvm.clz.ll(i64)
declare i32 @llvm.nvvm.popc.i(i32)
declare i32 @llvm.nvvm.popc.ll(i64)
declare float @llvm.nvvm.h2f(i16)

; Integer absolute value (see @abs).
declare i32 @llvm.nvvm.abs.i(i32)
declare i64 @llvm.nvvm.abs.ll(i64)

; Integer max/min for i16, i32 and i64 (see @min_max). The s/i/ll variants
; are expected to upgrade to signed compares, us/ui/ull to unsigned ones.
declare i16 @llvm.nvvm.max.s(i16, i16)
declare i32 @llvm.nvvm.max.i(i32, i32)
declare i64 @llvm.nvvm.max.ll(i64, i64)
declare i16 @llvm.nvvm.max.us(i16, i16)
declare i32 @llvm.nvvm.max.ui(i32, i32)
declare i64 @llvm.nvvm.max.ull(i64, i64)
declare i16 @llvm.nvvm.min.s(i16, i16)
declare i32 @llvm.nvvm.min.i(i32, i32)
declare i64 @llvm.nvvm.min.ll(i64, i64)
declare i16 @llvm.nvvm.min.us(i16, i16)
declare i32 @llvm.nvvm.min.ui(i32, i32)
declare i64 @llvm.nvvm.min.ull(i64, i64)

; Same-width int <-> float reinterpretation (see @bitcast).
declare i32 @llvm.nvvm.bitcast.f2i(float)
declare float @llvm.nvvm.bitcast.i2f(i32)
declare i64 @llvm.nvvm.bitcast.d2ll(double)
declare double @llvm.nvvm.bitcast.ll2d(i64)

; Rotates; the amount is always passed as i32 (see @rotate).
declare i32 @llvm.nvvm.rotate.b32(i32, i32)
declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
declare i64 @llvm.nvvm.rotate.b64(i64, i32)

; Conversions between the generic address space and address spaces 1, 3, 4
; and 5 (see @addrspacecast).
declare ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr)
declare ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr)
declare ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr)
declare ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr)
declare ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1))
declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3))
declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5))

; ldg loads through an addrspace(1) pointer and through a generic pointer
; (see @ldg). The trailing i32 operand is the value that shows up as the
; alignment of the expected load.
declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1), i32)
declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1), i32)
declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1), i32)
declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32)
declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32)
declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32)
53
; CHECK-LABEL: @simple_upgrade
define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
; Each legacy call below is preceded by the generic-LLVM form it is expected
; to be rewritten into.

; Bit reversal, 32- and 64-bit.
; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
  %r1 = call i32 @llvm.nvvm.brev32(i32 %a)

; CHECK: call i64 @llvm.bitreverse.i64(i64 %b)
  %r2 = call i64 @llvm.nvvm.brev64(i64 %b)

; Count leading zeros. The i1 false operand is part of the expected call.
; CHECK: call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  %r3 = call i32 @llvm.nvvm.clz.i(i32 %a)

; clz.ll takes an i64 but returns i32, so the i64 count is truncated.
; CHECK: [[CTLZ64:%[a-zA-Z0-9.]+]] = call i64 @llvm.ctlz.i64(i64 %b, i1 false)
; CHECK: trunc i64 [[CTLZ64]] to i32
  %r4 = call i32 @llvm.nvvm.clz.ll(i64 %b)

; Population count.
; CHECK: call i32 @llvm.ctpop.i32(i32 %a)
  %r5 = call i32 @llvm.nvvm.popc.i(i32 %a)

; popc.ll likewise returns i32 for an i64 input, hence the trunc.
; CHECK: [[CTPOP64:%[a-zA-Z0-9.]+]] = call i64 @llvm.ctpop.i64(i64 %b)
; CHECK: trunc i64 [[CTPOP64]] to i32
  %r6 = call i32 @llvm.nvvm.popc.ll(i64 %b)

; Half (carried as i16) to float.
; CHECK: call float @llvm.convert.from.fp16.f32(i16 %c)
  %r7 = call float @llvm.nvvm.h2f(i16 %c)
  ret void
}
80
; CHECK-LABEL: @abs
define void @abs(i32 %a, i64 %b) {
; The abs intrinsics are expected to be expanded inline as
;   select (x >= 0), x, (0 - x)
; The negation and the comparison are matched with -DAG directives, so either
; may come first; the select must follow both.

; 32-bit variant.
; CHECK-DAG: [[NEG32:%[a-zA-Z0-9.]+]] = sub i32 0, %a
; CHECK-DAG: [[CMP32:%[a-zA-Z0-9.]+]] = icmp sge i32 %a, 0
; CHECK: select i1 [[CMP32]], i32 %a, i32 [[NEG32]]
  %r1 = call i32 @llvm.nvvm.abs.i(i32 %a)

; 64-bit variant.
; CHECK-DAG: [[NEG64:%[a-zA-Z0-9.]+]] = sub i64 0, %b
; CHECK-DAG: [[CMP64:%[a-zA-Z0-9.]+]] = icmp sge i64 %b, 0
; CHECK: select i1 [[CMP64]], i64 %b, i64 [[NEG64]]
  %r2 = call i64 @llvm.nvvm.abs.ll(i64 %b)

  ret void
}
95
; CHECK-LABEL: @min_max
define void @min_max(i16 %a1, i16 %a2, i32 %b1, i32 %b2, i64 %c1, i64 %c2) {
; Every max/min intrinsic is expected to become an icmp feeding a select over
; the same two operands. Only the predicate differs:
;   signed max -> sge    unsigned max -> uge
;   signed min -> sle    unsigned min -> ule

; Signed max (i16, i32, i64).
; CHECK: [[MAX_S:%[a-zA-Z0-9.]+]] = icmp sge i16 %a1, %a2
; CHECK: select i1 [[MAX_S]], i16 %a1, i16 %a2
  %r1 = call i16 @llvm.nvvm.max.s(i16 %a1, i16 %a2)

; CHECK: [[MAX_I:%[a-zA-Z0-9.]+]] = icmp sge i32 %b1, %b2
; CHECK: select i1 [[MAX_I]], i32 %b1, i32 %b2
  %r2 = call i32 @llvm.nvvm.max.i(i32 %b1, i32 %b2)

; CHECK: [[MAX_LL:%[a-zA-Z0-9.]+]] = icmp sge i64 %c1, %c2
; CHECK: select i1 [[MAX_LL]], i64 %c1, i64 %c2
  %r3 = call i64 @llvm.nvvm.max.ll(i64 %c1, i64 %c2)

; Unsigned max (i16, i32, i64).
; CHECK: [[MAX_US:%[a-zA-Z0-9.]+]] = icmp uge i16 %a1, %a2
; CHECK: select i1 [[MAX_US]], i16 %a1, i16 %a2
  %r4 = call i16 @llvm.nvvm.max.us(i16 %a1, i16 %a2)

; CHECK: [[MAX_UI:%[a-zA-Z0-9.]+]] = icmp uge i32 %b1, %b2
; CHECK: select i1 [[MAX_UI]], i32 %b1, i32 %b2
  %r5 = call i32 @llvm.nvvm.max.ui(i32 %b1, i32 %b2)

; CHECK: [[MAX_ULL:%[a-zA-Z0-9.]+]] = icmp uge i64 %c1, %c2
; CHECK: select i1 [[MAX_ULL]], i64 %c1, i64 %c2
  %r6 = call i64 @llvm.nvvm.max.ull(i64 %c1, i64 %c2)

; Signed min (i16, i32, i64).
; CHECK: [[MIN_S:%[a-zA-Z0-9.]+]] = icmp sle i16 %a1, %a2
; CHECK: select i1 [[MIN_S]], i16 %a1, i16 %a2
  %r7 = call i16 @llvm.nvvm.min.s(i16 %a1, i16 %a2)

; CHECK: [[MIN_I:%[a-zA-Z0-9.]+]] = icmp sle i32 %b1, %b2
; CHECK: select i1 [[MIN_I]], i32 %b1, i32 %b2
  %r8 = call i32 @llvm.nvvm.min.i(i32 %b1, i32 %b2)

; CHECK: [[MIN_LL:%[a-zA-Z0-9.]+]] = icmp sle i64 %c1, %c2
; CHECK: select i1 [[MIN_LL]], i64 %c1, i64 %c2
  %r9 = call i64 @llvm.nvvm.min.ll(i64 %c1, i64 %c2)

; Unsigned min (i16, i32, i64).
; CHECK: [[MIN_US:%[a-zA-Z0-9.]+]] = icmp ule i16 %a1, %a2
; CHECK: select i1 [[MIN_US]], i16 %a1, i16 %a2
  %r10 = call i16 @llvm.nvvm.min.us(i16 %a1, i16 %a2)

; CHECK: [[MIN_UI:%[a-zA-Z0-9.]+]] = icmp ule i32 %b1, %b2
; CHECK: select i1 [[MIN_UI]], i32 %b1, i32 %b2
  %r11 = call i32 @llvm.nvvm.min.ui(i32 %b1, i32 %b2)

; CHECK: [[MIN_ULL:%[a-zA-Z0-9.]+]] = icmp ule i64 %c1, %c2
; CHECK: select i1 [[MIN_ULL]], i64 %c1, i64 %c2
  %r12 = call i64 @llvm.nvvm.min.ull(i64 %c1, i64 %c2)

  ret void
}
148
; CHECK-LABEL: @bitcast
define void @bitcast(i32 %a, i64 %b, float %c, double %d) {
; Each NVVM bitcast intrinsic is expected to become a plain bitcast between
; the same pair of types. The expected line sits directly above the call that
; produces it, in the same order as the calls.

; CHECK: bitcast float %c to i32
  %r1 = call i32 @llvm.nvvm.bitcast.f2i(float %c)

; CHECK: bitcast i32 %a to float
  %r2 = call float @llvm.nvvm.bitcast.i2f(i32 %a)

; CHECK: bitcast double %d to i64
  %r3 = call i64 @llvm.nvvm.bitcast.d2ll(double %d)

; CHECK: bitcast i64 %b to double
  %r4 = call double @llvm.nvvm.bitcast.ll2d(i64 %b)

  ret void
}
163
; CHECK-LABEL: @rotate
define void @rotate(i32 %a, i64 %b) {
; Rotates are expected to become funnel shifts with the rotated value passed
; as both data operands: fshl for the plain (left) rotates, fshr for
; rotate.right. The 64-bit intrinsics take an i32 amount, which appears as an
; i64 operand in the expected call.

; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 6)
  %r1 = call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 6)

; CHECK: call i64 @llvm.fshr.i64(i64 %b, i64 %b, i64 7)
  %r2 = call i64 @llvm.nvvm.rotate.right.b64(i64 %b, i32 7)

; CHECK: call i64 @llvm.fshl.i64(i64 %b, i64 %b, i64 8)
  %r3 = call i64 @llvm.nvvm.rotate.b64(i64 %b, i32 8)

  ret void
}
175
; CHECK-LABEL: @addrspacecast
define void @addrspacecast(ptr %p0) {
; Each address-space conversion intrinsic is expected to become a single
; addrspacecast. The calls form a chain (every result feeds the next call),
; so the expected lines capture each result and require it as the operand of
; the following cast. Captures are used instead of literal value numbers so
; the test does not depend on how the upgraded instructions get numbered.

; generic -> addrspace(1) -> generic
; CHECK: [[GLOBAL:%[a-zA-Z0-9.]+]] = addrspacecast ptr %p0 to ptr addrspace(1)
  %p1 = call ptr addrspace(1) @llvm.nvvm.ptr.gen.to.global.p1.p0(ptr %p0)
; CHECK: [[GEN1:%[a-zA-Z0-9.]+]] = addrspacecast ptr addrspace(1) [[GLOBAL]] to ptr
  %p2 = call ptr @llvm.nvvm.ptr.global.to.gen.p0.p1(ptr addrspace(1) %p1)

; generic -> addrspace(3) -> generic
; CHECK: [[SHARED:%[a-zA-Z0-9.]+]] = addrspacecast ptr [[GEN1]] to ptr addrspace(3)
  %p3 = call ptr addrspace(3) @llvm.nvvm.ptr.gen.to.shared.p3.p0(ptr %p2)
; CHECK: [[GEN2:%[a-zA-Z0-9.]+]] = addrspacecast ptr addrspace(3) [[SHARED]] to ptr
  %p4 = call ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3) %p3)

; generic -> addrspace(4) -> generic
; CHECK: [[CONST:%[a-zA-Z0-9.]+]] = addrspacecast ptr [[GEN2]] to ptr addrspace(4)
  %p5 = call ptr addrspace(4) @llvm.nvvm.ptr.gen.to.constant.p4.p0(ptr %p4)
; CHECK: [[GEN3:%[a-zA-Z0-9.]+]] = addrspacecast ptr addrspace(4) [[CONST]] to ptr
  %p6 = call ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4) %p5)

; generic -> addrspace(5) -> generic
; CHECK: [[LOCAL:%[a-zA-Z0-9.]+]] = addrspacecast ptr [[GEN3]] to ptr addrspace(5)
  %p7 = call ptr addrspace(5) @llvm.nvvm.ptr.gen.to.local.p5.p0(ptr %p6)
; CHECK: {{%[a-zA-Z0-9.]+}} = addrspacecast ptr addrspace(5) [[LOCAL]] to ptr
  %p8 = call ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5) %p7)

  ret void
}
201
; CHECK-LABEL: @ldg
define void @ldg(ptr %p0, ptr addrspace(1) %p1) {
; ldg intrinsics are expected to become loads from addrspace(1) tagged with
; !invariant.load, with the intrinsic's i32 operand used as the alignment.
; The metadata node is captured on first use and must be the same node on
; every later load. Value names are matched by pattern so the test does not
; depend on how the upgraded instructions get numbered.

; Pointer already in addrspace(1): a single load.
; CHECK: {{%[a-zA-Z0-9.]+}} = load i32, ptr addrspace(1) %p1, align 4, !invariant.load [[MD:![0-9]+]]
  %v1 = call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %p1, i32 4)

; CHECK: {{%[a-zA-Z0-9.]+}} = load ptr, ptr addrspace(1) %p1, align 8, !invariant.load [[MD]]
  %v2 = call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %p1, i32 8)

; CHECK: {{%[a-zA-Z0-9.]+}} = load float, ptr addrspace(1) %p1, align 16, !invariant.load [[MD]]
  %v3 = call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %p1, i32 16)

; Generic pointer: cast to addrspace(1) first, then load through the cast.
; CHECK: [[CAST1:%[a-zA-Z0-9.]+]] = addrspacecast ptr %p0 to ptr addrspace(1)
; CHECK: {{%[a-zA-Z0-9.]+}} = load i32, ptr addrspace(1) [[CAST1]], align 4, !invariant.load [[MD]]
  %v4 = call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr %p0, i32 4)

; CHECK: [[CAST2:%[a-zA-Z0-9.]+]] = addrspacecast ptr %p0 to ptr addrspace(1)
; CHECK: {{%[a-zA-Z0-9.]+}} = load ptr, ptr addrspace(1) [[CAST2]], align 8, !invariant.load [[MD]]
  %v5 = call ptr @llvm.nvvm.ldg.global.p.p0(ptr %p0, i32 8)

; CHECK: [[CAST3:%[a-zA-Z0-9.]+]] = addrspacecast ptr %p0 to ptr addrspace(1)
; CHECK: {{%[a-zA-Z0-9.]+}} = load float, ptr addrspace(1) [[CAST3]], align 16, !invariant.load [[MD]]
  %v6 = call float @llvm.nvvm.ldg.global.f.f32.p0(ptr %p0, i32 16)

  ret void
}