xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll (revision d9847cde4841140a95404ea7b7d3a57f8bfbf976)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=irtranslator %s -o - | FileCheck %s
3
4; Check that we correctly skip over disabled inputs
; Pixel shader taking one inreg float and two ordinary float arguments, using
; attribute group #1. The export call reads only %arg0 and %psinput1. The
; expected MIR names just $sgpr2 and $vgpr0 as live-ins: %arg0 is copied from
; $sgpr2, and the value exported in the last data slot (%psinput1) is copied
; from $vgpr0, so %psinput0 is given no live-in register.
; NOTE(review): the G_IMPLICIT_DEF presumably stands in for the skipped
; %psinput0 -- confirm against the argument lowering code.
5define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
6  ; CHECK-LABEL: name: disabled_input
7  ; CHECK: bb.1.main_body:
8  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0
9  ; CHECK-NEXT: {{  $}}
10  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
11  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
12  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
13  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
14  ; CHECK-NEXT:   S_ENDPGM 0
15main_body:
16  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
17  ret void
18}
19
; Variant of the disabled-input test in which the unused %psinput0 argument is
; a two-member struct of floats. The expected live-ins are still only $sgpr2
; and $vgpr0, with %psinput1 copied from $vgpr0. Compared with the scalar
; variant, the expected MIR has one extra COPY of the G_IMPLICIT_DEF value.
; NOTE(review): the implicit-def and its copy presumably represent the two
; members of the skipped struct argument -- confirm.
20define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 {
21  ; CHECK-LABEL: name: disabled_input_struct
22  ; CHECK: bb.1.main_body:
23  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0
24  ; CHECK-NEXT: {{  $}}
25  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
26  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
27  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
28  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
29  ; CHECK-NEXT:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
30  ; CHECK-NEXT:   S_ENDPGM 0
31main_body:
32  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
33  ret void
34}
35
; Pixel shader returning a float. The i32 argument is bitcast to float and
; returned; the expected MIR copies the incoming $vgpr0 value straight back
; into $vgpr0 and ends with SI_RETURN_TO_EPILOG using $vgpr0. No separate
; instruction is expected for the bitcast, and no readfirstlane is expected.
36define amdgpu_ps float @vgpr_return(i32 %vgpr) {
37  ; CHECK-LABEL: name: vgpr_return
38  ; CHECK: bb.1 (%ir-block.0):
39  ; CHECK-NEXT:   liveins: $vgpr0
40  ; CHECK-NEXT: {{  $}}
41  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
42  ; CHECK-NEXT:   $vgpr0 = COPY [[COPY]](s32)
43  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
44  %cast = bitcast i32 %vgpr to float
45  ret float %cast
46}
47
; Pixel shader returning an i32 that arrives in $vgpr0. The expected MIR
; passes the value through the llvm.amdgcn.readfirstlane intrinsic and copies
; the result into $sgpr0, which is the register named on SI_RETURN_TO_EPILOG.
48define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
49  ; CHECK-LABEL: name: sgpr_return_i32
50  ; CHECK: bb.1 (%ir-block.0):
51  ; CHECK-NEXT:   liveins: $vgpr0
52  ; CHECK-NEXT: {{  $}}
53  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
54  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
55  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
56  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
57  ret i32 %vgpr
58}
59
; Pixel shader returning an i64 that arrives in $vgpr0 and $vgpr1. The
; expected MIR merges the two 32-bit halves into an s64, unmerges it again
; into two s32 pieces, runs each piece through llvm.amdgcn.readfirstlane, and
; copies the results into $sgpr0 and $sgpr1 for SI_RETURN_TO_EPILOG.
60define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
61  ; CHECK-LABEL: name: sgpr_return_i64
62  ; CHECK: bb.1 (%ir-block.0):
63  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
64  ; CHECK-NEXT: {{  $}}
65  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
66  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
67  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
68  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
69  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
70  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
71  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
72  ; CHECK-NEXT:   $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
73  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
74  ret i64 %vgpr
75}
76
; Pixel shader returning a <2 x i32> that arrives in $vgpr0 and $vgpr1. The
; expected MIR builds the vector from the two incoming copies, unmerges it
; into its two s32 elements, runs each through llvm.amdgcn.readfirstlane, and
; copies the results into $sgpr0 and $sgpr1 for SI_RETURN_TO_EPILOG.
77define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
78  ; CHECK-LABEL: name: sgpr_return_v2i32
79  ; CHECK: bb.1 (%ir-block.0):
80  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
81  ; CHECK-NEXT: {{  $}}
82  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
83  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
84  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
85  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
86  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
87  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
88  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
89  ; CHECK-NEXT:   $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
90  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
91  ret <2 x i32> %vgpr
92}
93
; Pixel shader returning a two-member i32 struct assembled with insertvalue
; from its two arguments. The expected MIR runs each incoming VGPR copy
; through llvm.amdgcn.readfirstlane and copies the results into $sgpr0 and
; $sgpr1, in member order, for SI_RETURN_TO_EPILOG.
; NOTE(review): the G_IMPLICIT_DEF presumably comes from the undef aggregate
; used as the insertvalue base; it is not consumed by any later expected
; instruction -- confirm.
94define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1) {
95  ; CHECK-LABEL: name: sgpr_struct_return_i32_i32
96  ; CHECK: bb.1 (%ir-block.0):
97  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
98  ; CHECK-NEXT: {{  $}}
99  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
100  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
101  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
102  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
103  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
104  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
105  ; CHECK-NEXT:   $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
106  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
107  %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
108  %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
109  ret { i32, i32 } %value
110}
111
; Pixel shader returning an addrspace(3) pointer that arrives in $vgpr0 as a
; p3 value. The expected MIR converts the pointer to s32 with G_PTRTOINT,
; runs that integer through llvm.amdgcn.readfirstlane, and copies the result
; into $sgpr0 for SI_RETURN_TO_EPILOG.
112define amdgpu_ps ptr addrspace(3) @sgpr_return_p3i8(ptr addrspace(3) %vgpr) {
113  ; CHECK-LABEL: name: sgpr_return_p3i8
114  ; CHECK: bb.1 (%ir-block.0):
115  ; CHECK-NEXT:   liveins: $vgpr0
116  ; CHECK-NEXT: {{  $}}
117  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
118  ; CHECK-NEXT:   [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
119  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
120  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
121  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
122  ret ptr addrspace(3) %vgpr
123}
124
; Pixel shader returning an addrspace(1) pointer that arrives in $vgpr0 and
; $vgpr1. The expected MIR merges the two s32 halves into a p1 value,
; unmerges it back into two s32 pieces, runs each piece through
; llvm.amdgcn.readfirstlane, and copies the results into $sgpr0 and $sgpr1
; for SI_RETURN_TO_EPILOG.
125define amdgpu_ps ptr addrspace(1) @sgpr_return_p1i8(ptr addrspace(1) %vgpr) {
126  ; CHECK-LABEL: name: sgpr_return_p1i8
127  ; CHECK: bb.1 (%ir-block.0):
128  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
129  ; CHECK-NEXT: {{  $}}
130  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
131  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
132  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
133  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
134  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
135  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
136  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
137  ; CHECK-NEXT:   $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
138  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
139  ret ptr addrspace(1) %vgpr
140}
141
; Pixel shader returning a <2 x i16> that arrives in the single register
; $vgpr0. The expected MIR bitcasts the <2 x s16> value to s32, runs it
; through llvm.amdgcn.readfirstlane, and copies the result into $sgpr0 for
; SI_RETURN_TO_EPILOG.
142define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
143  ; CHECK-LABEL: name: sgpr_return_v2i16
144  ; CHECK: bb.1 (%ir-block.0):
145  ; CHECK-NEXT:   liveins: $vgpr0
146  ; CHECK-NEXT: {{  $}}
147  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
148  ; CHECK-NEXT:   [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
149  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
150  ; CHECK-NEXT:   $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
151  ; CHECK-NEXT:   SI_RETURN_TO_EPILOG implicit $sgpr0
152  ret <2 x i16> %vgpr
153}
154
; Export intrinsic called by the two disabled-input tests in this file. Its
; first two and last two operands are immarg, so they must be constants.
155declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)  #0
156
; Attribute group #0 is used on the export intrinsic declaration and on its
; call sites. Attribute group #1 is attached to the two disabled-input test
; functions; its value 0x00002 has only bit 1 set.
; NOTE(review): presumably each bit of "InitialPSInputAddr" enables one pixel
; shader input, which would leave input 0 disabled -- confirm against the
; AMDGPU backend documentation.
157attributes #0 = { nounwind }
158attributes #1 = { "InitialPSInputAddr"="0x00002" }
159