xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ps-shader-arg-count.ll (revision 69f7d81d0a47781e5d4820873c20f725f3d0236e)
1*69f7d81dSDavid Stuttard;RUN: llc < %s -mtriple=amdgcn-pal -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
2*69f7d81dSDavid Stuttard;RUN: llc < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
3*69f7d81dSDavid Stuttard
; Test case: an amdgpu_ps function that reads exactly one of its non-inreg
; arguments. Element 1 of %arg3 becomes lane 0 of the returned <4 x float>;
; lanes 1-3 stay undef. The expectation below pins the reported VGPR count at 4.
4*69f7d81dSDavid Stuttard; ;CHECK-LABEL: {{^}}_amdgpu_ps_1_arg:
5*69f7d81dSDavid Stuttard; ;CHECK: NumVgprs: 4
6*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
7*69f7d81dSDavid Stuttard.entry:
  ; The only argument read anywhere in this function.
8*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
9*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  ; Wrap the vector in the single-member struct return type.
10*69f7d81dSDavid Stuttard  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1, 0
11*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret2
12*69f7d81dSDavid Stuttard}
13*69f7d81dSDavid Stuttard
; Test case: the first three non-inreg arguments (%arg3, %arg4, %arg5) are read.
; One element of each is placed in lanes 0-2 of the returned <4 x float>; lane 3
; stays undef. The three arguments are <2 x float> each, i.e. six 32-bit
; elements in total, which equals the expected VGPR count below.
14*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_3_arg:
15*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 6
16*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
17*69f7d81dSDavid Stuttard.entry:
  ; Pick one element from each of the three consecutive vector arguments.
18*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
19*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
20*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
  ; Build the result vector lane by lane (lanes 0, 1, 2).
21*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
22*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
23*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
24*69f7d81dSDavid Stuttard  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
25*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret2
26*69f7d81dSDavid Stuttard}
27*69f7d81dSDavid Stuttard
; Test case: two non-inreg arguments are read with an unread one between them.
; %arg3 and %arg5 are used, %arg4 is never referenced. The expected VGPR count
; below is 4, lower than the 6 expected for _amdgpu_ps_3_arg.
; NOTE(review): presumably the unread %arg4 is dropped and the two used inputs
; end up adjacent (2 + 2 elements) - confirm against the AMDGPU backend.
28*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_gap:
29*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 4
30*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
31*69f7d81dSDavid Stuttard.entry:
  ; %arg4 is deliberately skipped: only %arg3 and %arg5 are read.
32*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
33*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
  ; The two values fill lanes 0 and 1; lanes 2-3 stay undef.
34*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
35*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
36*69f7d81dSDavid Stuttard  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
37*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret2
38*69f7d81dSDavid Stuttard}
39*69f7d81dSDavid Stuttard
40*69f7d81dSDavid Stuttard; Using InitialPSInputAddr of 0x2 causes the 2nd VGPR arg to be included in the packing - this increases the total number of VGPRs and in turn makes arg3 not be packed to be
41*69f7d81dSDavid Stuttard; adjacent to arg1 (the only 2 used arguments)
; NOTE(review): "arg1", "arg3" and "2nd VGPR arg" in the note above appear to
; count the non-inreg arguments from 1, i.e. %arg3, %arg5 and %arg4 in the IR
; names used below - confirm.
; The function body is the same as _amdgpu_ps_2_arg_gap; the difference is the
; attribute group (#1 instead of #0), which sets "InitialPSInputAddr"="2", and
; the expected VGPR count (6 instead of 4).
42*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_no_pack:
43*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 6
44*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #1 {
45*69f7d81dSDavid Stuttard.entry:
  ; Only %arg3 and %arg5 are read; %arg4 is never referenced by the body.
46*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
47*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
48*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
49*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
50*69f7d81dSDavid Stuttard  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
51*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret2
52*69f7d81dSDavid Stuttard}
53*69f7d81dSDavid Stuttard
; Test case: every non-inreg argument (%arg3 .. %arg18) contributes to the
; result, which is a struct of four <4 x float> vectors. The non-inreg
; arguments add up to 24 32-bit elements (6 x <2 x float>, 1 x <3 x float>,
; 5 x float, 4 x i32), which equals the expected VGPR count below.
54*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg:
55*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 24
56*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
57*69f7d81dSDavid Stuttard.entry:
  ; Take one element out of each vector-typed argument so that all are used.
58*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
59*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
60*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
61*69f7d81dSDavid Stuttard  %i4 = extractelement <3 x float> %arg6, i32 1
62*69f7d81dSDavid Stuttard  %i5 = extractelement <2 x float> %arg7, i32 0
63*69f7d81dSDavid Stuttard  %i6 = extractelement <2 x float> %arg8, i32 0
64*69f7d81dSDavid Stuttard  %i7 = extractelement <2 x float> %arg9, i32 1
65*69f7d81dSDavid Stuttard
  ; Result vector 0: values taken from %arg3 .. %arg6.
66*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
67*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
68*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
69*69f7d81dSDavid Stuttard  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3
70*69f7d81dSDavid Stuttard
  ; Result vector 1: values taken from %arg7 .. %arg9 plus the scalar %arg10.
71*69f7d81dSDavid Stuttard  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
72*69f7d81dSDavid Stuttard  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
73*69f7d81dSDavid Stuttard  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
74*69f7d81dSDavid Stuttard  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3
75*69f7d81dSDavid Stuttard
  ; Result vector 2: the scalar floats %arg11 .. %arg14.
76*69f7d81dSDavid Stuttard  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
77*69f7d81dSDavid Stuttard  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
78*69f7d81dSDavid Stuttard  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
79*69f7d81dSDavid Stuttard  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3
80*69f7d81dSDavid Stuttard
  ; The i32 arguments are reinterpreted as float so they can go into a
  ; <4 x float> result vector.
81*69f7d81dSDavid Stuttard  %arg15.f = bitcast i32 %arg15 to float
82*69f7d81dSDavid Stuttard  %arg16.f = bitcast i32 %arg16 to float
83*69f7d81dSDavid Stuttard  %arg17.f = bitcast i32 %arg17 to float
84*69f7d81dSDavid Stuttard  %arg18.f = bitcast i32 %arg18 to float
85*69f7d81dSDavid Stuttard
  ; Result vector 3: the bitcast values of %arg15 .. %arg18.
86*69f7d81dSDavid Stuttard  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
87*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
88*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
89*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3
90*69f7d81dSDavid Stuttard
  ; Assemble the four vectors into the returned struct, member by member.
91*69f7d81dSDavid Stuttard  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
92*69f7d81dSDavid Stuttard  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
93*69f7d81dSDavid Stuttard  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
94*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3
95*69f7d81dSDavid Stuttard
96*69f7d81dSDavid Stuttard  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
97*69f7d81dSDavid Stuttard}
98*69f7d81dSDavid Stuttard
; Test case: same body as _amdgpu_ps_all_arg (all of %arg3 .. %arg18 are used),
; but the signature has two additional trailing float parameters, %extra_arg1
; and %extra_arg2, that the body never reads. The expected VGPR count below is
; 26, i.e. two more than for _amdgpu_ps_all_arg.
99*69f7d81dSDavid Stuttard; Extra arguments have to be allocated even if they're unused
100*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra_unused:
101*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 26
102*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
103*69f7d81dSDavid Stuttard.entry:
  ; Take one element out of each vector-typed argument so that all are used.
104*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
105*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
106*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
107*69f7d81dSDavid Stuttard  %i4 = extractelement <3 x float> %arg6, i32 1
108*69f7d81dSDavid Stuttard  %i5 = extractelement <2 x float> %arg7, i32 0
109*69f7d81dSDavid Stuttard  %i6 = extractelement <2 x float> %arg8, i32 0
110*69f7d81dSDavid Stuttard  %i7 = extractelement <2 x float> %arg9, i32 1
111*69f7d81dSDavid Stuttard
  ; Result vector 0: values taken from %arg3 .. %arg6.
112*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
113*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
114*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
115*69f7d81dSDavid Stuttard  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3
116*69f7d81dSDavid Stuttard
  ; Result vector 1: values taken from %arg7 .. %arg9 plus the scalar %arg10.
117*69f7d81dSDavid Stuttard  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
118*69f7d81dSDavid Stuttard  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
119*69f7d81dSDavid Stuttard  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
120*69f7d81dSDavid Stuttard  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3
121*69f7d81dSDavid Stuttard
  ; Result vector 2: the scalar floats %arg11 .. %arg14.
122*69f7d81dSDavid Stuttard  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
123*69f7d81dSDavid Stuttard  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
124*69f7d81dSDavid Stuttard  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
125*69f7d81dSDavid Stuttard  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3
126*69f7d81dSDavid Stuttard
  ; The i32 arguments are reinterpreted as float so they can go into a
  ; <4 x float> result vector.
127*69f7d81dSDavid Stuttard  %arg15.f = bitcast i32 %arg15 to float
128*69f7d81dSDavid Stuttard  %arg16.f = bitcast i32 %arg16 to float
129*69f7d81dSDavid Stuttard  %arg17.f = bitcast i32 %arg17 to float
130*69f7d81dSDavid Stuttard  %arg18.f = bitcast i32 %arg18 to float
131*69f7d81dSDavid Stuttard
  ; Result vector 3: the bitcast values of %arg15 .. %arg18. Note that
  ; %extra_arg1 / %extra_arg2 do not appear anywhere in this body.
132*69f7d81dSDavid Stuttard  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
133*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
134*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
135*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3
136*69f7d81dSDavid Stuttard
  ; Assemble the four vectors into the returned struct, member by member.
137*69f7d81dSDavid Stuttard  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
138*69f7d81dSDavid Stuttard  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
139*69f7d81dSDavid Stuttard  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
140*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3
141*69f7d81dSDavid Stuttard
142*69f7d81dSDavid Stuttard  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
143*69f7d81dSDavid Stuttard}
144*69f7d81dSDavid Stuttard
; Test case: all of %arg3 .. %arg18 and both trailing extra arguments are read.
; To fit everything into four <4 x float> results, %arg15 .. %arg18 are summed
; pairwise and share the last result vector with %extra_arg1 / %extra_arg2.
; Both reported VGPR figures are expected to be 26.
145*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra:
146*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 26
147*69f7d81dSDavid Stuttard; CHECK: NumVGPRsForWavesPerEU: 26
148*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
149*69f7d81dSDavid Stuttard.entry:
  ; Take one element out of each vector-typed argument so that all are used.
150*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
151*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
152*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
153*69f7d81dSDavid Stuttard  %i4 = extractelement <3 x float> %arg6, i32 1
154*69f7d81dSDavid Stuttard  %i5 = extractelement <2 x float> %arg7, i32 0
155*69f7d81dSDavid Stuttard  %i6 = extractelement <2 x float> %arg8, i32 0
156*69f7d81dSDavid Stuttard  %i7 = extractelement <2 x float> %arg9, i32 1
157*69f7d81dSDavid Stuttard
  ; Result vector 0: values taken from %arg3 .. %arg6.
158*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
159*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
160*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
161*69f7d81dSDavid Stuttard  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3
162*69f7d81dSDavid Stuttard
  ; Result vector 1: values taken from %arg7 .. %arg9 plus the scalar %arg10.
163*69f7d81dSDavid Stuttard  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
164*69f7d81dSDavid Stuttard  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
165*69f7d81dSDavid Stuttard  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
166*69f7d81dSDavid Stuttard  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3
167*69f7d81dSDavid Stuttard
  ; Result vector 2: the scalar floats %arg11 .. %arg14.
168*69f7d81dSDavid Stuttard  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
169*69f7d81dSDavid Stuttard  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
170*69f7d81dSDavid Stuttard  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
171*69f7d81dSDavid Stuttard  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3
172*69f7d81dSDavid Stuttard
  ; Reinterpret the i32 arguments as float.
173*69f7d81dSDavid Stuttard  %arg15.f = bitcast i32 %arg15 to float
174*69f7d81dSDavid Stuttard  %arg16.f = bitcast i32 %arg16 to float
175*69f7d81dSDavid Stuttard  %arg17.f = bitcast i32 %arg17 to float
176*69f7d81dSDavid Stuttard  %arg18.f = bitcast i32 %arg18 to float
177*69f7d81dSDavid Stuttard
  ; Fold the four values into two so that two lanes remain free for the extra
  ; arguments while every input is still used.
178*69f7d81dSDavid Stuttard  %arg15_16.f = fadd float %arg15.f, %arg16.f
179*69f7d81dSDavid Stuttard  %arg17_18.f = fadd float %arg17.f, %arg18.f
180*69f7d81dSDavid Stuttard
  ; Result vector 3: the two extra arguments followed by the two sums.
181*69f7d81dSDavid Stuttard  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
182*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
183*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15_16.f, i32 2
184*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg17_18.f, i32 3
185*69f7d81dSDavid Stuttard
  ; Assemble the four vectors into the returned struct, member by member.
186*69f7d81dSDavid Stuttard  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
187*69f7d81dSDavid Stuttard  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
188*69f7d81dSDavid Stuttard  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
189*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3
190*69f7d81dSDavid Stuttard
191*69f7d81dSDavid Stuttard  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
192*69f7d81dSDavid Stuttard}
193*69f7d81dSDavid Stuttard
; Test case: the body reads none of its arguments and returns an undef
; <4 x float> struct. Uses attribute group #0 (no "InitialPSInputAddr").
194*69f7d81dSDavid Stuttard; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
195*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused:
196*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 4
197*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
198*69f7d81dSDavid Stuttard.entry:
  ; No argument is referenced; the whole return value is undef.
199*69f7d81dSDavid Stuttard  ret { <4 x float> } undef
200*69f7d81dSDavid Stuttard}
201*69f7d81dSDavid Stuttard
; Test case: same body as _amdgpu_ps_all_unused (no argument read, undef
; result) but with attribute group #3, which sets "InitialPSInputAddr"="0".
; The expected VGPR count is the same as without that attribute (4).
202*69f7d81dSDavid Stuttard; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
203*69f7d81dSDavid Stuttard; Additionally set the PSInputAddr to 0 via the metadata
204*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_ia0:
205*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 4
206*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_ia0(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #3 {
207*69f7d81dSDavid Stuttard.entry:
  ; No argument is referenced; the whole return value is undef.
208*69f7d81dSDavid Stuttard  ret { <4 x float> } undef
209*69f7d81dSDavid Stuttard}
210*69f7d81dSDavid Stuttard
; Test case: none of %arg3 .. %arg18 is read; only the two trailing extra
; arguments are used, filling lanes 0 and 1 of the result (lanes 2-3 stay
; undef). The expected VGPR count below is 4.
; NOTE(review): 4 is presumably 2 (first input, described as always enabled in
; the note on _amdgpu_ps_all_unused) + 2 extra arguments - confirm.
211*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used:
212*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 4
213*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
214*69f7d81dSDavid Stuttard.entry:
  ; The extra arguments are the only values read in this function.
215*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
216*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1
217*69f7d81dSDavid Stuttard
218*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0
219*69f7d81dSDavid Stuttard
220*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret.res
221*69f7d81dSDavid Stuttard}
222*69f7d81dSDavid Stuttard
; Test case: of %arg3 .. %arg18 only the scalar %arg14 is read; both trailing
; extra arguments are read as well. The three values fill lanes 0-2 of the
; result. The expected VGPR count below is 5.
; NOTE(review): 5 is presumably 2 (first input, described as always enabled in
; the note on _amdgpu_ps_all_unused) + 1 (%arg14) + 2 extra arguments - confirm.
223*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_used:
224*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 5
225*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
226*69f7d81dSDavid Stuttard.entry:
  ; Lane 0 from %arg14, lanes 1-2 from the extra arguments; lane 3 stays undef.
227*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> undef, float %arg14, i32 0
228*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg1, i32 1
229*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %extra_arg2, i32 2
230*69f7d81dSDavid Stuttard
231*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0
232*69f7d81dSDavid Stuttard
233*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret.res
234*69f7d81dSDavid Stuttard}
235*69f7d81dSDavid Stuttard
; Test case: of %arg3 .. %arg18 only the scalars %arg12, %arg13 and %arg14 are
; read; the two trailing extra arguments are declared but never read. The
; expected VGPR count below is 7.
; NOTE(review): 7 is presumably 2 (first input, described as always enabled in
; the note on _amdgpu_ps_all_unused) + 3 used scalars + 2 extra arguments -
; confirm.
236*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_unused:
237*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 7
238*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
239*69f7d81dSDavid Stuttard.entry:
  ; Lanes 0-2 come from %arg12 .. %arg14; lane 3 stays undef.
240*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> undef, float %arg12, i32 0
241*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg13, i32 1
242*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg14, i32 2
243*69f7d81dSDavid Stuttard
244*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0
245*69f7d81dSDavid Stuttard
246*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret.res
247*69f7d81dSDavid Stuttard}
248*69f7d81dSDavid Stuttard
; Test case: the signature includes the two trailing extra arguments, but the
; body reads no argument at all and returns an undef <4 x float> struct.
; The expected VGPR count below is 4.
249*69f7d81dSDavid Stuttard; Extra unused inputs are always added to the allocation
250*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused:
251*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 4
252*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
253*69f7d81dSDavid Stuttard.entry:
254*69f7d81dSDavid Stuttard
  ; No argument is referenced; the whole return value is undef.
255*69f7d81dSDavid Stuttard  ret { <4 x float> } undef
256*69f7d81dSDavid Stuttard}
257*69f7d81dSDavid Stuttard
; Test case: same body as _amdgpu_ps_all_unused_extra_used (only the two extra
; arguments are read) but with attribute group #2, which sets
; "InitialPSInputAddr"="0xffff". The expected VGPR count rises from 4 to 26.
; NOTE(review): 0xffff has 16 bits set and there are 16 arguments
; %arg3 .. %arg18 - presumably one bit per argument, forcing all of them to be
; allocated; confirm against the AMDGPU backend.
258*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used_no_packing:
259*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 26
260*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
261*69f7d81dSDavid Stuttard.entry:
  ; The extra arguments are the only values read in this function.
262*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
263*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1
264*69f7d81dSDavid Stuttard
265*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0
266*69f7d81dSDavid Stuttard
267*69f7d81dSDavid Stuttard  ret { <4 x float> } %ret.res
268*69f7d81dSDavid Stuttard}
269*69f7d81dSDavid Stuttard
; Test case: the body reads no argument and returns an undef <4 x float>
; struct, with attribute group #2 ("InitialPSInputAddr"="0xffff") and the two
; trailing extra arguments in the signature. The expected VGPR count below is
; 26, versus 4 for the same body under attribute group #0
; (_amdgpu_ps_all_unused_extra_unused).
270*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused_no_packing:
271*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 26
272*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
273*69f7d81dSDavid Stuttard.entry:
  ; No argument is referenced; the whole return value is undef.
274*69f7d81dSDavid Stuttard  ret { <4 x float> } undef
275*69f7d81dSDavid Stuttard}
276*69f7d81dSDavid Stuttard
; Test case: %arg3 .. %arg16 and both trailing extra arguments are read, while
; the last two i32 arguments (%arg17, %arg18) are never referenced. Uses
; attribute group #0. Both reported VGPR figures are expected to be 24, i.e.
; two fewer than for _amdgpu_ps_all_arg_extra, where %arg17 / %arg18 are used.
277*69f7d81dSDavid Stuttard; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_arg_extra:
278*69f7d81dSDavid Stuttard; CHECK: NumVgprs: 24
279*69f7d81dSDavid Stuttard; CHECK: NumVGPRsForWavesPerEU: 24
280*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
281*69f7d81dSDavid Stuttard.entry:
  ; Take one element out of each vector-typed argument.
282*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
283*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
284*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
285*69f7d81dSDavid Stuttard  %i4 = extractelement <3 x float> %arg6, i32 1
286*69f7d81dSDavid Stuttard  %i5 = extractelement <2 x float> %arg7, i32 0
287*69f7d81dSDavid Stuttard  %i6 = extractelement <2 x float> %arg8, i32 0
288*69f7d81dSDavid Stuttard  %i7 = extractelement <2 x float> %arg9, i32 1
289*69f7d81dSDavid Stuttard
  ; Result vector 0: values taken from %arg3 .. %arg6.
290*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
291*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
292*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
293*69f7d81dSDavid Stuttard  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3
294*69f7d81dSDavid Stuttard
  ; Result vector 1: values taken from %arg7 .. %arg9 plus the scalar %arg10.
295*69f7d81dSDavid Stuttard  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
296*69f7d81dSDavid Stuttard  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
297*69f7d81dSDavid Stuttard  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
298*69f7d81dSDavid Stuttard  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3
299*69f7d81dSDavid Stuttard
  ; Result vector 2: the scalar floats %arg11 .. %arg14.
300*69f7d81dSDavid Stuttard  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
301*69f7d81dSDavid Stuttard  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
302*69f7d81dSDavid Stuttard  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
303*69f7d81dSDavid Stuttard  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3
304*69f7d81dSDavid Stuttard
  ; Only %arg15 and %arg16 are reinterpreted; %arg17 / %arg18 stay unused.
305*69f7d81dSDavid Stuttard  %arg15.f = bitcast i32 %arg15 to float
306*69f7d81dSDavid Stuttard  %arg16.f = bitcast i32 %arg16 to float
307*69f7d81dSDavid Stuttard
  ; Result vector 3: the two extra arguments followed by %arg15 / %arg16.
308*69f7d81dSDavid Stuttard  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
309*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
310*69f7d81dSDavid Stuttard  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15.f, i32 2
311*69f7d81dSDavid Stuttard  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg16.f, i32 3
312*69f7d81dSDavid Stuttard
  ; Assemble the four vectors into the returned struct, member by member.
313*69f7d81dSDavid Stuttard  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
314*69f7d81dSDavid Stuttard  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
315*69f7d81dSDavid Stuttard  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
316*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3
317*69f7d81dSDavid Stuttard
318*69f7d81dSDavid Stuttard  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
319*69f7d81dSDavid Stuttard}
320*69f7d81dSDavid Stuttard
; Test case: %arg3 .. %arg14 and both trailing extra arguments are read; the
; four i32 arguments %arg15 .. %arg18 are never referenced. Uses attribute
; group #2, which sets "InitialPSInputAddr"="0xffff". Both reported VGPR
; figures are expected to be 26 even though four arguments are unused.
321*69f7d81dSDavid Stuttard;CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_no_packing_arg_extra:
322*69f7d81dSDavid Stuttard;CHECK: NumVgprs: 26
323*69f7d81dSDavid Stuttard;CHECK: NumVGPRsForWavesPerEU: 26
324*69f7d81dSDavid Stuttarddefine dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_no_packing_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
325*69f7d81dSDavid Stuttard.entry:
  ; Take one element out of each vector-typed argument.
326*69f7d81dSDavid Stuttard  %i1 = extractelement <2 x float> %arg3, i32 1
327*69f7d81dSDavid Stuttard  %i2 = extractelement <2 x float> %arg4, i32 0
328*69f7d81dSDavid Stuttard  %i3 = extractelement <2 x float> %arg5, i32 1
329*69f7d81dSDavid Stuttard  %i4 = extractelement <3 x float> %arg6, i32 1
330*69f7d81dSDavid Stuttard  %i5 = extractelement <2 x float> %arg7, i32 0
331*69f7d81dSDavid Stuttard  %i6 = extractelement <2 x float> %arg8, i32 0
332*69f7d81dSDavid Stuttard  %i7 = extractelement <2 x float> %arg9, i32 1
333*69f7d81dSDavid Stuttard
  ; Result vector 0: values taken from %arg3 .. %arg6.
334*69f7d81dSDavid Stuttard  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
335*69f7d81dSDavid Stuttard  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
336*69f7d81dSDavid Stuttard  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
337*69f7d81dSDavid Stuttard  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3
338*69f7d81dSDavid Stuttard
  ; Result vector 1: values taken from %arg7 .. %arg9 plus the scalar %arg10.
339*69f7d81dSDavid Stuttard  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
340*69f7d81dSDavid Stuttard  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
341*69f7d81dSDavid Stuttard  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
342*69f7d81dSDavid Stuttard  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3
343*69f7d81dSDavid Stuttard
  ; Result vector 2: the scalar floats %arg11 .. %arg14.
344*69f7d81dSDavid Stuttard  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
345*69f7d81dSDavid Stuttard  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
346*69f7d81dSDavid Stuttard  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
347*69f7d81dSDavid Stuttard  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3
348*69f7d81dSDavid Stuttard
  ; Result vector 3: only lanes 0-1 are filled (extra arguments); lanes 2-3
  ; stay undef because %arg15 .. %arg18 are not used here.
349*69f7d81dSDavid Stuttard  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
350*69f7d81dSDavid Stuttard  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
351*69f7d81dSDavid Stuttard
  ; Assemble the four vectors into the returned struct, member by member.
352*69f7d81dSDavid Stuttard  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
353*69f7d81dSDavid Stuttard  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
354*69f7d81dSDavid Stuttard  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
355*69f7d81dSDavid Stuttard  %ret.res  = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.1, 3
356*69f7d81dSDavid Stuttard
357*69f7d81dSDavid Stuttard  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
358*69f7d81dSDavid Stuttard}
359*69f7d81dSDavid Stuttard
; Attribute groups referenced by the functions in this file. All four share
; nounwind and the same "target-features" string; they differ only in whether
; and how "InitialPSInputAddr" is set.
; #0: baseline, no "InitialPSInputAddr".
360*69f7d81dSDavid Stuttardattributes #0 = { nounwind "target-features"=",+wavefrontsize64,+cumode"  }
; #1: "InitialPSInputAddr"="2"; used by _amdgpu_ps_2_arg_no_pack.
361*69f7d81dSDavid Stuttardattributes #1 = { nounwind "InitialPSInputAddr"="2" "target-features"=",+wavefrontsize64,+cumode" }
; #2: "InitialPSInputAddr"="0xffff"; used by the three *no_packing* functions.
362*69f7d81dSDavid Stuttardattributes #2 = { nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" }
; #3: "InitialPSInputAddr"="0"; used by _amdgpu_ps_all_unused_ia0.
363*69f7d81dSDavid Stuttardattributes #3 = { nounwind "InitialPSInputAddr"="0" "target-features"=",+wavefrontsize64,+cumode" }
364