; RUN: llc < %s -mtriple=amdgcn-pal -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK

; Checks the VGPR count reported for amdgpu_ps functions when different subsets
; of their (non-inreg) input arguments are used, with and without extra trailing
; arguments, and under different "InitialPSInputAddr" attribute values (see the
; attribute groups at the end of the file).

; Only one element of %arg3 is used.
; CHECK-LABEL: {{^}}_amdgpu_ps_1_arg:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1, 0
  ret { <4 x float> } %ret2
}

; The first three vector arguments (%arg3, %arg4, %arg5) are used.
; CHECK-LABEL: {{^}}_amdgpu_ps_3_arg:
; CHECK: NumVgprs: 6
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; %arg3 and %arg5 are used; %arg4, which sits between them, is not.
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_gap:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; Using an InitialPSInputAddr of 0x2 causes the 2nd VGPR arg to be included in
; the packing. This increases the total number of VGPRs and, in turn, prevents
; the 3rd VGPR arg from being packed adjacent to the 1st (the only 2 used
; arguments; presumably %arg5 and %arg3 respectively in the IR below).
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_no_pack:
; CHECK: NumVgprs: 6
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #1 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; Every non-inreg argument (%arg3 through %arg18) is used.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg:
; CHECK: NumVgprs: 24
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3

  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Extra arguments have to be allocated even if they're unused
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra_unused:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3

  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Every regular argument and both extra arguments are used.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra:
; CHECK: NumVgprs: 26
; CHECK: NumVGPRsForWavesPerEU: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  ; Fold the four i32 inputs into two values so that all of them stay live
  ; while leaving room in the last result vector for the two extra arguments.
  %arg15_16.f = fadd float %arg15.f, %arg16.f
  %arg17_18.f = fadd float %arg17.f, %arg18.f

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15_16.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg17_18.f, i32 3

  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  ret { <4 x float> } undef
}

; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
; Additionally set the PSInputAddr to 0 via the metadata
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_ia0:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_ia0(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #3 {
.entry:
  ret { <4 x float> } undef
}

; No regular argument is used; both extra arguments are.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0

  ret { <4 x float> } %ret.res
}

; Only %arg14 and both extra arguments are used.
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_used:
; CHECK: NumVgprs: 5
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %arg14, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg1, i32 1
  %ret4.3 = insertelement <4 x float> %ret4.2, float %extra_arg2, i32 2

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0

  ret { <4 x float> } %ret.res
}

; Only %arg12, %arg13 and %arg14 are used; the extra arguments are not.
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_unused:
; CHECK: NumVgprs: 7
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %arg12, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg13, i32 1
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg14, i32 2

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0

  ret { <4 x float> } %ret.res
}

; Extra unused inputs are always added to the allocation
; NOTE(review): the expected count below equals the minimum seen for
; _amdgpu_ps_all_unused, which has no extra arguments - confirm that the
; comment above still describes the intended behaviour.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:

  ret { <4 x float> } undef
}

; Only the extra arguments are used, with InitialPSInputAddr set to 0xffff.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used_no_packing:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0

  ret { <4 x float> } %ret.res
}

; Nothing is used, with InitialPSInputAddr set to 0xffff.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused_no_packing:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  ret { <4 x float> } undef
}

; %arg17 and %arg18 are unused; everything else, including the extra
; arguments, is used.
; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_arg_extra:
; CHECK: NumVgprs: 24
; CHECK: NumVGPRsForWavesPerEU: 24
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg16.f, i32 3

  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; %arg15 through %arg18 are unused, the extra arguments are used, and
; InitialPSInputAddr is set to 0xffff.
; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_no_packing_arg_extra:
; CHECK: NumVgprs: 26
; CHECK: NumVGPRsForWavesPerEU: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_no_packing_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1

  %ret.res1 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.1, 3

  ret { < 4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Attribute groups: all share the same target features and differ only in
; whether, and to what value, "InitialPSInputAddr" is set.
attributes #0 = { nounwind "target-features"=",+wavefrontsize64,+cumode" }
attributes #1 = { nounwind "InitialPSInputAddr"="2" "target-features"=",+wavefrontsize64,+cumode" }
attributes #2 = { nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" }
attributes #3 = { nounwind "InitialPSInputAddr"="0" "target-features"=",+wavefrontsize64,+cumode" }