;RUN: llc < %s -mtriple=amdgcn-pal -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
;RUN: llc < %s -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK

; Every function below is an amdgpu_ps entry point taking the same 19 leading
; arguments (three inreg i32 values followed by non-inreg vector/scalar values);
; some additionally take two trailing floats (%extra_arg1, %extra_arg2).
; The directives verify the NumVgprs value (and, for a few functions, the
; NumVGPRsForWavesPerEU value) that llc prints for different combinations of
; used/unused arguments and of the "InitialPSInputAddr" function attribute.

; Reads only %arg3.
; CHECK-LABEL: {{^}}_amdgpu_ps_1_arg:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_1_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1, 0
  ret { <4 x float> } %ret2
}

; Reads %arg3, %arg4 and %arg5.
; CHECK-LABEL: {{^}}_amdgpu_ps_3_arg:
; CHECK: NumVgprs: 6
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_3_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; Reads %arg3 and %arg5 but not %arg4, which sits between them.
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_gap:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_gap(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; Using InitialPSInputAddr of 0x2 causes the 2nd VGPR arg to be included in the packing - this increases the total number of VGPRs and in turn makes arg3 not be packed to be
; adjacent to arg1 (the only 2 used arguments)
; Same body as _amdgpu_ps_2_arg_gap; only the attribute group (#1) differs.
; CHECK-LABEL: {{^}}_amdgpu_ps_2_arg_no_pack:
; CHECK: NumVgprs: 6
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_2_arg_no_pack(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #1 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i3 = extractelement <2 x float> %arg5, i32 1
  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.2 = insertelement <4 x float> %ret1, float %i3, i32 1
  %ret2 = insertvalue { <4 x float> } undef, <4 x float> %ret1.2, 0
  ret { <4 x float> } %ret2
}

; Reads every non-inreg argument (%arg3 .. %arg18).
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg:
; CHECK: NumVgprs: 24
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3

  %ret.res1 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Extra arguments have to be allocated even if they're unused
; Same body as _amdgpu_ps_all_arg; the two trailing extra arguments are never read.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra_unused:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  %ret4 = insertelement <4 x float> undef, float %arg15.f, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %arg16.f, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg17.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg18.f, i32 3

  %ret.res1 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Reads every non-inreg argument, including both trailing extra arguments.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_arg_extra:
; CHECK: NumVgprs: 26
; CHECK: NumVGPRsForWavesPerEU: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_all_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float
  %arg17.f = bitcast i32 %arg17 to float
  %arg18.f = bitcast i32 %arg18 to float

  ; Fold the four i32-derived values into two so all of them stay live
  ; alongside the two extra arguments in a single <4 x float>.
  %arg15_16.f = fadd float %arg15.f, %arg16.f
  %arg17_18.f = fadd float %arg17.f, %arg18.f

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15_16.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg17_18.f, i32 3

  %ret.res1 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #0 {
.entry:
  ret { <4 x float> } undef
}

; Check that when no input args are used we get the minimum allocation - note that we always enable the first input
; Additionally set the PSInputAddr to 0 via the metadata
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_ia0:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_ia0(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18) local_unnamed_addr #3 {
.entry:
  ret { <4 x float> } undef
}

; Reads only the two trailing extra arguments.
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0

  ret { <4 x float> } %ret.res
}

; Reads %arg14 plus the two trailing extra arguments.
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_used:
; CHECK: NumVgprs: 5
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_used(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %arg14, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg1, i32 1
  %ret4.3 = insertelement <4 x float> %ret4.2, float %extra_arg2, i32 2

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0

  ret { <4 x float> } %ret.res
}

; Reads %arg12, %arg13 and %arg14; the trailing extra arguments are never read.
; CHECK-LABEL: {{^}}_amdgpu_ps_part_unused_extra_unused:
; CHECK: NumVgprs: 7
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_part_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %arg12, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg13, i32 1
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg14, i32 2

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.3, 0

  ret { <4 x float> } %ret.res
}

; Extra unused inputs are always added to the allocation
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused:
; CHECK: NumVgprs: 4
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:

  ret { <4 x float> } undef
}

; Same body as _amdgpu_ps_all_unused_extra_used, but with attribute group #2
; ("InitialPSInputAddr"="0xffff").
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_used_no_packing:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_used_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  %ret4.1 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.2 = insertelement <4 x float> %ret4.1, float %extra_arg2, i32 1

  %ret.res = insertvalue { <4 x float> } undef, <4 x float> %ret4.2, 0

  ret { <4 x float> } %ret.res
}

; Reads no arguments at all; uses attribute group #2
; ("InitialPSInputAddr"="0xffff").
; CHECK-LABEL: {{^}}_amdgpu_ps_all_unused_extra_unused_no_packing:
; CHECK: NumVgprs: 26
define dllexport amdgpu_ps { <4 x float> } @_amdgpu_ps_all_unused_extra_unused_no_packing(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  ret { <4 x float> } undef
}

; Reads %arg3 .. %arg16 and both extra arguments; %arg17 and %arg18 are never read.
; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_arg_extra:
; CHECK: NumVgprs: 24
; CHECK: NumVGPRsForWavesPerEU: 24
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #0 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %arg15.f = bitcast i32 %arg15 to float
  %arg16.f = bitcast i32 %arg16 to float

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1
  %ret4.2 = insertelement <4 x float> %ret4.1, float %arg15.f, i32 2
  %ret4.3 = insertelement <4 x float> %ret4.2, float %arg16.f, i32 3

  %ret.res1 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.3, 3

  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Reads %arg3 .. %arg14 and both extra arguments; %arg15 .. %arg18 are never
; read. Uses attribute group #2 ("InitialPSInputAddr"="0xffff").
; CHECK-LABEL: {{^}}_amdgpu_ps_some_unused_no_packing_arg_extra:
; CHECK: NumVgprs: 26
; CHECK: NumVGPRsForWavesPerEU: 26
define dllexport amdgpu_ps { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @_amdgpu_ps_some_unused_no_packing_arg_extra(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x float> %arg3, <2 x float> %arg4, <2 x float> %arg5, <3 x float> %arg6, <2 x float> %arg7, <2 x float> %arg8, <2 x float> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, i32 %arg15, i32 %arg16, i32 %arg17, i32 %arg18, float %extra_arg1, float %extra_arg2) local_unnamed_addr #2 {
.entry:
  %i1 = extractelement <2 x float> %arg3, i32 1
  %i2 = extractelement <2 x float> %arg4, i32 0
  %i3 = extractelement <2 x float> %arg5, i32 1
  %i4 = extractelement <3 x float> %arg6, i32 1
  %i5 = extractelement <2 x float> %arg7, i32 0
  %i6 = extractelement <2 x float> %arg8, i32 0
  %i7 = extractelement <2 x float> %arg9, i32 1

  %ret1 = insertelement <4 x float> undef, float %i1, i32 0
  %ret1.1 = insertelement <4 x float> %ret1, float %i2, i32 1
  %ret1.2 = insertelement <4 x float> %ret1.1, float %i3, i32 2
  %ret1.3 = insertelement <4 x float> %ret1.2, float %i4, i32 3

  %ret2 = insertelement <4 x float> undef, float %i5, i32 0
  %ret2.1 = insertelement <4 x float> %ret2, float %i6, i32 1
  %ret2.2 = insertelement <4 x float> %ret2.1, float %i7, i32 2
  %ret2.3 = insertelement <4 x float> %ret2.2, float %arg10, i32 3

  %ret3 = insertelement <4 x float> undef, float %arg11, i32 0
  %ret3.1 = insertelement <4 x float> %ret3, float %arg12, i32 1
  %ret3.2 = insertelement <4 x float> %ret3.1, float %arg13, i32 2
  %ret3.3 = insertelement <4 x float> %ret3.2, float %arg14, i32 3

  %ret4 = insertelement <4 x float> undef, float %extra_arg1, i32 0
  %ret4.1 = insertelement <4 x float> %ret4, float %extra_arg2, i32 1

  %ret.res1 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } undef, <4 x float> %ret1.3, 0
  %ret.res2 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res1, <4 x float> %ret2.3, 1
  %ret.res3 = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res2, <4 x float> %ret3.3, 2
  %ret.res = insertvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res3, <4 x float> %ret4.1, 3

  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ret.res
}

; Attribute groups. All four share the same target features and differ only in
; "InitialPSInputAddr": absent (#0), "2" (#1), "0xffff" (#2) and "0" (#3).
attributes #0 = { nounwind "target-features"=",+wavefrontsize64,+cumode" }
attributes #1 = { nounwind "InitialPSInputAddr"="2" "target-features"=",+wavefrontsize64,+cumode" }
attributes #2 = { nounwind "InitialPSInputAddr"="0xffff" "target-features"=",+wavefrontsize64,+cumode" }
attributes #3 = { nounwind "InitialPSInputAddr"="0" "target-features"=",+wavefrontsize64,+cumode" }