; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Shader entry points (amdgpu_vs / amdgpu_ps) that return values instead of
; returning void. Each function is compiled four times (SelectionDAG and
; GlobalISel, tahiti and tonga) and all four outputs are matched against the
; same set of checks. Every function that returns a value also carries a
; negative check that no s_endpgm is emitted for it.

; Exports %arg3 and then returns { %arg3 + 1.0, %arg3 }. The checks expect the
; incoming v0 to be copied to v1, and an s_waitcnt expcnt(0) to appear before
; the add that overwrites v0 (v0 is still an operand of the export).
; GCN-LABEL: {{^}}vgpr:
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v1
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %x = fadd float %arg3, 1.000000e+00
  %a = insertvalue { float, float } undef, float %x, 0
  %b = insertvalue { float, float } %a, float %arg3, 1
  ret { float, float } %b
}

; Same export, but the returned struct is a constant: four float literals are
; expected to be materialized into v0..v3 after the export.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}

; Pixel-shader cases. Each one first checks two (id, value) pairs of .long
; directives that precede the function label.
; NOTE(review): 165580 / 165584 are presumably the SPI_PS_INPUT_ENA (0x286CC)
; and SPI_PS_INPUT_ADDR (0x286D0) register ids -- confirm against the backend.
; In the vgpr_ps_addr* cases the second value equals 562 OR-ed with the
; function's "InitialPSInputAddr" attribute (0, 1, 119, 418 -> 562, 563, 631,
; 946), while the first value stays 562.

; "InitialPSInputAddr"="0": both values are 562. No moves into v0..v2 are
; expected; only v3 and v4 need a copy.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; The function reads none of its arguments and returns a constant; both
; values are still expected to be 1 rather than 0.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ret float 1.000000e+00
}

; Returns %arg14 together with %arg8 reinterpreted as <2 x float>; both
; values are expected to be 2081.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN-DAG: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  %f = bitcast <2 x i32> %arg8 to <2 x float>
  %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
  %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
  ret { float, <2 x float> } %s1
}

; Same body as vgpr_ps_addr0 with "InitialPSInputAddr"="1": the second value
; becomes 563 and all five results need a copy from a higher register.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; Same body with "InitialPSInputAddr"="119": the second value becomes 631 and
; the source registers of the copies move further up.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN-DAG: v_mov_b32_e32 v2, v6
; GCN-DAG: v_mov_b32_e32 v3, v8
; GCN-DAG: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; Same body with "InitialPSInputAddr"="418": the second value becomes 946.
; As in the addr0 case, no moves into v0..v2 are expected.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
  %i0 = extractelement <2 x i32> %arg4, i32 0
  %i1 = extractelement <2 x i32> %arg4, i32 1
  %i2 = extractelement <2 x i32> %arg7, i32 0
  %i3 = extractelement <2 x i32> %arg8, i32 0
  %f0 = bitcast i32 %i0 to float
  %f1 = bitcast i32 %i1 to float
  %f2 = bitcast i32 %i2 to float
  %f3 = bitcast i32 %i3 to float
  %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  ret { float, float, float, float, float } %r4
}

; Integer struct returned from a vertex shader; the checks expect the results
; to be produced with scalar instructions into s0 and s2.
; NOTE(review): %c is built from %a, not %b, so %b is dead and field 1 of the
; returned struct stays undef. The checks only cover fields 0 and 2, which is
; consistent with that; kept exactly as found.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_mov_b32 s2, s3
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
  %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
  %c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
  ret { i32, i32, i32 } %c
}

; Constant integer struct: the literals 5..8 are expected in s0..s3, with no
; redundant self-copy of s0. %x is computed but never used.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}

; Mixed float / i32 struct: the float fields are expected in v0, v1 and the
; integer fields in s0..s2, combining the patterns of the vgpr and sgpr cases.
; GCN-LABEL: {{^}}both:
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %v = fadd float %arg3, 1.000000e+00
  %s = add i32 %arg2, 2
  %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
  %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
  %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
  %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
  %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
  ret { float, i32, float, i32, i32 } %a4
}

; Nested constant struct containing a vector: the float members are expected
; in v0..v2 and the integer members in s0, s1.
; GCN-LABEL: {{^}}structure_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: s_waitcnt expcnt(0)
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { { float, i32 }, { i32, <2 x float> } } { { float 1.000000e+00, i32 2 }, { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}

; A pixel shader that only returns undef is expected to report a code length
; of exactly 0 bytes, i.e. the return itself contributes no encoded size.
; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
; GCN: codeLenInByte = 0{{$}}
define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
  ret float undef
}

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; #1..#4 differ only in the "InitialPSInputAddr" value used by the pixel-shader
; cases above.
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }