; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
;
; Codegen test for the llvm.amdgcn.lds.param.load and llvm.amdgcn.interp.*
; intrinsics. The same IR is compiled three times: gfx1100 with real-true16
; enabled, gfx1100 with it disabled, and gfx1200.
;
; Differences visible in the expected output below:
;   * gfx1100 prints "lds_param_load ... wait_vdst:15" and "exp";
;     gfx1200 prints "ds_param_load ... wait_va_vdst:15 wait_vm_vsrc:1"
;     and "export".
;   * For the f16 variants, the true16 run addresses register halves
;     (v1.l / v1.h), while the other two runs use op_sel modifiers.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Two attributes (attr0.y and attr1.x) are loaded, interpolated with the
; p10/p2 pair, and all four intermediate/final values are exported to mrt0.
; The expected output gives the two p10 instructions wait_exp:1 and
; wait_exp:0, and the p2 instructions wait_exp:7.
define amdgpu_ps void @v_interp_f32(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX11-LABEL: v_interp_f32:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    s_mov_b32 m0, s2
; GFX11-NEXT:    lds_param_load v0, attr0.y wait_vdst:15
; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
; GFX11-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-NEXT:    v_mov_b32_e32 v2, s0
; GFX11-NEXT:    v_mov_b32_e32 v4, s1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
; GFX11-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
; GFX11-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
; GFX11-NEXT:    exp mrt0 v3, v2, v5, v4 done
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: v_interp_f32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s3, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_mov_b32 m0, s2
; GFX12-NEXT:    ds_param_load v0, attr0.y wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    s_mov_b32 exec_lo, s3
; GFX12-NEXT:    v_mov_b32_e32 v2, s0
; GFX12-NEXT:    v_mov_b32_e32 v4, s1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_interp_p10_f32 v3, v0, v2, v0 wait_exp:1
; GFX12-NEXT:    v_interp_p10_f32 v2, v1, v2, v1 wait_exp:0
; GFX12-NEXT:    v_interp_p2_f32 v5, v0, v4, v3 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_interp_p2_f32 v4, v1, v4, v5 wait_exp:7
; GFX12-NEXT:    export mrt0 v3, v2, v5, v4 done
; GFX12-NEXT:    s_endpgm
main_body:
  ; The first two immediates of lds.param.load print as "attr<2nd>.<x|y>":
  ; (1, 0) is shown as attr0.y and (0, 1) as attr1.x in the checks above.
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %m0)
  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
  %p1_0 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
  %p0_1 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
  ; NOTE(review): the last operand here is %p1_0 (the p2 result for %p0), not
  ; %p0_1 (the p10 result for %p1). The generated checks match this data flow,
  ; so it is presumably deliberate -- confirm before "fixing" it.
  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_0, float %p0_1, float %p1_0, float %p1_1, i1 true, i1 true) #0
  ret void
}

; Four attributes (attr0.x .. attr3.x), each run through its own p10 -> p2
; chain, with the four p2 results exported. The expected output groups the
; four loads together and gives the p10 instructions wait_exp:3, 2, 1, 0
; in order.
define amdgpu_ps void @v_interp_f32_many(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX11-LABEL: v_interp_f32_many:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    s_mov_b32 m0, s2
; GFX11-NEXT:    lds_param_load v0, attr0.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v1, attr1.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v2, attr2.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v3, attr3.x wait_vdst:15
; GFX11-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
; GFX11-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
; GFX11-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
; GFX11-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
; GFX11-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
; GFX11-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
; GFX11-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
; GFX11-NEXT:    exp mrt0 v6, v7, v8, v4 done
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: v_interp_f32_many:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s3, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_mov_b32 m0, s2
; GFX12-NEXT:    ds_param_load v0, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v1, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v2, attr2.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v3, attr3.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    s_mov_b32 exec_lo, s3
; GFX12-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_interp_p10_f32 v6, v0, v4, v0 wait_exp:3
; GFX12-NEXT:    v_interp_p10_f32 v7, v1, v4, v1 wait_exp:2
; GFX12-NEXT:    v_interp_p10_f32 v8, v2, v4, v2 wait_exp:1
; GFX12-NEXT:    v_interp_p10_f32 v4, v3, v4, v3 wait_exp:0
; GFX12-NEXT:    v_interp_p2_f32 v6, v0, v5, v6 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_interp_p2_f32 v7, v1, v5, v7 wait_exp:7
; GFX12-NEXT:    v_interp_p2_f32 v8, v2, v5, v8 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4)
; GFX12-NEXT:    v_interp_p2_f32 v4, v3, v5, v4 wait_exp:7
; GFX12-NEXT:    export mrt0 v6, v7, v8, v4 done
; GFX12-NEXT:    s_endpgm
main_body:
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
  %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
  %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
  %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
  %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
  %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
  %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
  %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
  ret void
}

; Same four-attribute pattern as v_interp_f32_many, but %i and %j are loaded
; from global memory instead of arriving as inreg arguments. The expected
; output merges the two loads into one global_load_b64 at offset 4 and waits
; for it (s_waitcnt vmcnt(0) on gfx1100, s_wait_loadcnt 0x0 on gfx1200)
; before the first interpolation instruction.
define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
; GFX11-LABEL: v_interp_f32_many_vm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4
; GFX11-NEXT:    s_mov_b32 m0, s0
; GFX11-NEXT:    s_mov_b32 s0, exec_lo
; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT:    lds_param_load v2, attr0.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v3, attr1.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v4, attr2.x wait_vdst:15
; GFX11-NEXT:    lds_param_load v5, attr3.x wait_vdst:15
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
; GFX11-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
; GFX11-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
; GFX11-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
; GFX11-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
; GFX11-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
; GFX11-NEXT:    exp mrt0 v6, v7, v8, v0 done
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: v_interp_f32_many_vm:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    global_load_b64 v[0:1], v[0:1], off offset:4
; GFX12-NEXT:    s_mov_b32 m0, s0
; GFX12-NEXT:    s_mov_b32 s0, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    ds_param_load v2, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v3, attr1.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v4, attr2.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    ds_param_load v5, attr3.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    s_mov_b32 exec_lo, s0
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_interp_p10_f32 v6, v2, v0, v2 wait_exp:3
; GFX12-NEXT:    v_interp_p10_f32 v7, v3, v0, v3 wait_exp:2
; GFX12-NEXT:    v_interp_p10_f32 v8, v4, v0, v4 wait_exp:1
; GFX12-NEXT:    v_interp_p10_f32 v0, v5, v0, v5 wait_exp:0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_interp_p2_f32 v6, v2, v1, v6 wait_exp:7
; GFX12-NEXT:    v_interp_p2_f32 v7, v3, v1, v7 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX12-NEXT:    v_interp_p2_f32 v8, v4, v1, v8 wait_exp:7
; GFX12-NEXT:    v_interp_p2_f32 v0, v5, v1, v0 wait_exp:7
; GFX12-NEXT:    export mrt0 v6, v7, v8, v0 done
; GFX12-NEXT:    s_endpgm
main_body:
  ; %i and %j are the floats at element indices 1 and 2 of %ptr.
  %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1
  %i = load float, ptr addrspace(1) %i.ptr, align 4
  %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2
  %j = load float, ptr addrspace(1) %j.ptr, align 4
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)
  %p3 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 3, i32 %m0)
  %p0_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p0, float %i, float %p0)
  %p0_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p0, float %j, float %p0_0)
  %p1_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p1, float %i, float %p1)
  %p1_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p1, float %j, float %p1_0)
  %p2_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p2, float %i, float %p2)
  %p2_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p2, float %j, float %p2_0)
  %p3_0 = call float @llvm.amdgcn.interp.inreg.p10(float %p3, float %i, float %p3)
  %p3_1 = call float @llvm.amdgcn.interp.inreg.p2(float %p3, float %j, float %p3_0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %p0_1, float %p1_1, float %p2_1, float %p3_1, i1 true, i1 true) #0
  ret void
}

; f16 interpolation of a single attribute, done twice: once with the trailing
; i1 flag set to 0 and once with it set to 1, then the two half results are
; added. In the expected output the flag=1 calls use the .h register halves
; in the true16 run, and op_sel:[1,0,1,0] (p10) / op_sel:[1,0,0,0] (p2) in
; the other two runs.
define amdgpu_ps half @v_interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX11-TRUE16-LABEL: v_interp_f16:
; GFX11-TRUE16:       ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2
; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX11-FAKE16-LABEL: v_interp_f16:
; GFX11-FAKE16:       ; %bb.0: ; %main_body
; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2
; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0
; GFX11-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: v_interp_f16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s3, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_mov_b32 m0, s2
; GFX12-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    s_mov_b32 exec_lo, s3
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    v_mov_b32_e32 v2, s1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_interp_p10_f16_f32 v3, v1, v0, v1 wait_exp:0
; GFX12-NEXT:    v_interp_p10_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GFX12-NEXT:    v_interp_p2_f16_f32 v3, v1, v2, v3 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_interp_p2_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
; GFX12-NEXT:    v_add_f16_e32 v0, v3, v0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 0)
  %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %l_p0, i1 0)
  %h_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float %p0, float %i, float %p0, i1 1)
  %h_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float %p0, float %j, float %h_p0, i1 1)
  %res = fadd half %l_p1, %h_p1
  ret half %res
}

; Same shape as v_interp_f16, but calls the llvm.amdgcn.interp.p10.rtz.f16 /
; llvm.amdgcn.interp.p2.rtz.f16 intrinsics; the expected output uses the
; v_interp_p10_rtz_f16_f32 / v_interp_p2_rtz_f16_f32 mnemonics.
define amdgpu_ps half @v_interp_rtz_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
; GFX11-TRUE16-LABEL: v_interp_rtz_f16:
; GFX11-TRUE16:       ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-TRUE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-TRUE16-NEXT:    s_mov_b32 m0, s2
; GFX11-TRUE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
; GFX11-TRUE16-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v2, s1
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1.l, v0, v1.l wait_exp:0
; GFX11-TRUE16-NEXT:    v_interp_p10_rtz_f16_f32 v4, v1.h, v0, v1.h wait_exp:7
; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.l, v1.l, v2, v3 wait_exp:7
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_interp_p2_rtz_f16_f32 v0.h, v1.h, v2, v4 wait_exp:7
; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX11-FAKE16-LABEL: v_interp_rtz_f16:
; GFX11-FAKE16:       ; %bb.0: ; %main_body
; GFX11-FAKE16-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-FAKE16-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX11-FAKE16-NEXT:    s_mov_b32 m0, s2
; GFX11-FAKE16-NEXT:    lds_param_load v1, attr0.x wait_vdst:15
; GFX11-FAKE16-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
; GFX11-FAKE16-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v3, v0
; GFX11-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: v_interp_rtz_f16:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s3, exec_lo
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_mov_b32 m0, s2
; GFX12-NEXT:    ds_param_load v1, attr0.x wait_va_vdst:15 wait_vm_vsrc:1
; GFX12-NEXT:    s_mov_b32 exec_lo, s3
; GFX12-NEXT:    v_mov_b32_e32 v0, s0
; GFX12-NEXT:    v_mov_b32_e32 v2, s1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_interp_p10_rtz_f16_f32 v3, v1, v0, v1 wait_exp:0
; GFX12-NEXT:    v_interp_p10_rtz_f16_f32 v0, v1, v0, v1 op_sel:[1,0,1,0] wait_exp:7
; GFX12-NEXT:    v_interp_p2_rtz_f16_f32 v3, v1, v2, v3 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_interp_p2_rtz_f16_f32 v0, v1, v2, v0 op_sel:[1,0,0,0] wait_exp:7
; GFX12-NEXT:    v_add_f16_e32 v0, v3, v0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  %l_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 0)
  %l_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %l_p0, i1 0)
  %h_p0 = call float @llvm.amdgcn.interp.p10.rtz.f16(float %p0, float %i, float %p0, i1 1)
  %h_p1 = call half @llvm.amdgcn.interp.p2.rtz.f16(float %p0, float %j, float %h_p0, i1 1)
  %res = fadd half %l_p1, %h_p1
  ret half %res
}

; f16 interpolation where the attribute and accumulator operands are the
; constant 0.0 instead of lds.param.load results (this function takes no %m0
; argument and performs no parameter load). The expected output materializes
; the zero into a VGPR with a move before the interp instructions.
define amdgpu_ps half @v_interp_f16_imm_params(float inreg %i, float inreg %j) #0 {
; GFX11-TRUE16-LABEL: v_interp_f16_imm_params:
; GFX11-TRUE16:       ; %bb.0: ; %main_body
; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, 0
; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, 0
; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v3, s1
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-TRUE16-NEXT:    v_interp_p10_f16_f32 v1, v0.l, v1, v0.l wait_exp:7
; GFX11-TRUE16-NEXT:    v_interp_p2_f16_f32 v0.l, v0.l, v3, v2 wait_exp:7
; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.h, v0.l
; GFX11-TRUE16-NEXT:    ; return to shader part epilog
;
; GFX11-FAKE16-LABEL: v_interp_f16_imm_params:
; GFX11-FAKE16:       ; %bb.0: ; %main_body
; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v2, s1
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-FAKE16-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
; GFX11-FAKE16-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v1, v0
; GFX11-FAKE16-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: v_interp_f16_imm_params:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX12-NEXT:    v_mov_b32_e32 v2, s1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_interp_p10_f16_f32 v1, v0, v1, v0 wait_exp:7
; GFX12-NEXT:    v_interp_p2_f16_f32 v0, v0, v2, v0 wait_exp:7
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX12-NEXT:    v_add_f16_e32 v0, v1, v0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %l_p0 = call float @llvm.amdgcn.interp.inreg.p10.f16(float 0.0, float %i, float 0.0, i1 0)
  %l_p1 = call half @llvm.amdgcn.interp.inreg.p2.f16(float 0.0, float %j, float 0.0, i1 0)
  ; The p10 result is a float, so it is truncated to half before the add.
  %h = fptrunc float %l_p0 to half
  %res = fadd half %h, %l_p1
  ret half %res
}

; Intrinsic declarations used by the functions above.
declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
declare float @llvm.amdgcn.interp.p10.rtz.f16(float, float, float, i1) #0
declare half @llvm.amdgcn.interp.p2.rtz.f16(float, float, float, i1) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
; NOTE(review): llvm.amdgcn.exp.f16 is declared but not called by any function
; visible in this file.
declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }