137512d76SMatt Arsenault; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2*9e9907f1SFangrui Song; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s 3*9e9907f1SFangrui Song; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 4*9e9907f1SFangrui Song; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s 537512d76SMatt Arsenault 637512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) #0 { 737512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2: 837512d76SMatt Arsenault; GFX6: ; %bb.0: 937512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1037512d76SMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 1137512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v4, v3 1237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v5, -v3, v4, 1.0 1337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, v5, v4, v4 1437512d76SMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, vcc, v0, v2, v0 1537512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v6, v5, v4 1637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v3, v6, v5 1737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v4, v6 1837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, -v3, v6, v5 1937512d76SMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, s[4:5], v2, v2, v1 2037512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v7, v5 2137512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v3, v3, v4, v6 2237512d76SMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v0, v3, v2, v0 2337512d76SMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v4, vcc, v1, v2, v1 2437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, -v5, v7, 1.0 2537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, v3, v7, v7 2637512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v6, v4, v3 2737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v5, v6, v4 2837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v3, v6 2937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, -v5, v6, v4 3037512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v3, v4, v3, v6 3137512d76SMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v1, v3, v2, v1 3237512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 3337512d76SMatt Arsenault; 3437512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2: 3537512d76SMatt Arsenault; GFX9: ; %bb.0: 3637512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3737512d76SMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v0 3837512d76SMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v1 3937512d76SMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v5, vcc, v0, v2, v0 4037512d76SMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v6, s[4:5], v1, v2, v1 4137512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v7, v3 4237512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v8, v4 4337512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v9, -v3, v7, 1.0 4437512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, v9, v7, v7 4537512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v10, -v4, v8, 1.0 4637512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v8, v10, v8, v8 4737512d76SMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v9, v5, v7 4837512d76SMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v10, v6, v8 4937512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v11, -v3, v9, v5 5037512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v12, -v4, v10, v6 5137512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v9, v11, v7, v9 5237512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v10, v12, v8, v10 5337512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v3, -v3, v9, v5 5437512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v4, -v4, v10, v6 5537512d76SMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v3, v3, v7, v9 5637512d76SMatt Arsenault; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 5737512d76SMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v4, v4, v8, v10 5837512d76SMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v0, v3, v2, v0 5937512d76SMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v1, v4, v2, v1 6037512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 6137512d76SMatt Arsenault; 6237512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2: 6337512d76SMatt Arsenault; GFX11: ; %bb.0: 6437512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6537512d76SMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v3, null, v2, v2, v0 6637512d76SMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v4, null, v2, v2, v1 6737512d76SMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v9, vcc_lo, v0, v2, v0 6837512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 6937512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v5, v3 7037512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v6, v4 7137512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 7237512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v7, -v3, v5, 1.0 7337512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v8, -v4, v6, 1.0 7437512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 7537512d76SMatt Arsenault; GFX11-NEXT: v_dual_fmac_f32 v5, v7, v5 :: v_dual_fmac_f32 v6, v8, v6 7637512d76SMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v7, s0, v1, v2, v1 7737512d76SMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v8, v9, v5 7837512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7937512d76SMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v10, v7, v6 8037512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v11, -v3, v8, v9 8137512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8237512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v12, -v4, v10, v7 8337512d76SMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v8, v11, v5 8437512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8537512d76SMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v10, v12, v6 8637512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v3, -v3, v8, v9 8737512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 8837512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v4, -v4, v10, v7 8937512d76SMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v3, v3, v5, v8 9037512d76SMatt Arsenault; GFX11-NEXT: s_mov_b32 vcc_lo, s0 9137512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 9237512d76SMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v4, v4, v6, v10 9337512d76SMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v0, v3, v2, v0 9437512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 9537512d76SMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v1, v4, v2, v1 9637512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 9737512d76SMatt Arsenault %div0 = fdiv float %x, %D 9837512d76SMatt Arsenault %div1 = fdiv float %y, %D 9937512d76SMatt Arsenault %insert.0 = insertelement <2 x float> poison, float %div0, i32 0 10037512d76SMatt Arsenault %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1 10137512d76SMatt Arsenault ret <2 x float> %insert.1 10237512d76SMatt Arsenault} 10337512d76SMatt Arsenault 10437512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) #0 { 10537512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp: 10637512d76SMatt Arsenault; GFX6: ; %bb.0: 10737512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 108e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, 1.0 10937512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v4, v3 11037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v5, -v3, v4, 1.0 11137512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, v5, v4, v4 112e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, vcc, 1.0, v2, 1.0 11337512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v6, v5, v4 11437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v3, v6, v5 11537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v4, v6 11637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, -v3, v6, v5 11737512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v3, v3, v4, v6 118e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 119e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 120e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 12137512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 12237512d76SMatt Arsenault; 12337512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp: 12437512d76SMatt Arsenault; GFX9: ; %bb.0: 12537512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 126e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, 1.0 127e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v4, vcc, 1.0, v2, 1.0 128e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v5, v3 129e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, -v3, v5, 1.0 130e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v5, v6, v5, v5 131e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v6, v4, v5 132e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, -v3, v6, v4 133e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, v7, v5, v6 134e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v3, -v3, v6, v4 135e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v3, v3, v5, v6 136e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 137e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 138e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 13937512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 14037512d76SMatt Arsenault; 14137512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp: 14237512d76SMatt Arsenault; GFX11: ; %bb.0: 14337512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 144e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v3, null, v2, v2, 1.0 145e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v6, vcc_lo, 1.0, v2, 1.0 146e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) 147e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v4, v3 14837512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 149e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 150e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 151e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 152e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v5, v6, v4 153e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v7, -v3, v5, v6 154e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 155e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v5, v7, v4 156e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v3, -v3, v5, v6 157e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 158e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v5 159e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 160e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 161e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 162e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 16337512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 16437512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D 16537512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D 16637512d76SMatt Arsenault %insert.0 = insertelement <2 x float> poison, float %div0, i32 0 16737512d76SMatt Arsenault %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1 16837512d76SMatt Arsenault ret <2 x float> %insert.1 16937512d76SMatt Arsenault} 17037512d76SMatt Arsenault 17137512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %D) #1 { 17237512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp_daz: 17337512d76SMatt Arsenault; GFX6: ; %bb.0: 17437512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 175e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, 1.0 17637512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v4, v3 177e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, vcc, 1.0, v2, 1.0 17837512d76SMatt Arsenault; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 17937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v3, v4, 1.0 18037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, v6, v4, v4 18137512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v6, v5, v4 18237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v3, v6, v5 18337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v4, v6 18437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, -v3, v6, v5 18537512d76SMatt Arsenault; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 18637512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v3, v3, v4, v6 187e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 188e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 189e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 19037512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 19137512d76SMatt Arsenault; 19237512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp_daz: 19337512d76SMatt Arsenault; GFX9: ; %bb.0: 19437512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 195e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, 1.0 196e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v4, vcc, 1.0, v2, 1.0 19737512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v5, v3 19837512d76SMatt Arsenault; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 19937512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, -v3, v5, 1.0 20037512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v5, v6, v5, v5 20137512d76SMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v6, v4, v5 20237512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, -v3, v6, v4 20337512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, v7, v5, v6 20437512d76SMatt Arsenault; GFX9-NEXT: v_fma_f32 v3, -v3, v6, v4 20537512d76SMatt Arsenault; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 20637512d76SMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v3, v3, v5, v6 207e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 208e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 209e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 21037512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 21137512d76SMatt Arsenault; 21237512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp_daz: 21337512d76SMatt Arsenault; GFX11: ; %bb.0: 21437512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 215e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v3, null, v2, v2, 1.0 216e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, 1.0, v2, 1.0 21737512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) 21837512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v4, v3 21937512d76SMatt Arsenault; GFX11-NEXT: s_denorm_mode 15 22037512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 22137512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 22237512d76SMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 22337512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 22437512d76SMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 22537512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 22637512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 22737512d76SMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 22837512d76SMatt Arsenault; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 22937512d76SMatt Arsenault; GFX11-NEXT: s_denorm_mode 12 23037512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 23137512d76SMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 232e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 233e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 234e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 235e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 23637512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 23737512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D 23837512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D 23937512d76SMatt Arsenault %insert.0 = insertelement <2 x float> poison, float %div0, i32 0 24037512d76SMatt Arsenault %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1 24137512d76SMatt Arsenault ret <2 x float> %insert.1 24237512d76SMatt Arsenault} 24337512d76SMatt Arsenault 24437512d76SMatt Arsenaultdefine <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) #0 { 24537512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f16_x2_arcp: 24637512d76SMatt Arsenault; GFX6: ; %bb.0: 24737512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 24837512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 24937512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 250e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 25137512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 25237512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 253e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 254e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, 1.0 25537512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v4, v3 256e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, vcc, 1.0, v2, 1.0 25737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v3, v4, 1.0 25837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, v6, v4, v4 25937512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v6, v5, v4 26037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v3, v6, v5 26137512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v4, v6 26237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v3, -v3, v6, v5 26337512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v3, v3, v4, v6 264e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v2, v3, v2, 1.0 265e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 266e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 26737512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 26837512d76SMatt Arsenault; 26937512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f16_x2_arcp: 27037512d76SMatt Arsenault; GFX9: ; %bb.0: 27137512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 27237512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_e32 v2, v2 27337512d76SMatt Arsenault; GFX9-NEXT: v_mul_f16_e32 v0, v0, v2 27437512d76SMatt Arsenault; GFX9-NEXT: v_mul_f16_e32 v1, v1, v2 27537512d76SMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 27637512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 27737512d76SMatt Arsenault; 27837512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f16_x2_arcp: 27937512d76SMatt Arsenault; GFX11: ; %bb.0: 28037512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28137512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v2, v2 28237512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 28337512d76SMatt Arsenault; GFX11-NEXT: v_mul_f16_e32 v0, v0, v2 28437512d76SMatt Arsenault; GFX11-NEXT: v_mul_f16_e32 v1, v1, v2 28537512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 28637512d76SMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 28737512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 28837512d76SMatt Arsenault %div0 = fdiv arcp half %x, %D 28937512d76SMatt Arsenault %div1 = fdiv arcp half %y, %D 29037512d76SMatt Arsenault %insert.0 = insertelement <2 x half> poison, half %div0, i32 0 29137512d76SMatt Arsenault %insert.1 = insertelement <2 x half> %insert.0, half %div1, i32 1 29237512d76SMatt Arsenault ret <2 x half> %insert.1 29337512d76SMatt Arsenault} 29437512d76SMatt Arsenault 29537512d76SMatt Arsenaultdefine <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %D) #0 { 29637512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f64_x2_arcp: 29737512d76SMatt Arsenault; GFX6: ; %bb.0: 29837512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 299e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0 30037512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] 30137512d76SMatt Arsenault; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 30237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 30337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] 304e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[4:5], 1.0 30537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0 306e561e7cbSMatt Arsenault; GFX6-NEXT: s_mov_b32 s4, 0x3ff00000 30737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 308e561e7cbSMatt Arsenault; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, v11 309e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] 31037512d76SMatt Arsenault; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc 311e561e7cbSMatt Arsenault; GFX6-NEXT: v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11] 312e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] 313e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0 314e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] 315e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5] 31637512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 31737512d76SMatt Arsenault; 31837512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f64_x2_arcp: 31937512d76SMatt Arsenault; GFX9: ; %bb.0: 32037512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 321e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0 322e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] 323e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 324e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] 325e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f64 v[10:11], vcc, 1.0, v[4:5], 1.0 326e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0 327e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 328e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f64 v[12:13], v[10:11], v[8:9] 329e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11] 330e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13] 331e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0 332e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] 333e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5] 33437512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 33537512d76SMatt Arsenault; 33637512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f64_x2_arcp: 33737512d76SMatt Arsenault; GFX11: ; %bb.0: 33837512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 339e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], 1.0 340e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f64 v[12:13], vcc_lo, 1.0, v[4:5], 1.0 341e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) 342e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f64_e32 v[8:9], v[6:7] 34337512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 344e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 345e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] 346e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 347e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0 348e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9] 349e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 350e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f64 v[10:11], v[12:13], v[8:9] 351e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f64 v[6:7], -v[6:7], v[10:11], v[12:13] 352e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 353e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[10:11] 354e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0 355e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 356e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] 357e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f64 v[2:3], v[2:3], v[4:5] 35837512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 35937512d76SMatt Arsenault %div0 = fdiv arcp double %x, %D 36037512d76SMatt Arsenault %div1 = fdiv arcp double %y, %D 36137512d76SMatt Arsenault %insert.0 = insertelement <2 x double> poison, double %div0, i32 0 36237512d76SMatt Arsenault %insert.1 = insertelement <2 x double> %insert.0, double %div1, i32 1 36337512d76SMatt Arsenault ret <2 x double> %insert.1 36437512d76SMatt Arsenault} 36537512d76SMatt Arsenault 36637512d76SMatt Arsenaultdefine <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, float %D) #0 { 36737512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x3_arcp: 36837512d76SMatt Arsenault; GFX6: ; %bb.0: 36937512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 370e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, 1.0 37137512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v5, v4 37237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v4, v5, 1.0 37337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v5, v6, v5, v5 374e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v6, vcc, 1.0, v3, 1.0 37537512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v7, v6, v5 37637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v8, -v4, v7, v6 37737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, v8, v5, v7 37837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, -v4, v7, v6 37937512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v4, v4, v5, v7 380e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v3, v4, v3, 1.0 381e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3 382e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 383e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v3 38437512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 38537512d76SMatt Arsenault; 38637512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x3_arcp: 38737512d76SMatt Arsenault; GFX9: ; %bb.0: 38837512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 389e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, 1.0 390e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v5, vcc, 1.0, v3, 1.0 391e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v6, v4 392e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, -v4, v6, 1.0 393e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, v7, v6, v6 394e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v7, v5, v6 395e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v8, -v4, v7, v5 396e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, v8, v6, v7 397e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v4, -v4, v7, v5 398e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v4, v4, v6, v7 399e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v3, v4, v3, 1.0 400e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v3 401e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v3 402e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v2, v2, v3 40337512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 40437512d76SMatt Arsenault; 40537512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x3_arcp: 40637512d76SMatt Arsenault; GFX11: ; %bb.0: 40737512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 408e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v4, null, v3, v3, 1.0 409e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v7, vcc_lo, 1.0, v3, 1.0 410e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) 411e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v5, v4 41237512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 413e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v6, -v4, v5, 1.0 414e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v5 415e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 416e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v6, v7, v5 417e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v8, -v4, v6, v7 418e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 419e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v6, v8, v5 420e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v4, -v4, v6, v7 421e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 422e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v4, v4, v5, v6 423e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v3, v4, v3, 1.0 42437512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 425e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v3 426e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v3 427e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v2, v2, v3 42837512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 42937512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D 43037512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D 43137512d76SMatt Arsenault %div2 = fdiv arcp float %z, %D 43237512d76SMatt Arsenault %insert.0 = insertelement <3 x float> poison, float %div0, i32 0 43337512d76SMatt Arsenault %insert.1 = insertelement <3 x float> %insert.0, float %div1, i32 1 43437512d76SMatt Arsenault %insert.2 = insertelement <3 x float> %insert.1, float %div2, i32 2 43537512d76SMatt Arsenault ret <3 x float> %insert.2 43637512d76SMatt Arsenault} 43737512d76SMatt Arsenault 43837512d76SMatt Arsenaultdefine <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, float %w, float %D) #0 { 43937512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x4_arcp: 44037512d76SMatt Arsenault; GFX6: ; %bb.0: 44137512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 442e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v5, s[4:5], v4, v4, 1.0 44337512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v6, v5 44437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v5, v6, 1.0 44537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v7, v6, v6 446e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v7, vcc, 1.0, v4, 1.0 44737512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v8, v7, v6 44837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, -v5, v8, v7 44937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v8, v9, v6, v8 45037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v5, -v5, v8, v7 45137512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v5, v5, v6, v8 452e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v4, v5, v4, 1.0 453e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v4 454e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v4 455e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v4 456e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v3, v3, v4 45737512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 45837512d76SMatt Arsenault; 45937512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x4_arcp: 46037512d76SMatt Arsenault; GFX9: ; %bb.0: 46137512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 462e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v5, s[4:5], v4, v4, 1.0 463e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v6, vcc, 1.0, v4, 1.0 464e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v7, v5 465e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v8, -v5, v7, 1.0 466e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, v8, v7, v7 467e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v8, v6, v7 468e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v9, -v5, v8, v6 469e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v8, v9, v7, v8 470e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v5, -v5, v8, v6 471e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v5, v5, v7, v8 472e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v4, v5, v4, 1.0 473e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4 474e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v4 475e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v2, v2, v4 476e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v3, v3, v4 47737512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 47837512d76SMatt Arsenault; 47937512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x4_arcp: 48037512d76SMatt Arsenault; GFX11: ; %bb.0: 48137512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 482e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v5, null, v4, v4, 1.0 483e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v8, vcc_lo, 1.0, v4, 1.0 484e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) 485e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v6, v5 48637512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 487e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v7, -v5, v6, 1.0 488e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v6 489e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 490e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v7, v8, v6 491e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v9, -v5, v7, v8 492e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 493e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v7, v9, v6 494e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v5, -v5, v7, v8 495e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 496e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v5, v5, v6, v7 497e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v4, v5, v4, 1.0 498e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 499e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v4 500e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v4 501e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v2, v2, v4 502e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v3, v3, v4 50337512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 50437512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D 50537512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D 50637512d76SMatt Arsenault %div2 = fdiv arcp float %z, %D 50737512d76SMatt Arsenault %div3 = fdiv arcp float %w, %D 50837512d76SMatt Arsenault %insert.0 = insertelement <4 x float> poison, float %div0, i32 0 50937512d76SMatt Arsenault %insert.1 = insertelement <4 x float> %insert.0, float %div1, i32 1 51037512d76SMatt Arsenault %insert.2 = insertelement <4 x float> %insert.1, float %div2, i32 2 51137512d76SMatt Arsenault %insert.3 = insertelement <4 x float> %insert.2, float %div3, i32 3 51237512d76SMatt Arsenault ret <4 x float> %insert.3 51337512d76SMatt Arsenault} 51437512d76SMatt Arsenault 51537512d76SMatt Arsenaultdefine <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half %D) #0 { 51637512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f16_x3_arcp: 51737512d76SMatt Arsenault; GFX6: ; %bb.0: 51837512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 51937512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 52037512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 521e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 522e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 52337512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 52437512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 525e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 526e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 527e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v4, s[4:5], v3, v3, 1.0 52837512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v5, v4 52937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v4, v5, 1.0 53037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v5, v6, v5, v5 531e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v6, vcc, 1.0, v3, 1.0 53237512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v7, v6, v5 53337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v8, -v4, v7, v6 53437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, v8, v5, v7 53537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v4, -v4, v7, v6 53637512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v4, v4, v5, v7 537e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v3, v4, v3, 1.0 538e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v3 539e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 540e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v3 54137512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 54237512d76SMatt Arsenault; 54337512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f16_x3_arcp: 54437512d76SMatt Arsenault; GFX9: ; %bb.0: 54537512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 54637512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_e32 v3, v3 54737512d76SMatt Arsenault; GFX9-NEXT: v_mul_f16_e32 v0, v0, v3 54837512d76SMatt Arsenault; GFX9-NEXT: v_mul_f16_e32 v4, v1, v3 54937512d76SMatt Arsenault; GFX9-NEXT: v_mul_f16_e32 v1, v2, v3 55037512d76SMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v0, v0, v4 55137512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 55237512d76SMatt Arsenault; 55337512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f16_x3_arcp: 55437512d76SMatt Arsenault; GFX11: ; %bb.0: 55537512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 55637512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v3, v3 55737512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 55837512d76SMatt Arsenault; GFX11-NEXT: v_mul_f16_e32 v0, v0, v3 55937512d76SMatt Arsenault; GFX11-NEXT: v_mul_f16_e32 v1, v1, v3 56037512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 56137512d76SMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 56237512d76SMatt Arsenault; GFX11-NEXT: v_mul_f16_e32 v1, v2, v3 56337512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 56437512d76SMatt Arsenault %div0 = fdiv arcp half %x, %D 56537512d76SMatt Arsenault %div1 = fdiv arcp half %y, %D 56637512d76SMatt Arsenault %div2 = fdiv arcp half %z, %D 56737512d76SMatt Arsenault %insert.0 = insertelement <3 x half> poison, half %div0, i32 0 56837512d76SMatt Arsenault %insert.1 = insertelement <3 x half> %insert.0, half %div1, i32 1 56937512d76SMatt Arsenault %insert.2 = insertelement <3 x half> %insert.1, half %div2, i32 2 57037512d76SMatt Arsenault ret <3 x half> %insert.2 57137512d76SMatt Arsenault} 57237512d76SMatt Arsenault 57337512d76SMatt Arsenaultdefine <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2 x float> %D) #0 { 57437512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v2f32_x2: 57537512d76SMatt Arsenault; GFX6: ; %bb.0: 57637512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 577e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, 1.0 57837512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v7, v6 57937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v8, -v6, v7, 1.0 58037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, v8, v7, v7 581e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v8, vcc, 1.0, v4, 1.0 58237512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v9, v8, v7 58337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v6, v9, v8 58437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v10, v7, v9 58537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v6, v9, v8 586e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v8, s[4:5], v5, v5, 1.0 58737512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v10, v8 58837512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v6, v6, v7, v9 589e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v4, v6, v4, 1.0 590e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v7, vcc, 1.0, v5, 1.0 59137512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v8, v10, 1.0 59237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v6, v10, v10 59337512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v9, v7, v6 59437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v8, v9, v7 59537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v10, v6, v9 59637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v8, v9, v7 59737512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v6, v7, v6, v9 598e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v5, v6, v5, 1.0 599e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v4 600e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v5 601e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v4 602e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v3, v3, v5 60337512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 60437512d76SMatt Arsenault; 60537512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v2f32_x2: 60637512d76SMatt Arsenault; GFX9: ; %bb.0: 60737512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 608e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, 1.0 609e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v7, s[4:5], v5, v5, 1.0 610e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v8, vcc, 1.0, v4, 1.0 611e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_scale_f32 v9, s[4:5], 1.0, v5, 1.0 612e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v10, v6 613e561e7cbSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v11, v7 614e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v12, -v6, v10, 1.0 615e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v10, v12, v10, v10 616e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v13, -v7, v11, 1.0 617e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v11, v13, v11, v11 618e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v12, v8, v10 619e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v13, v9, v11 620e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v14, -v6, v12, v8 621e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v15, -v7, v13, v9 622e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v12, v14, v10, v12 623e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v6, -v6, v12, v8 624e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v8, v15, v11, v13 625e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v6, v6, v10, v12 626e561e7cbSMatt Arsenault; GFX9-NEXT: v_fma_f32 v7, -v7, v8, v9 62737512d76SMatt Arsenault; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 628e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fmas_f32 v7, v7, v11, v8 629e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v4, v6, v4, 1.0 630e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v4 631e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v2, v2, v4 632e561e7cbSMatt Arsenault; GFX9-NEXT: v_div_fixup_f32 v5, v7, v5, 1.0 633e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v5 634e561e7cbSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v3, v3, v5 63537512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 63637512d76SMatt Arsenault; 63737512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v2f32_x2: 63837512d76SMatt Arsenault; GFX11: ; %bb.0: 63937512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v6, null, v4, v4, 1.0 641e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v7, null, v5, v5, 1.0 642e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v12, vcc_lo, 1.0, v4, 1.0 64337512d76SMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 644e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v8, v6 645e561e7cbSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v9, v7 64637512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 647e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v10, -v6, v8, 1.0 648e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v11, -v7, v9, 1.0 649e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 650e561e7cbSMatt Arsenault; GFX11-NEXT: v_dual_fmac_f32 v8, v10, v8 :: v_dual_fmac_f32 v9, v11, v9 651e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_scale_f32 v10, s0, 1.0, v5, 1.0 652e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v11, v12, v8 653e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 654e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v14, -v6, v11, v12 655e561e7cbSMatt Arsenault; GFX11-NEXT: v_fmac_f32_e32 v11, v14, v8 656e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 657e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v6, -v6, v11, v12 658e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v6, v6, v8, v11 65937512d76SMatt Arsenault; GFX11-NEXT: s_mov_b32 vcc_lo, s0 660e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 661e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v4, v6, v4, 1.0 662e561e7cbSMatt Arsenault; GFX11-NEXT: v_dual_mul_f32 v13, v10, v9 :: v_dual_mul_f32 v0, v0, v4 663e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 664e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v15, -v7, v13, v10 665e561e7cbSMatt Arsenault; GFX11-NEXT: v_dual_mul_f32 v2, v2, v4 :: v_dual_fmac_f32 v13, v15, v9 666e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 667e561e7cbSMatt Arsenault; GFX11-NEXT: v_fma_f32 v7, -v7, v13, v10 668e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fmas_f32 v7, v7, v9, v13 669e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 670e561e7cbSMatt Arsenault; GFX11-NEXT: v_div_fixup_f32 v5, v7, v5, 1.0 671e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v5 672e561e7cbSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v3, v3, v5 67337512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 67437512d76SMatt Arsenault %div0 = fdiv arcp <2 x float> %x, %D 67537512d76SMatt Arsenault %div1 = fdiv arcp <2 x float> %y, %D 67637512d76SMatt Arsenault %shuffle = shufflevector <2 x float> %div0, <2 x float> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 67737512d76SMatt Arsenault ret <4 x float> %shuffle 67837512d76SMatt Arsenault} 67937512d76SMatt Arsenault 68037512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D) #0 { 68137512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_ulp25: 68237512d76SMatt Arsenault; GFX6: ; %bb.0: 68337512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6848287f3afSMatt Arsenault; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 6858287f3afSMatt Arsenault; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v2 6868287f3afSMatt Arsenault; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, s4 6878287f3afSMatt Arsenault; GFX6-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc 6888287f3afSMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v3, v3 6898287f3afSMatt Arsenault; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 6908287f3afSMatt Arsenault; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v2 6918287f3afSMatt Arsenault; GFX6-NEXT: v_ldexp_f32_e32 v2, v3, v2 6928287f3afSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 6938287f3afSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 69437512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 69537512d76SMatt Arsenault; 69637512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_ulp25: 69737512d76SMatt Arsenault; GFX9: ; %bb.0: 69837512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6998287f3afSMatt Arsenault; GFX9-NEXT: v_frexp_mant_f32_e32 v3, v2 7008287f3afSMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v3, v3 7018287f3afSMatt Arsenault; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 7028287f3afSMatt Arsenault; GFX9-NEXT: v_sub_u32_e32 v2, 0, v2 7038287f3afSMatt Arsenault; GFX9-NEXT: v_ldexp_f32 v2, v3, v2 7048287f3afSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 7058287f3afSMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 70637512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 70737512d76SMatt Arsenault; 70837512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_ulp25: 70937512d76SMatt Arsenault; GFX11: ; %bb.0: 71037512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7118287f3afSMatt Arsenault; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v2 7128287f3afSMatt Arsenault; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 7138287f3afSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 7148287f3afSMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v3, v3 7158287f3afSMatt Arsenault; GFX11-NEXT: v_sub_nc_u32_e32 v2, 0, v2 71637512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 7178287f3afSMatt Arsenault; GFX11-NEXT: v_ldexp_f32 v2, v3, v2 7188287f3afSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7198287f3afSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 7208287f3afSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 72137512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 72237512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D, !fpmath !0 72337512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D, !fpmath !0 72437512d76SMatt Arsenault %insert.0 = insertelement <2 x float> poison, float %div0, i32 0 72537512d76SMatt Arsenault %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1 72637512d76SMatt Arsenault ret <2 x float> %insert.1 72737512d76SMatt Arsenault} 72837512d76SMatt Arsenault 72937512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float %D) #1 { 73037512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_daz_ulp25: 73137512d76SMatt Arsenault; GFX6: ; %bb.0: 73237512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73337512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v2, v2 73437512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 73537512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v2 73637512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 73737512d76SMatt Arsenault; 73837512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_daz_ulp25: 73937512d76SMatt Arsenault; GFX9: ; %bb.0: 74037512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 74137512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f32_e32 v2, v2 74237512d76SMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 74337512d76SMatt Arsenault; GFX9-NEXT: v_mul_f32_e32 v1, v1, v2 74437512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 74537512d76SMatt Arsenault; 74637512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_daz_ulp25: 74737512d76SMatt Arsenault; GFX11: ; %bb.0: 74837512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 74937512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f32_e32 v2, v2 75037512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 75137512d76SMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 7528287f3afSMatt Arsenault; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 75337512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 75437512d76SMatt Arsenault %div0 = fdiv arcp float %x, %D, !fpmath !0 75537512d76SMatt Arsenault %div1 = fdiv arcp float %y, %D, !fpmath !0 75637512d76SMatt Arsenault %insert.0 = insertelement <2 x float> poison, float %div0, i32 0 75737512d76SMatt Arsenault %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1 75837512d76SMatt Arsenault ret <2 x float> %insert.1 75937512d76SMatt Arsenault} 76037512d76SMatt Arsenault 76137512d76SMatt Arsenaultdefine <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x half> %D) #0 { 76237512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v2f16_x2: 76337512d76SMatt Arsenault; GFX6: ; %bb.0: 76437512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 76537512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 76637512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 76737512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 768e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 769e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 770e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 771e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 772e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 773e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v6, s[4:5], v4, v4, 1.0 774e561e7cbSMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v7, v6 775e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 776e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 777e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 77837512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v8, -v6, v7, 1.0 77937512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, v8, v7, v7 780e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v8, vcc, 1.0, v4, 1.0 78137512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v9, v8, v7 78237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v6, v9, v8 78337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v10, v7, v9 78437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v6, v9, v8 785e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v8, s[4:5], v5, v5, 1.0 78637512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v10, v8 78737512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v6, v6, v7, v9 788e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v4, v6, v4, 1.0 789e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v7, vcc, 1.0, v5, 1.0 79037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, -v8, v10, 1.0 79137512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v6, v6, v10, v10 79237512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v9, v7, v6 79337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v8, v9, v7 794e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 79537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v10, v6, v9 79637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v7, -v8, v9, v7 79737512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v6, v7, v6, v9 798e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v5, v6, v5, 1.0 799e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v4 800e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v5 801e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v4 802e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v3, v3, v5 80337512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 80437512d76SMatt Arsenault; 80537512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v2f16_x2: 80637512d76SMatt Arsenault; GFX9: ; %bb.0: 80737512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 80837512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_sdwa v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 80937512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_e32 v2, v2 810e561e7cbSMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v2, v2, v3 811e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 812e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v1, v1, v2 81337512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 81437512d76SMatt Arsenault; 81537512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v2f16_x2: 81637512d76SMatt Arsenault; GFX11: ; %bb.0: 81737512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 81837512d76SMatt Arsenault; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v2 81937512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v2, v2 820e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 82137512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v3, v3 82237512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 823e561e7cbSMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v2, v2, v3 824e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v0, v0, v2 825e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v1, v1, v2 82637512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 82737512d76SMatt Arsenault %div0 = fdiv arcp <2 x half> %x, %D 82837512d76SMatt Arsenault %div1 = fdiv arcp <2 x half> %y, %D 82937512d76SMatt Arsenault %shuffle = shufflevector <2 x half> %div0, <2 x half> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 83037512d76SMatt Arsenault ret <4 x half> %shuffle 83137512d76SMatt Arsenault} 83237512d76SMatt Arsenault 83337512d76SMatt Arsenaultdefine <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x half> %D) #0 { 83437512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v3f16_x2: 83537512d76SMatt Arsenault; GFX6: ; %bb.0: 83637512d76SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 83737512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v6, v6 83837512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v7 83937512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v8, v8 840e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 841e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v6 842e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v7, v7 843e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v8 844e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 845e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v9, s[4:5], v6, v6, 1.0 846e561e7cbSMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v10, v9 84737512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 848e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 849e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 85037512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v11, -v9, v10, 1.0 85137512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, v11, v10, v10 852e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v11, vcc, 1.0, v6, 1.0 85337512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v12, v11, v10 85437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v13, -v9, v12, v11 85537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v12, v13, v10, v12 85637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, -v9, v12, v11 857e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v11, s[4:5], v7, v7, 1.0 85837512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v13, v11 85937512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v9, v9, v10, v12 860e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v6, v9, v6, 1.0 861e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v10, vcc, 1.0, v7, 1.0 86237512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, -v11, v13, 1.0 86337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v9, v13, v13 86437512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v12, v10, v9 86537512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v13, -v11, v12, v10 86637512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v12, v13, v9, v12 86737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v11, v12, v10 868e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v11, s[4:5], v8, v8, 1.0 86937512d76SMatt Arsenault; GFX6-NEXT: v_rcp_f32_e32 v13, v11 87037512d76SMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v9, v10, v9, v12 871e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 872e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v7, v9, v7, 1.0 87337512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, -v11, v13, 1.0 87437512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v9, v9, v13, v13 875e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_scale_f32 v10, vcc, 1.0, v8, 1.0 87637512d76SMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v12, v10, v9 87737512d76SMatt Arsenault; GFX6-NEXT: v_fma_f32 v13, -v11, v12, v10 87837512d76SMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 879e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 880e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 881e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 882e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 883e561e7cbSMatt Arsenault; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 884e561e7cbSMatt Arsenault; GFX6-NEXT: v_fma_f32 v12, v13, v9, v12 885e561e7cbSMatt Arsenault; GFX6-NEXT: v_fma_f32 v10, -v11, v12, v10 886e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fmas_f32 v9, v10, v9, v12 887e561e7cbSMatt Arsenault; GFX6-NEXT: v_div_fixup_f32 v8, v9, v8, 1.0 888e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6 889e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v1, v1, v7 890e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v2, v2, v8 891e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v3, v3, v6 892e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v4, v4, v7 893e561e7cbSMatt Arsenault; GFX6-NEXT: v_mul_f32_e32 v5, v5, v8 89437512d76SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 89537512d76SMatt Arsenault; 89637512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v3f16_x2: 89737512d76SMatt Arsenault; GFX9: ; %bb.0: 89837512d76SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 89937512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_sdwa v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 90037512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_e32 v4, v4 90137512d76SMatt Arsenault; GFX9-NEXT: v_rcp_f16_e32 v5, v5 902e561e7cbSMatt Arsenault; GFX9-NEXT: s_movk_i32 s4, 0x7e00 903e561e7cbSMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v4, v4, v6 904e561e7cbSMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v5, v5, s4 905e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 906e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 907e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v3, v3, v5 908e561e7cbSMatt Arsenault; GFX9-NEXT: v_pk_mul_f16 v4, v2, v4 909e561e7cbSMatt Arsenault; GFX9-NEXT: v_alignbit_b32 v2, v3, v4, 16 910e561e7cbSMatt Arsenault; GFX9-NEXT: v_pack_b32_f16 v1, v1, v4 91137512d76SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 91237512d76SMatt Arsenault; 91337512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v3f16_x2: 91437512d76SMatt Arsenault; GFX11: ; %bb.0: 91537512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91637512d76SMatt Arsenault; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v4 91737512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v4, v4 91837512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v5, v5 919e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) 92037512d76SMatt Arsenault; GFX11-NEXT: v_rcp_f16_e32 v6, v6 92137512d76SMatt Arsenault; GFX11-NEXT: s_waitcnt_depctr 0xfff 922e561e7cbSMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v5, v5, 0x7e00 923e561e7cbSMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v4, v4, v6 924e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v1, v1, v5 925e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v3, v3, v5 926e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) 927e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v2, v2, v4 928e561e7cbSMatt Arsenault; GFX11-NEXT: v_pk_mul_f16 v0, v0, v4 92937512d76SMatt Arsenault; GFX11-NEXT: v_pack_b32_f16 v1, v1, v2 930e561e7cbSMatt Arsenault; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 931e561e7cbSMatt Arsenault; GFX11-NEXT: v_alignbit_b32 v2, v3, v2, 16 93237512d76SMatt Arsenault; GFX11-NEXT: s_setpc_b64 s[30:31] 93337512d76SMatt Arsenault %div0 = fdiv arcp <3 x half> %x, %D 93437512d76SMatt Arsenault %div1 = fdiv arcp <3 x half> %y, %D 93537512d76SMatt Arsenault %shuffle = shufflevector <3 x half> %div0, <3 x half> %div1, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> 93637512d76SMatt Arsenault ret <6 x half> %shuffle 93737512d76SMatt Arsenault} 93837512d76SMatt Arsenault 93937512d76SMatt Arsenaultattributes #0 = { "denormal-fp-math-f32"="ieee,ieee" } 94037512d76SMatt Arsenaultattributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 94137512d76SMatt Arsenault 94237512d76SMatt Arsenault!0 = !{float 2.5} 94337512d76SMatt Arsenault;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 94437512d76SMatt Arsenault; GCN: {{.*}} 945