xref: /llvm-project/llvm/test/CodeGen/AMDGPU/repeated-divisor.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
537512d76SMatt Arsenault
; Baseline without fast-math flags: %x and %y are each divided by the same
; divisor %D using plain fdiv. The expected output for all three targets holds
; two complete division expansions (v_div_scale / v_rcp / v_div_fmas /
; v_div_fixup appear once per quotient), i.e. no reciprocal of %D is shared.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v5, v4, v4
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, v0, v2, v0
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_scale_f32 v5, s[4:5], v2, v2, v1
; GFX6-NEXT:    v_rcp_f32_e32 v7, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX6-NEXT:    v_div_scale_f32 v4, vcc, v1, v2, v1
; GFX6-NEXT:    v_fma_f32 v3, -v5, v7, 1.0
; GFX6-NEXT:    v_fma_f32 v3, v3, v7, v7
; GFX6-NEXT:    v_mul_f32_e32 v6, v4, v3
; GFX6-NEXT:    v_fma_f32 v7, -v5, v6, v4
; GFX6-NEXT:    v_fma_f32 v6, v7, v3, v6
; GFX6-NEXT:    v_fma_f32 v4, -v5, v6, v4
; GFX6-NEXT:    v_div_fmas_f32 v3, v4, v3, v6
; GFX6-NEXT:    v_div_fixup_f32 v1, v3, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
; GFX9-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v1
; GFX9-NEXT:    v_div_scale_f32 v5, vcc, v0, v2, v0
; GFX9-NEXT:    v_div_scale_f32 v6, s[4:5], v1, v2, v1
; GFX9-NEXT:    v_rcp_f32_e32 v7, v3
; GFX9-NEXT:    v_rcp_f32_e32 v8, v4
; GFX9-NEXT:    v_fma_f32 v9, -v3, v7, 1.0
; GFX9-NEXT:    v_fma_f32 v7, v9, v7, v7
; GFX9-NEXT:    v_fma_f32 v10, -v4, v8, 1.0
; GFX9-NEXT:    v_fma_f32 v8, v10, v8, v8
; GFX9-NEXT:    v_mul_f32_e32 v9, v5, v7
; GFX9-NEXT:    v_mul_f32_e32 v10, v6, v8
; GFX9-NEXT:    v_fma_f32 v11, -v3, v9, v5
; GFX9-NEXT:    v_fma_f32 v12, -v4, v10, v6
; GFX9-NEXT:    v_fma_f32 v9, v11, v7, v9
; GFX9-NEXT:    v_fma_f32 v10, v12, v8, v10
; GFX9-NEXT:    v_fma_f32 v3, -v3, v9, v5
; GFX9-NEXT:    v_fma_f32 v4, -v4, v10, v6
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v7, v9
; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
; GFX9-NEXT:    v_div_fmas_f32 v4, v4, v8, v10
; GFX9-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX9-NEXT:    v_div_fixup_f32 v1, v4, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, v0
; GFX11-NEXT:    v_div_scale_f32 v4, null, v2, v2, v1
; GFX11-NEXT:    v_div_scale_f32 v9, vcc_lo, v0, v2, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_rcp_f32_e32 v5, v3
; GFX11-NEXT:    v_rcp_f32_e32 v6, v4
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v7, -v3, v5, 1.0
; GFX11-NEXT:    v_fma_f32 v8, -v4, v6, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_fmac_f32 v5, v7, v5 :: v_dual_fmac_f32 v6, v8, v6
; GFX11-NEXT:    v_div_scale_f32 v7, s0, v1, v2, v1
; GFX11-NEXT:    v_mul_f32_e32 v8, v9, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mul_f32_e32 v10, v7, v6
; GFX11-NEXT:    v_fma_f32 v11, -v3, v8, v9
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fma_f32 v12, -v4, v10, v7
; GFX11-NEXT:    v_fmac_f32_e32 v8, v11, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fmac_f32_e32 v10, v12, v6
; GFX11-NEXT:    v_fma_f32 v3, -v3, v8, v9
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fma_f32 v4, -v4, v10, v7
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v5, v8
; GFX11-NEXT:    s_mov_b32 vcc_lo, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_div_fmas_f32 v4, v4, v6, v10
; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_div_fixup_f32 v1, v4, v2, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv float %x, %D
  %div1 = fdiv float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}
10337512d76SMatt Arsenault
; Two quotients by the same divisor %D, with the arcp flag on both fdivs.
; The expected output expands a single 1.0 / %D division (the v_div_scale and
; v_div_fixup operands use the constant 1.0) and then forms each result with
; one v_mul_f32 by that shared reciprocal.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v5, v4, v4
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX9-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v2, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v5, v3
; GFX9-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
; GFX9-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v6, v4, v5
; GFX9-NEXT:    v_fma_f32 v7, -v3, v6, v4
; GFX9-NEXT:    v_fma_f32 v6, v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v3, -v3, v6, v4
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
; GFX9-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, 1.0
; GFX11-NEXT:    v_div_scale_f32 v6, vcc_lo, 1.0, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v5, v6, v4
; GFX11-NEXT:    v_fma_f32 v7, -v3, v5, v6
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v5, v7, v4
; GFX11-NEXT:    v_fma_f32 v3, -v3, v5, v6
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v5
; GFX11-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}
17037512d76SMatt Arsenault
; Same IR body as @v_repeat_divisor_f32_x2_arcp, but using attribute group #1
; instead of #0. The expected output still computes one shared 1.0 / %D and two
; v_mul_f32, and additionally brackets the FMA refinement with mode changes:
; s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2) on tahiti and gfx900, and
; s_denorm_mode on gfx1100.
; Assumption to confirm: the definition of #1 is outside this excerpt; the
; "daz" suffix suggests it selects a denormal-flushing f32 mode.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %D) #1 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX9-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v2, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v5, v3
; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX9-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
; GFX9-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v6, v4, v5
; GFX9-NEXT:    v_fma_f32 v7, -v3, v6, v4
; GFX9-NEXT:    v_fma_f32 v6, v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v3, -v3, v6, v4
; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
; GFX9-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, 1.0
; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, 1.0, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
; GFX11-NEXT:    s_denorm_mode 15
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX11-NEXT:    s_denorm_mode 12
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX11-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}
24337512d76SMatt Arsenault
; Half-precision variant: two arcp fdivs by the same %D. On gfx900 and gfx1100
; the expected output is a single v_rcp_f16 of %D followed by two v_mul_f16 and
; a v_pack_b32_f16. On tahiti all three operands pass through v_cvt_f16_f32 /
; v_cvt_f32_f16, the shared reciprocal is computed with the f32 division
; expansion of 1.0 / %D, and the results are formed with two v_mul_f32.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f16_e32 v2, v2
; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rcp_f16_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp half %x, %D
  %div1 = fdiv arcp half %y, %D
  %insert.0 = insertelement <2 x half> poison, half %div0, i32 0
  %insert.1 = insertelement <2 x half> %insert.0, half %div1, i32 1
  ret <2 x half> %insert.1
}
29437512d76SMatt Arsenault
; Double-precision variant: two arcp fdivs by the same %D. The expected output
; holds one 1.0 / %D expansion (v_rcp_f64 refined by two pairs of v_fma_f64)
; feeding two v_mul_f64. On tahiti the vcc input of v_div_fmas_f64 is built
; from two v_cmp_eq_u32 results combined with s_xor_b64 instead of coming
; straight from v_div_scale_f64; gfx900 and gfx1100 take it from the
; v_div_scale_f64 that writes vcc / vcc_lo.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0
; GFX6-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
; GFX6-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX6-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], 1.0, v[4:5], 1.0
; GFX6-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0
; GFX6-NEXT:    s_mov_b32 s4, 0x3ff00000
; GFX6-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], s4, v11
; GFX6-NEXT:    v_mul_f64 v[12:13], v[10:11], v[8:9]
; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX6-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11]
; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13]
; GFX6-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX6-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX6-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0
; GFX9-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX9-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX9-NEXT:    v_div_scale_f64 v[10:11], vcc, 1.0, v[4:5], 1.0
; GFX9-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0
; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GFX9-NEXT:    v_mul_f64 v[12:13], v[10:11], v[8:9]
; GFX9-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11]
; GFX9-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13]
; GFX9-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], 1.0
; GFX11-NEXT:    v_div_scale_f64 v[12:13], vcc_lo, 1.0, v[4:5], 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX11-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX11-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f64 v[10:11], v[12:13], v[8:9]
; GFX11-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[10:11], v[12:13]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[10:11]
; GFX11-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp double %x, %D
  %div1 = fdiv arcp double %y, %D
  %insert.0 = insertelement <2 x double> poison, double %div0, i32 0
  %insert.1 = insertelement <2 x double> %insert.0, double %div1, i32 1
  ret <2 x double> %insert.1
}
36537512d76SMatt Arsenault
; Three arcp fdivs (%x, %y, %z) by the same divisor %D. The expected output
; still contains only one 1.0 / %D expansion, followed by three v_mul_f32 by
; the shared reciprocal, one per result element.
; The assertions inside the function are generated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
define <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
; GFX6-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX6-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
; GFX9-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v3, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v6, v4
; GFX9-NEXT:    v_fma_f32 v7, -v4, v6, 1.0
; GFX9-NEXT:    v_fma_f32 v6, v7, v6, v6
; GFX9-NEXT:    v_mul_f32_e32 v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v8, -v4, v7, v5
; GFX9-NEXT:    v_fma_f32 v7, v8, v6, v7
; GFX9-NEXT:    v_fma_f32 v4, -v4, v7, v5
; GFX9-NEXT:    v_div_fmas_f32 v4, v4, v6, v7
; GFX9-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v4, null, v3, v3, 1.0
; GFX11-NEXT:    v_div_scale_f32 v7, vcc_lo, 1.0, v3, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v5, v4
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v6, v7, v5
; GFX11-NEXT:    v_fma_f32 v8, -v4, v6, v7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v6, v8, v5
; GFX11-NEXT:    v_fma_f32 v4, -v4, v6, v7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v4, v4, v5, v6
; GFX11-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %div2 = fdiv arcp float %z, %D
  %insert.0 = insertelement <3 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %div1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %div2, i32 2
  ret <3 x float> %insert.2
}
43737512d76SMatt Arsenault
; Four scalar f32 divisions that share the divisor %D, each carrying the arcp
; fast-math flag; the quotients are gathered into a <4 x float>.
; For all three run lines the assertions show one expansion of 1.0 / %D
; (v_div_scale, v_rcp, fma refinement, v_div_fmas, v_div_fixup with 1.0 as the
; numerator) followed by four v_mul_f32 that reuse that single reciprocal.
43837512d76SMatt Arsenaultdefine <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, float %w, float %D) #0 {
43937512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x4_arcp:
44037512d76SMatt Arsenault; GFX6:       ; %bb.0:
44137512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v5, s[4:5], v4, v4, 1.0
44337512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v6, v5
44437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
44537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, v7, v6, v6
446e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v4, 1.0
44737512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v8, v7, v6
44837512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, -v5, v8, v7
44937512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v8, v9, v6, v8
45037512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v5, -v5, v8, v7
45137512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v5, v5, v6, v8
452e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
453e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
454e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v4
455e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
456e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v4
45737512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
45837512d76SMatt Arsenault;
45937512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x4_arcp:
46037512d76SMatt Arsenault; GFX9:       ; %bb.0:
46137512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v5, s[4:5], v4, v4, 1.0
463e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v4, 1.0
464e561e7cbSMatt Arsenault; GFX9-NEXT:    v_rcp_f32_e32 v7, v5
465e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v8, -v5, v7, 1.0
466e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v7, v8, v7, v7
467e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v8, v6, v7
468e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v9, -v5, v8, v6
469e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v8, v9, v7, v8
470e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v5, -v5, v8, v6
471e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fmas_f32 v5, v5, v7, v8
472e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
473e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
474e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v4
475e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v4
476e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v4
47737512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
47837512d76SMatt Arsenault;
47937512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x4_arcp:
48037512d76SMatt Arsenault; GFX11:       ; %bb.0:
48137512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v5, null, v4, v4, 1.0
483e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v8, vcc_lo, 1.0, v4, 1.0
484e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
485e561e7cbSMatt Arsenault; GFX11-NEXT:    v_rcp_f32_e32 v6, v5
48637512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
487e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
488e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v6
489e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
490e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v7, v8, v6
491e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v9, -v5, v7, v8
492e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
493e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fmac_f32_e32 v7, v9, v6
494e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v5, -v5, v7, v8
495e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
496e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fmas_f32 v5, v5, v6, v7
497e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
498e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
499e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v4
500e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v4
501e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v4
502e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v3, v3, v4
50337512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: every fdiv below uses %D as its right-hand operand.
50437512d76SMatt Arsenault  %div0 = fdiv arcp float %x, %D
50537512d76SMatt Arsenault  %div1 = fdiv arcp float %y, %D
50637512d76SMatt Arsenault  %div2 = fdiv arcp float %z, %D
50737512d76SMatt Arsenault  %div3 = fdiv arcp float %w, %D
50837512d76SMatt Arsenault  %insert.0 = insertelement <4 x float> poison, float %div0, i32 0
50937512d76SMatt Arsenault  %insert.1 = insertelement <4 x float> %insert.0, float %div1, i32 1
51037512d76SMatt Arsenault  %insert.2 = insertelement <4 x float> %insert.1, float %div2, i32 2
51137512d76SMatt Arsenault  %insert.3 = insertelement <4 x float> %insert.2, float %div3, i32 3
51237512d76SMatt Arsenault  ret <4 x float> %insert.3
51337512d76SMatt Arsenault}
51437512d76SMatt Arsenault
; Three scalar f16 divisions that share the divisor %D, each carrying arcp;
; the quotients are gathered into a <3 x half>.
; The tahiti assertions pass all operands through v_cvt_f16_f32 and
; v_cvt_f32_f16, compute one f32 reciprocal of the divisor, and multiply three
; times. The gfx900 and gfx1100 assertions use a single v_rcp_f16 followed by
; three v_mul_f16, packing the first two results with v_pack_b32_f16.
51537512d76SMatt Arsenaultdefine <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half %D) #0 {
51637512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f16_x3_arcp:
51737512d76SMatt Arsenault; GFX6:       ; %bb.0:
51837512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51937512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
52037512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
521e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
522e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
52337512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
52437512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
525e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
526e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
527e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
52837512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v5, v4
52937512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
53037512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v5, v6, v5, v5
531e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v3, 1.0
53237512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v7, v6, v5
53337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v8, -v4, v7, v6
53437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, v8, v5, v7
53537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v4, -v4, v7, v6
53637512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
537e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
538e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v3
539e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
540e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v3
54137512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
54237512d76SMatt Arsenault;
54337512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f16_x3_arcp:
54437512d76SMatt Arsenault; GFX9:       ; %bb.0:
54537512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54637512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_e32 v3, v3
54737512d76SMatt Arsenault; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v3
54837512d76SMatt Arsenault; GFX9-NEXT:    v_mul_f16_e32 v4, v1, v3
54937512d76SMatt Arsenault; GFX9-NEXT:    v_mul_f16_e32 v1, v2, v3
55037512d76SMatt Arsenault; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v4
55137512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
55237512d76SMatt Arsenault;
55337512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f16_x3_arcp:
55437512d76SMatt Arsenault; GFX11:       ; %bb.0:
55537512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55637512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v3, v3
55737512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
55837512d76SMatt Arsenault; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v3
55937512d76SMatt Arsenault; GFX11-NEXT:    v_mul_f16_e32 v1, v1, v3
56037512d76SMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
56137512d76SMatt Arsenault; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
56237512d76SMatt Arsenault; GFX11-NEXT:    v_mul_f16_e32 v1, v2, v3
56337512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: every fdiv below uses %D as its right-hand operand.
56437512d76SMatt Arsenault  %div0 = fdiv arcp half %x, %D
56537512d76SMatt Arsenault  %div1 = fdiv arcp half %y, %D
56637512d76SMatt Arsenault  %div2 = fdiv arcp half %z, %D
56737512d76SMatt Arsenault  %insert.0 = insertelement <3 x half> poison, half %div0, i32 0
56837512d76SMatt Arsenault  %insert.1 = insertelement <3 x half> %insert.0, half %div1, i32 1
56937512d76SMatt Arsenault  %insert.2 = insertelement <3 x half> %insert.1, half %div2, i32 2
57037512d76SMatt Arsenault  ret <3 x half> %insert.2
57137512d76SMatt Arsenault}
57237512d76SMatt Arsenault
; Two <2 x float> divisions that share the vector divisor %D, both carrying
; arcp; the two quotients are concatenated into a <4 x float> by the
; shufflevector.
; The assertions show one reciprocal expansion per divisor lane (v4 and v5,
; each finished by a v_div_fixup with 1.0 as the numerator); each reciprocal
; then feeds two multiplies, one for the matching lane of %x and one for %y.
57337512d76SMatt Arsenaultdefine <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2 x float> %D) #0 {
57437512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v2f32_x2:
57537512d76SMatt Arsenault; GFX6:       ; %bb.0:
57637512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
57837512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v7, v6
57937512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v8, -v6, v7, 1.0
58037512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, v8, v7, v7
581e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
58237512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v9, v8, v7
58337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v6, v9, v8
58437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v10, v7, v9
58537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, -v6, v9, v8
586e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v8, s[4:5], v5, v5, 1.0
58737512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v10, v8
58837512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v6, v6, v7, v9
589e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
590e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v5, 1.0
59137512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, -v8, v10, 1.0
59237512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, v6, v10, v10
59337512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v9, v7, v6
59437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v8, v9, v7
59537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v10, v6, v9
59637512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, -v8, v9, v7
59737512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v6, v7, v6, v9
598e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v5, v6, v5, 1.0
599e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
600e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v5
601e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
602e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v5
60337512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
60437512d76SMatt Arsenault;
60537512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v2f32_x2:
60637512d76SMatt Arsenault; GFX9:       ; %bb.0:
60737512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
609e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v7, s[4:5], v5, v5, 1.0
610e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
611e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_scale_f32 v9, s[4:5], 1.0, v5, 1.0
612e561e7cbSMatt Arsenault; GFX9-NEXT:    v_rcp_f32_e32 v10, v6
613e561e7cbSMatt Arsenault; GFX9-NEXT:    v_rcp_f32_e32 v11, v7
614e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v12, -v6, v10, 1.0
615e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v10, v12, v10, v10
616e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v13, -v7, v11, 1.0
617e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v11, v13, v11, v11
618e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v12, v8, v10
619e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v13, v9, v11
620e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v14, -v6, v12, v8
621e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v15, -v7, v13, v9
622e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v12, v14, v10, v12
623e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v6, -v6, v12, v8
624e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v8, v15, v11, v13
625e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fmas_f32 v6, v6, v10, v12
626e561e7cbSMatt Arsenault; GFX9-NEXT:    v_fma_f32 v7, -v7, v8, v9
62737512d76SMatt Arsenault; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
628e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fmas_f32 v7, v7, v11, v8
629e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
630e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
631e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v4
632e561e7cbSMatt Arsenault; GFX9-NEXT:    v_div_fixup_f32 v5, v7, v5, 1.0
633e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v5
634e561e7cbSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v5
63537512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
63637512d76SMatt Arsenault;
63737512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v2f32_x2:
63837512d76SMatt Arsenault; GFX11:       ; %bb.0:
63937512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v6, null, v4, v4, 1.0
641e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v7, null, v5, v5, 1.0
642e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v12, vcc_lo, 1.0, v4, 1.0
64337512d76SMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
644e561e7cbSMatt Arsenault; GFX11-NEXT:    v_rcp_f32_e32 v8, v6
645e561e7cbSMatt Arsenault; GFX11-NEXT:    v_rcp_f32_e32 v9, v7
64637512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
647e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v10, -v6, v8, 1.0
648e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v11, -v7, v9, 1.0
649e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
650e561e7cbSMatt Arsenault; GFX11-NEXT:    v_dual_fmac_f32 v8, v10, v8 :: v_dual_fmac_f32 v9, v11, v9
651e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_scale_f32 v10, s0, 1.0, v5, 1.0
652e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v11, v12, v8
653e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
654e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v14, -v6, v11, v12
655e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fmac_f32_e32 v11, v14, v8
656e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
657e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v6, -v6, v11, v12
658e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fmas_f32 v6, v6, v8, v11
65937512d76SMatt Arsenault; GFX11-NEXT:    s_mov_b32 vcc_lo, s0
660e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
661e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
662e561e7cbSMatt Arsenault; GFX11-NEXT:    v_dual_mul_f32 v13, v10, v9 :: v_dual_mul_f32 v0, v0, v4
663e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
664e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v15, -v7, v13, v10
665e561e7cbSMatt Arsenault; GFX11-NEXT:    v_dual_mul_f32 v2, v2, v4 :: v_dual_fmac_f32 v13, v15, v9
666e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
667e561e7cbSMatt Arsenault; GFX11-NEXT:    v_fma_f32 v7, -v7, v13, v10
668e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fmas_f32 v7, v7, v9, v13
669e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
670e561e7cbSMatt Arsenault; GFX11-NEXT:    v_div_fixup_f32 v5, v7, v5, 1.0
671e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v5
672e561e7cbSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v3, v3, v5
67337512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: both vector fdivs use %D as their right-hand operand.
67437512d76SMatt Arsenault  %div0 = fdiv arcp <2 x float> %x, %D
67537512d76SMatt Arsenault  %div1 = fdiv arcp <2 x float> %y, %D
67637512d76SMatt Arsenault  %shuffle = shufflevector <2 x float> %div0, <2 x float> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
67737512d76SMatt Arsenault  ret <4 x float> %shuffle
67837512d76SMatt Arsenault}
67937512d76SMatt Arsenault
; Two scalar f32 divisions that share the divisor %D, each carrying arcp and
; the !fpmath !0 metadata.
; NOTE(review): !0 is declared elsewhere in this file; the "ulp25" in the name
; presumably refers to the accuracy it allows - confirm against that node.
; The assertions show a single reciprocal built from v_frexp_mant, v_rcp_f32,
; v_frexp_exp, a negation of the exponent and v_ldexp, reused by two
; v_mul_f32. The tahiti assertions additionally compare |%D| against
; 0x7f800000 and select between %D and its mantissa before the v_rcp_f32.
68037512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D) #0 {
68137512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_ulp25:
68237512d76SMatt Arsenault; GFX6:       ; %bb.0:
68337512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6848287f3afSMatt Arsenault; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
6858287f3afSMatt Arsenault; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v2
6868287f3afSMatt Arsenault; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v2|, s4
6878287f3afSMatt Arsenault; GFX6-NEXT:    v_cndmask_b32_e32 v3, v2, v3, vcc
6888287f3afSMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
6898287f3afSMatt Arsenault; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
6908287f3afSMatt Arsenault; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v2
6918287f3afSMatt Arsenault; GFX6-NEXT:    v_ldexp_f32_e32 v2, v3, v2
6928287f3afSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
6938287f3afSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
69437512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
69537512d76SMatt Arsenault;
69637512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_ulp25:
69737512d76SMatt Arsenault; GFX9:       ; %bb.0:
69837512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6998287f3afSMatt Arsenault; GFX9-NEXT:    v_frexp_mant_f32_e32 v3, v2
7008287f3afSMatt Arsenault; GFX9-NEXT:    v_rcp_f32_e32 v3, v3
7018287f3afSMatt Arsenault; GFX9-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
7028287f3afSMatt Arsenault; GFX9-NEXT:    v_sub_u32_e32 v2, 0, v2
7038287f3afSMatt Arsenault; GFX9-NEXT:    v_ldexp_f32 v2, v3, v2
7048287f3afSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
7058287f3afSMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
70637512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
70737512d76SMatt Arsenault;
70837512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_ulp25:
70937512d76SMatt Arsenault; GFX11:       ; %bb.0:
71037512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7118287f3afSMatt Arsenault; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v2
7128287f3afSMatt Arsenault; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
7138287f3afSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7148287f3afSMatt Arsenault; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
7158287f3afSMatt Arsenault; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 0, v2
71637512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7178287f3afSMatt Arsenault; GFX11-NEXT:    v_ldexp_f32 v2, v3, v2
7188287f3afSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7198287f3afSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
7208287f3afSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
72137512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: both fdivs use %D as their right-hand operand.
72237512d76SMatt Arsenault  %div0 = fdiv arcp float %x, %D, !fpmath !0
72337512d76SMatt Arsenault  %div1 = fdiv arcp float %y, %D, !fpmath !0
72437512d76SMatt Arsenault  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
72537512d76SMatt Arsenault  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
72637512d76SMatt Arsenault  ret <2 x float> %insert.1
72737512d76SMatt Arsenault}
72837512d76SMatt Arsenault
; Same IR body as @v_repeat_divisor_f32_x2_ulp25 (two arcp fdivs by %D with
; !fpmath !0), but compiled under attribute group #1 instead of #0.
; NOTE(review): #1 is declared elsewhere in this file; the "daz" in the name
; presumably refers to the denormal handling it selects - confirm there.
; For all three run lines the assertions show a bare v_rcp_f32 of the divisor
; reused by two v_mul_f32, with no frexp/ldexp scaling around it.
72937512d76SMatt Arsenaultdefine <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float %D) #1 {
73037512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
73137512d76SMatt Arsenault; GFX6:       ; %bb.0:
73237512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73337512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
73437512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
73537512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
73637512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
73737512d76SMatt Arsenault;
73837512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
73937512d76SMatt Arsenault; GFX9:       ; %bb.0:
74037512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74137512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
74237512d76SMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
74337512d76SMatt Arsenault; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
74437512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
74537512d76SMatt Arsenault;
74637512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
74737512d76SMatt Arsenault; GFX11:       ; %bb.0:
74837512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74937512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
75037512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
75137512d76SMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
7528287f3afSMatt Arsenault; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
75337512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: both fdivs use %D as their right-hand operand.
75437512d76SMatt Arsenault  %div0 = fdiv arcp float %x, %D, !fpmath !0
75537512d76SMatt Arsenault  %div1 = fdiv arcp float %y, %D, !fpmath !0
75637512d76SMatt Arsenault  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
75737512d76SMatt Arsenault  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
75837512d76SMatt Arsenault  ret <2 x float> %insert.1
75937512d76SMatt Arsenault}
76037512d76SMatt Arsenault
; Two <2 x half> divisions that share the vector divisor %D, both carrying
; arcp; the two quotients are concatenated into a <4 x half> by the
; shufflevector.
; The tahiti assertions convert every lane through v_cvt_f16_f32 and
; v_cvt_f32_f16, build one f32 reciprocal per divisor lane (v4 and v5) and
; finish with four v_mul_f32. The gfx900 and gfx1100 assertions take one
; v_rcp_f16 per divisor lane, pack the two reciprocals with v_pack_b32_f16
; and reuse that packed value in two v_pk_mul_f16.
76137512d76SMatt Arsenaultdefine <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x half> %D) #0 {
76237512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v2f16_x2:
76337512d76SMatt Arsenault; GFX6:       ; %bb.0:
76437512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76537512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v4, v4
76637512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v5, v5
76737512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
768e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
769e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
770e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
771e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
772e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
773e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
774e561e7cbSMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v7, v6
775e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
776e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
777e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
77837512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v8, -v6, v7, 1.0
77937512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, v8, v7, v7
780e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
78137512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v9, v8, v7
78237512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v6, v9, v8
78337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v10, v7, v9
78437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, -v6, v9, v8
785e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v8, s[4:5], v5, v5, 1.0
78637512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v10, v8
78737512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v6, v6, v7, v9
788e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
789e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v5, 1.0
79037512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, -v8, v10, 1.0
79137512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v6, v6, v10, v10
79237512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v9, v7, v6
79337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v8, v9, v7
794e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
79537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v10, v6, v9
79637512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v7, -v8, v9, v7
79737512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v6, v7, v6, v9
798e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v5, v6, v5, 1.0
799e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
800e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v5
801e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
802e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v5
80337512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
80437512d76SMatt Arsenault;
80537512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v2f16_x2:
80637512d76SMatt Arsenault; GFX9:       ; %bb.0:
80737512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80837512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_sdwa v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
80937512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_e32 v2, v2
810e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pack_b32_f16 v2, v2, v3
811e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
812e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v2
81337512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
81437512d76SMatt Arsenault;
81537512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v2f16_x2:
81637512d76SMatt Arsenault; GFX11:       ; %bb.0:
81737512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81837512d76SMatt Arsenault; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
81937512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v2, v2
820e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
82137512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v3, v3
82237512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
823e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pack_b32_f16 v2, v2, v3
824e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v2
825e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v1, v1, v2
82637512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: both vector fdivs use %D as their right-hand operand.
82737512d76SMatt Arsenault  %div0 = fdiv arcp <2 x half> %x, %D
82837512d76SMatt Arsenault  %div1 = fdiv arcp <2 x half> %y, %D
82937512d76SMatt Arsenault  %shuffle = shufflevector <2 x half> %div0, <2 x half> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
83037512d76SMatt Arsenault  ret <4 x half> %shuffle
83137512d76SMatt Arsenault}
83237512d76SMatt Arsenault
83337512d76SMatt Arsenaultdefine <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x half> %D) #0 {
83437512d76SMatt Arsenault; GFX6-LABEL: v_repeat_divisor_v3f16_x2:
83537512d76SMatt Arsenault; GFX6:       ; %bb.0:
83637512d76SMatt Arsenault; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83737512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v6, v6
83837512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v7, v7
83937512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v8, v8
840e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v5, v5
841e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v6, v6
842e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v7, v7
843e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v8, v8
844e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v4, v4
845e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v9, s[4:5], v6, v6, 1.0
846e561e7cbSMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v10, v9
84737512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
848e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
849e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
85037512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v11, -v9, v10, 1.0
85137512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, v11, v10, v10
852e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v11, vcc, 1.0, v6, 1.0
85337512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v12, v11, v10
85437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v13, -v9, v12, v11
85537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v12, v13, v10, v12
85637512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, -v9, v12, v11
857e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v11, s[4:5], v7, v7, 1.0
85837512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v13, v11
85937512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v9, v9, v10, v12
860e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v6, v9, v6, 1.0
861e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v10, vcc, 1.0, v7, 1.0
86237512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, -v11, v13, 1.0
86337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v9, v13, v13
86437512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v12, v10, v9
86537512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v13, -v11, v12, v10
86637512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v12, v13, v9, v12
86737512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v11, v12, v10
868e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v11, s[4:5], v8, v8, 1.0
86937512d76SMatt Arsenault; GFX6-NEXT:    v_rcp_f32_e32 v13, v11
87037512d76SMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v9, v10, v9, v12
871e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
872e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v7, v9, v7, 1.0
87337512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, -v11, v13, 1.0
87437512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v9, v9, v13, v13
875e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_scale_f32 v10, vcc, 1.0, v8, 1.0
87637512d76SMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v12, v10, v9
87737512d76SMatt Arsenault; GFX6-NEXT:    v_fma_f32 v13, -v11, v12, v10
87837512d76SMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
879e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
880e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
881e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
882e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
883e561e7cbSMatt Arsenault; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
884e561e7cbSMatt Arsenault; GFX6-NEXT:    v_fma_f32 v12, v13, v9, v12
885e561e7cbSMatt Arsenault; GFX6-NEXT:    v_fma_f32 v10, -v11, v12, v10
886e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fmas_f32 v9, v10, v9, v12
887e561e7cbSMatt Arsenault; GFX6-NEXT:    v_div_fixup_f32 v8, v9, v8, 1.0
888e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v6
889e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v7
890e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v8
891e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v6
892e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v4, v4, v7
893e561e7cbSMatt Arsenault; GFX6-NEXT:    v_mul_f32_e32 v5, v5, v8
89437512d76SMatt Arsenault; GFX6-NEXT:    s_setpc_b64 s[30:31]
89537512d76SMatt Arsenault;
89637512d76SMatt Arsenault; GFX9-LABEL: v_repeat_divisor_v3f16_x2:
89737512d76SMatt Arsenault; GFX9:       ; %bb.0:
89837512d76SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89937512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_sdwa v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
90037512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_e32 v4, v4
90137512d76SMatt Arsenault; GFX9-NEXT:    v_rcp_f16_e32 v5, v5
902e561e7cbSMatt Arsenault; GFX9-NEXT:    s_movk_i32 s4, 0x7e00
903e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pack_b32_f16 v4, v4, v6
904e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pack_b32_f16 v5, v5, s4
905e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4
906e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5
907e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v5
908e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pk_mul_f16 v4, v2, v4
909e561e7cbSMatt Arsenault; GFX9-NEXT:    v_alignbit_b32 v2, v3, v4, 16
910e561e7cbSMatt Arsenault; GFX9-NEXT:    v_pack_b32_f16 v1, v1, v4
91137512d76SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
91237512d76SMatt Arsenault;
91337512d76SMatt Arsenault; GFX11-LABEL: v_repeat_divisor_v3f16_x2:
91437512d76SMatt Arsenault; GFX11:       ; %bb.0:
91537512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91637512d76SMatt Arsenault; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v4
91737512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v4, v4
91837512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v5, v5
919e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
92037512d76SMatt Arsenault; GFX11-NEXT:    v_rcp_f16_e32 v6, v6
92137512d76SMatt Arsenault; GFX11-NEXT:    s_waitcnt_depctr 0xfff
922e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pack_b32_f16 v5, v5, 0x7e00
923e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pack_b32_f16 v4, v4, v6
924e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v1, v1, v5
925e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v3, v3, v5
926e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
927e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v2, v2, v4
928e561e7cbSMatt Arsenault; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v4
92937512d76SMatt Arsenault; GFX11-NEXT:    v_pack_b32_f16 v1, v1, v2
930e561e7cbSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
931e561e7cbSMatt Arsenault; GFX11-NEXT:    v_alignbit_b32 v2, v3, v2, 16
93237512d76SMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
93337512d76SMatt Arsenault  %div0 = fdiv arcp <3 x half> %x, %D
93437512d76SMatt Arsenault  %div1 = fdiv arcp <3 x half> %y, %D
93537512d76SMatt Arsenault  %shuffle = shufflevector <3 x half> %div0, <3 x half> %div1, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
93637512d76SMatt Arsenault  ret <6 x half> %shuffle
93737512d76SMatt Arsenault}
93837512d76SMatt Arsenault
; Attribute groups that functions in this file attach with a trailing #N.
; Each sets only the "denormal-fp-math-f32" string attribute, which applies to
; the 32-bit float type; the value is an "<output mode>,<input mode>" pair.
; #0: denormals handled per IEEE for both results and inputs.
93937512d76SMatt Arsenaultattributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }
; #1: "preserve-sign" for both results and inputs.
; NOTE(review): the functions using #1 are outside this excerpt - confirm.
94037512d76SMatt Arsenaultattributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
94137512d76SMatt Arsenault
; Metadata node wrapping the single constant `float 2.5`.
; NOTE(review): presumably the !fpmath accuracy operand of an fdiv elsewhere in
; this file; its users are not visible here - confirm before changing it.
94237512d76SMatt Arsenault!0 = !{float 2.5}
94337512d76SMatt Arsenault;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
94437512d76SMatt Arsenault; GCN: {{.*}}
945