xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fdiv.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX6,GFX6-FASTFMA %s
3; RUN: llc -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX6,GFX6-SLOWFMA %s
4; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX7 %s
5; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
8; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s
9
10; These tests check that fdiv is expanded correctly and also test that the
11; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
12; instruction groups.
13
14; These tests check fdiv lowering using unsafe_fp_math, coarse fp div, and IEEE754 fp div.
15
; s_fdiv_f32_ninf: f32 fdiv of two kernel arguments carrying the `ninf`
; fast-math flag, in attribute group #0 (its definition is not visible in this
; part of the file).
; Expected GCN code is the full v_div_scale / v_rcp / v_fma chain /
; v_div_fmas / v_div_fixup sequence, with the FMA chain bracketed by a MODE
; register update (s_setreg_imm32_b32 on GFX6-GFX8, s_denorm_mode on
; GFX10/GFX11). EG expects RECIP_IEEE followed by MUL_IEEE.
; NOTE(review): the MODE updates presumably toggle f32 denormal handling
; required by attribute group #0 -- confirm against the attribute definition.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
16define amdgpu_kernel void @s_fdiv_f32_ninf(ptr addrspace(1) %out, float %a, float %b) #0 {
17; GFX6-FASTFMA-LABEL: s_fdiv_f32_ninf:
18; GFX6-FASTFMA:       ; %bb.0: ; %entry
19; GFX6-FASTFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
20; GFX6-FASTFMA-NEXT:    s_mov_b32 s7, 0xf000
21; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, -1
22; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
23; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s2
24; GFX6-FASTFMA-NEXT:    s_mov_b32 s4, s0
25; GFX6-FASTFMA-NEXT:    s_mov_b32 s5, s1
26; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[0:1], s3, s3, v1
27; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
28; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s3
29; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
30; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
31; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
32; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
33; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v0, v3
34; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v4, v0
35; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
36; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v2, v4, v0
37; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
38; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
39; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s3, v1
40; GFX6-FASTFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
41; GFX6-FASTFMA-NEXT:    s_endpgm
42;
43; GFX6-SLOWFMA-LABEL: s_fdiv_f32_ninf:
44; GFX6-SLOWFMA:       ; %bb.0: ; %entry
45; GFX6-SLOWFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
46; GFX6-SLOWFMA-NEXT:    s_mov_b32 s7, 0xf000
47; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, -1
48; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
49; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v0, s2
50; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
51; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v2, s3
52; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
53; GFX6-SLOWFMA-NEXT:    s_mov_b32 s4, s0
54; GFX6-SLOWFMA-NEXT:    s_mov_b32 s5, s1
55; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
56; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
57; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
58; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
59; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
60; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
61; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
62; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
63; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
64; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
65; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s3, v0
66; GFX6-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
67; GFX6-SLOWFMA-NEXT:    s_endpgm
68;
69; GFX7-LABEL: s_fdiv_f32_ninf:
70; GFX7:       ; %bb.0: ; %entry
71; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
72; GFX7-NEXT:    s_mov_b32 s7, 0xf000
73; GFX7-NEXT:    s_mov_b32 s6, -1
74; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
75; GFX7-NEXT:    v_mov_b32_e32 v1, s2
76; GFX7-NEXT:    s_mov_b32 s4, s0
77; GFX7-NEXT:    s_mov_b32 s5, s1
78; GFX7-NEXT:    v_div_scale_f32 v2, s[0:1], s3, s3, v1
79; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
80; GFX7-NEXT:    v_mov_b32_e32 v0, s3
81; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
82; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
83; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
84; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
85; GFX7-NEXT:    v_mul_f32_e32 v4, v0, v3
86; GFX7-NEXT:    v_fma_f32 v5, -v2, v4, v0
87; GFX7-NEXT:    v_fma_f32 v4, v5, v3, v4
88; GFX7-NEXT:    v_fma_f32 v0, -v2, v4, v0
89; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
90; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
91; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s3, v1
92; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
93; GFX7-NEXT:    s_endpgm
94;
95; GFX8-LABEL: s_fdiv_f32_ninf:
96; GFX8:       ; %bb.0: ; %entry
97; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
98; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
99; GFX8-NEXT:    v_mov_b32_e32 v0, s2
100; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
101; GFX8-NEXT:    v_mov_b32_e32 v2, s3
102; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
103; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
104; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
105; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
106; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
107; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
108; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
109; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
110; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
111; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
112; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
113; GFX8-NEXT:    v_div_fixup_f32 v2, v1, s3, v0
114; GFX8-NEXT:    v_mov_b32_e32 v0, s0
115; GFX8-NEXT:    v_mov_b32_e32 v1, s1
116; GFX8-NEXT:    flat_store_dword v[0:1], v2
117; GFX8-NEXT:    s_endpgm
118;
119; GFX10-LABEL: s_fdiv_f32_ninf:
120; GFX10:       ; %bb.0: ; %entry
121; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
122; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
123; GFX10-NEXT:    v_div_scale_f32 v0, s4, s3, s3, s2
124; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
125; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
126; GFX10-NEXT:    s_denorm_mode 15
127; GFX10-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
128; GFX10-NEXT:    v_fmac_f32_e32 v1, v3, v1
129; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
130; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
131; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
132; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
133; GFX10-NEXT:    s_denorm_mode 12
134; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
135; GFX10-NEXT:    v_mov_b32_e32 v1, 0
136; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
137; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
138; GFX10-NEXT:    s_endpgm
139;
140; GFX11-LABEL: s_fdiv_f32_ninf:
141; GFX11:       ; %bb.0: ; %entry
142; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
143; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
144; GFX11-NEXT:    v_div_scale_f32 v0, null, s3, s3, s2
145; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
146; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
147; GFX11-NEXT:    s_denorm_mode 15
148; GFX11-NEXT:    s_waitcnt_depctr 0xfff
149; GFX11-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
150; GFX11-NEXT:    v_fmac_f32_e32 v1, v3, v1
151; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
152; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
153; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
154; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
155; GFX11-NEXT:    s_denorm_mode 12
156; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
157; GFX11-NEXT:    v_mov_b32_e32 v1, 0
158; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
159; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
160; GFX11-NEXT:    s_endpgm
161;
162; EG-LABEL: s_fdiv_f32_ninf:
163; EG:       ; %bb.0: ; %entry
164; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
165; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
166; EG-NEXT:    CF_END
167; EG-NEXT:    PAD
168; EG-NEXT:    ALU clause starting at 4:
169; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
170; EG-NEXT:     MUL_IEEE T0.X, KC0[2].Z, PS,
171; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
172; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
173entry:
174  %fdiv = fdiv ninf float %a, %b
175  store float %fdiv, ptr addrspace(1) %out
176  ret void
177}
178
; s_fdiv_f32_ieee: f32 fdiv of two kernel arguments with no fast-math flags,
; in attribute group #1 (its definition is not visible in this part of the
; file).
; Expected GCN code is the full v_div_scale / v_rcp / v_fma chain /
; v_div_fmas / v_div_fixup sequence; unlike the #0 variant of this test, the
; checks contain no s_setreg_imm32_b32 / s_denorm_mode instructions.
; EG expects RECIP_IEEE followed by MUL_IEEE.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
179define amdgpu_kernel void @s_fdiv_f32_ieee(ptr addrspace(1) %out, float %a, float %b) #1 {
180; GFX6-FASTFMA-LABEL: s_fdiv_f32_ieee:
181; GFX6-FASTFMA:       ; %bb.0: ; %entry
182; GFX6-FASTFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
183; GFX6-FASTFMA-NEXT:    s_mov_b32 s7, 0xf000
184; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, -1
185; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
186; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s2
187; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
188; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
189; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v3, s3
190; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s2, v3, s2
191; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
192; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
193; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
194; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
195; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
196; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
197; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
198; GFX6-FASTFMA-NEXT:    s_mov_b32 s4, s0
199; GFX6-FASTFMA-NEXT:    s_mov_b32 s5, s1
200; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s3, v0
201; GFX6-FASTFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
202; GFX6-FASTFMA-NEXT:    s_endpgm
203;
204; GFX6-SLOWFMA-LABEL: s_fdiv_f32_ieee:
205; GFX6-SLOWFMA:       ; %bb.0: ; %entry
206; GFX6-SLOWFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
207; GFX6-SLOWFMA-NEXT:    s_mov_b32 s7, 0xf000
208; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, -1
209; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
210; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v0, s2
211; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
212; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v2, s3
213; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
214; GFX6-SLOWFMA-NEXT:    s_mov_b32 s4, s0
215; GFX6-SLOWFMA-NEXT:    s_mov_b32 s5, s1
216; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
217; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
218; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
219; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
220; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
221; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
222; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
223; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
224; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s3, v0
225; GFX6-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
226; GFX6-SLOWFMA-NEXT:    s_endpgm
227;
228; GFX7-LABEL: s_fdiv_f32_ieee:
229; GFX7:       ; %bb.0: ; %entry
230; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
231; GFX7-NEXT:    s_mov_b32 s7, 0xf000
232; GFX7-NEXT:    s_mov_b32 s6, -1
233; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
234; GFX7-NEXT:    v_mov_b32_e32 v0, s2
235; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
236; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
237; GFX7-NEXT:    v_mov_b32_e32 v3, s3
238; GFX7-NEXT:    v_div_scale_f32 v3, vcc, s2, v3, s2
239; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
240; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
241; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
242; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
243; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
244; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
245; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
246; GFX7-NEXT:    s_mov_b32 s4, s0
247; GFX7-NEXT:    s_mov_b32 s5, s1
248; GFX7-NEXT:    v_div_fixup_f32 v0, v1, s3, v0
249; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
250; GFX7-NEXT:    s_endpgm
251;
252; GFX8-LABEL: s_fdiv_f32_ieee:
253; GFX8:       ; %bb.0: ; %entry
254; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
255; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
256; GFX8-NEXT:    v_mov_b32_e32 v0, s2
257; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
258; GFX8-NEXT:    v_mov_b32_e32 v2, s3
259; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
260; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
261; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
262; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
263; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
264; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
265; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
266; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
267; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
268; GFX8-NEXT:    v_div_fixup_f32 v2, v1, s3, v0
269; GFX8-NEXT:    v_mov_b32_e32 v0, s0
270; GFX8-NEXT:    v_mov_b32_e32 v1, s1
271; GFX8-NEXT:    flat_store_dword v[0:1], v2
272; GFX8-NEXT:    s_endpgm
273;
274; GFX10-LABEL: s_fdiv_f32_ieee:
275; GFX10:       ; %bb.0: ; %entry
276; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
277; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
278; GFX10-NEXT:    v_div_scale_f32 v0, s4, s3, s3, s2
279; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
280; GFX10-NEXT:    v_fma_f32 v2, -v0, v1, 1.0
281; GFX10-NEXT:    v_fmac_f32_e32 v1, v2, v1
282; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
283; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
284; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
285; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
286; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
287; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
288; GFX10-NEXT:    v_mov_b32_e32 v1, 0
289; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
290; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
291; GFX10-NEXT:    s_endpgm
292;
293; GFX11-LABEL: s_fdiv_f32_ieee:
294; GFX11:       ; %bb.0: ; %entry
295; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
296; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
297; GFX11-NEXT:    v_div_scale_f32 v0, null, s3, s3, s2
298; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
299; GFX11-NEXT:    s_waitcnt_depctr 0xfff
300; GFX11-NEXT:    v_fma_f32 v2, -v0, v1, 1.0
301; GFX11-NEXT:    v_fmac_f32_e32 v1, v2, v1
302; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
303; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
304; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
305; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
306; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
307; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
308; GFX11-NEXT:    v_mov_b32_e32 v1, 0
309; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
310; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
311; GFX11-NEXT:    s_endpgm
312;
313; EG-LABEL: s_fdiv_f32_ieee:
314; EG:       ; %bb.0: ; %entry
315; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
316; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
317; EG-NEXT:    CF_END
318; EG-NEXT:    PAD
319; EG-NEXT:    ALU clause starting at 4:
320; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
321; EG-NEXT:     MUL_IEEE T0.X, KC0[2].Z, PS,
322; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
323; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
324entry:
325  %fdiv = fdiv float %a, %b
326  store float %fdiv, ptr addrspace(1) %out
327  ret void
328}
329
; s_fdiv_25ulp_f32: f32 fdiv of two kernel arguments annotated with
; `!fpmath !0`, in attribute group #0 (neither !0 nor #0 is defined in this
; part of the file).
; NOTE(review): the test name suggests !0 requests 2.5 ulp accuracy -- confirm
; against the metadata definition.
; Expected GCN code: compare |b| against 0x6f800000, select a scale factor of
; 1.0 or 0x2f800000, multiply b by it, v_rcp_f32, then multiply by a and by
; the scale factor again. EG expects RECIP_IEEE followed by MUL_IEEE.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
330define amdgpu_kernel void @s_fdiv_25ulp_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
331; GFX67-LABEL: s_fdiv_25ulp_f32:
332; GFX67:       ; %bb.0: ; %entry
333; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
334; GFX67-NEXT:    v_mov_b32_e32 v0, 0x6f800000
335; GFX67-NEXT:    v_mov_b32_e32 v1, 0x2f800000
336; GFX67-NEXT:    s_mov_b32 s7, 0xf000
337; GFX67-NEXT:    s_mov_b32 s6, -1
338; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
339; GFX67-NEXT:    v_cmp_gt_f32_e64 vcc, |s3|, v0
340; GFX67-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
341; GFX67-NEXT:    v_mul_f32_e32 v1, s3, v0
342; GFX67-NEXT:    v_rcp_f32_e32 v1, v1
343; GFX67-NEXT:    s_mov_b32 s4, s0
344; GFX67-NEXT:    s_mov_b32 s5, s1
345; GFX67-NEXT:    v_mul_f32_e32 v1, s2, v1
346; GFX67-NEXT:    v_mul_f32_e32 v0, v0, v1
347; GFX67-NEXT:    buffer_store_dword v0, off, s[4:7], 0
348; GFX67-NEXT:    s_endpgm
349;
350; GFX8-LABEL: s_fdiv_25ulp_f32:
351; GFX8:       ; %bb.0: ; %entry
352; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
353; GFX8-NEXT:    v_mov_b32_e32 v0, 0x6f800000
354; GFX8-NEXT:    v_mov_b32_e32 v1, 0x2f800000
355; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
356; GFX8-NEXT:    v_cmp_gt_f32_e64 vcc, |s3|, v0
357; GFX8-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
358; GFX8-NEXT:    v_mul_f32_e32 v1, s3, v0
359; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
360; GFX8-NEXT:    v_mul_f32_e32 v1, s2, v1
361; GFX8-NEXT:    v_mul_f32_e32 v2, v0, v1
362; GFX8-NEXT:    v_mov_b32_e32 v0, s0
363; GFX8-NEXT:    v_mov_b32_e32 v1, s1
364; GFX8-NEXT:    flat_store_dword v[0:1], v2
365; GFX8-NEXT:    s_endpgm
366;
367; GFX10-LABEL: s_fdiv_25ulp_f32:
368; GFX10:       ; %bb.0: ; %entry
369; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
370; GFX10-NEXT:    v_mov_b32_e32 v2, 0
371; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
372; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |s3|
373; GFX10-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0x2f800000, s4
374; GFX10-NEXT:    v_mul_f32_e32 v1, s3, v0
375; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
376; GFX10-NEXT:    v_mul_f32_e32 v1, s2, v1
377; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
378; GFX10-NEXT:    global_store_dword v2, v0, s[0:1]
379; GFX10-NEXT:    s_endpgm
380;
381; GFX11-LABEL: s_fdiv_25ulp_f32:
382; GFX11:       ; %bb.0: ; %entry
383; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
384; GFX11-NEXT:    v_mov_b32_e32 v2, 0
385; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
386; GFX11-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |s3|
387; GFX11-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0x2f800000, s4
388; GFX11-NEXT:    v_mul_f32_e32 v1, s3, v0
389; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
390; GFX11-NEXT:    s_waitcnt_depctr 0xfff
391; GFX11-NEXT:    v_mul_f32_e32 v1, s2, v1
392; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
393; GFX11-NEXT:    global_store_b32 v2, v0, s[0:1]
394; GFX11-NEXT:    s_endpgm
395;
396; EG-LABEL: s_fdiv_25ulp_f32:
397; EG:       ; %bb.0: ; %entry
398; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
399; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
400; EG-NEXT:    CF_END
401; EG-NEXT:    PAD
402; EG-NEXT:    ALU clause starting at 4:
403; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
404; EG-NEXT:     MUL_IEEE T0.X, KC0[2].Z, PS,
405; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
406; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
407entry:
408  %fdiv = fdiv float %a, %b, !fpmath !0
409  store float %fdiv, ptr addrspace(1) %out
410  ret void
411}
412
413; Use correct fdiv
; s_fdiv_25ulp_ieee_f32: same IR body as s_fdiv_25ulp_f32 (fdiv annotated with
; `!fpmath !0`) but in attribute group #1 (neither !0 nor #1 is defined in
; this part of the file).
; Expected GCN code splits both operands with v_frexp_mant_f32 /
; v_frexp_exp_i32_f32, takes v_rcp_f32 of the denominator mantissa, multiplies
; the mantissas, subtracts the exponents and recombines with v_ldexp_f32.
; GFX6 additionally selects between the frexp mantissa and the raw input
; based on a compare of |x| against 0x7f800000.
; EG expects RECIP_IEEE followed by MUL_IEEE.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
414define amdgpu_kernel void @s_fdiv_25ulp_ieee_f32(ptr addrspace(1) %out, float %a, float %b) #1 {
415; GFX6-LABEL: s_fdiv_25ulp_ieee_f32:
416; GFX6:       ; %bb.0: ; %entry
417; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
418; GFX6-NEXT:    v_mov_b32_e32 v0, 0x7f800000
419; GFX6-NEXT:    s_mov_b32 s7, 0xf000
420; GFX6-NEXT:    s_mov_b32 s6, -1
421; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
422; GFX6-NEXT:    v_frexp_mant_f32_e32 v1, s3
423; GFX6-NEXT:    v_mov_b32_e32 v2, s3
424; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |s3|, v0
425; GFX6-NEXT:    v_cndmask_b32_e32 v1, v2, v1, vcc
426; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
427; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, s2
428; GFX6-NEXT:    v_mov_b32_e32 v4, s2
429; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |s2|, v0
430; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v2, s3
431; GFX6-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
432; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v3, s2
433; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v1
434; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v3, v2
435; GFX6-NEXT:    s_mov_b32 s4, s0
436; GFX6-NEXT:    s_mov_b32 s5, s1
437; GFX6-NEXT:    v_ldexp_f32_e32 v0, v0, v1
438; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
439; GFX6-NEXT:    s_endpgm
440;
441; GFX7-LABEL: s_fdiv_25ulp_ieee_f32:
442; GFX7:       ; %bb.0: ; %entry
443; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
444; GFX7-NEXT:    s_mov_b32 s7, 0xf000
445; GFX7-NEXT:    s_mov_b32 s6, -1
446; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
447; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, s3
448; GFX7-NEXT:    v_rcp_f32_e32 v0, v0
449; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, s3
450; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
451; GFX7-NEXT:    v_frexp_mant_f32_e32 v3, s2
452; GFX7-NEXT:    v_mul_f32_e32 v0, v3, v0
453; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v2, v1
454; GFX7-NEXT:    s_mov_b32 s4, s0
455; GFX7-NEXT:    s_mov_b32 s5, s1
456; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
457; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
458; GFX7-NEXT:    s_endpgm
459;
460; GFX8-LABEL: s_fdiv_25ulp_ieee_f32:
461; GFX8:       ; %bb.0: ; %entry
462; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
463; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
464; GFX8-NEXT:    v_frexp_mant_f32_e32 v1, s3
465; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
466; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v0, s3
467; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v2, s2
468; GFX8-NEXT:    v_frexp_mant_f32_e32 v3, s2
469; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v2, v0
470; GFX8-NEXT:    v_mul_f32_e32 v1, v3, v1
471; GFX8-NEXT:    v_ldexp_f32 v2, v1, v0
472; GFX8-NEXT:    v_mov_b32_e32 v0, s0
473; GFX8-NEXT:    v_mov_b32_e32 v1, s1
474; GFX8-NEXT:    flat_store_dword v[0:1], v2
475; GFX8-NEXT:    s_endpgm
476;
477; GFX10-LABEL: s_fdiv_25ulp_ieee_f32:
478; GFX10:       ; %bb.0: ; %entry
479; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
480; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
481; GFX10-NEXT:    v_frexp_mant_f32_e32 v0, s3
482; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, s3
483; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, s2
484; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v3, s2
485; GFX10-NEXT:    v_rcp_f32_e32 v0, v0
486; GFX10-NEXT:    v_sub_nc_u32_e32 v1, v3, v1
487; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
488; GFX10-NEXT:    v_mov_b32_e32 v2, 0
489; GFX10-NEXT:    v_ldexp_f32 v0, v0, v1
490; GFX10-NEXT:    global_store_dword v2, v0, s[0:1]
491; GFX10-NEXT:    s_endpgm
492;
493; GFX11-LABEL: s_fdiv_25ulp_ieee_f32:
494; GFX11:       ; %bb.0: ; %entry
495; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
496; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
497; GFX11-NEXT:    v_frexp_mant_f32_e32 v0, s3
498; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, s3
499; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, s2
500; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v3, s2
501; GFX11-NEXT:    v_rcp_f32_e32 v0, v0
502; GFX11-NEXT:    v_sub_nc_u32_e32 v1, v3, v1
503; GFX11-NEXT:    s_waitcnt_depctr 0xfff
504; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
505; GFX11-NEXT:    v_mov_b32_e32 v2, 0
506; GFX11-NEXT:    v_ldexp_f32 v0, v0, v1
507; GFX11-NEXT:    global_store_b32 v2, v0, s[0:1]
508; GFX11-NEXT:    s_endpgm
509;
510; EG-LABEL: s_fdiv_25ulp_ieee_f32:
511; EG:       ; %bb.0: ; %entry
512; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
513; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
514; EG-NEXT:    CF_END
515; EG-NEXT:    PAD
516; EG-NEXT:    ALU clause starting at 4:
517; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
518; EG-NEXT:     MUL_IEEE T0.X, KC0[2].Z, PS,
519; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
520; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
521entry:
522  %fdiv = fdiv float %a, %b, !fpmath !0
523  store float %fdiv, ptr addrspace(1) %out
524  ret void
525}
526
; s_fdiv_fast_ieee_f32: f32 fdiv of two kernel arguments with the `fast`
; flag, in attribute group #1 (its definition is not visible in this part of
; the file).
; Expected GCN code is a bare v_rcp_f32 of b followed by a multiply by a
; (fused into v_dual_mov_b32 / v_dual_mul_f32 on GFX11). EG expects RECIP_IEEE
; followed by MUL_IEEE.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
527define amdgpu_kernel void @s_fdiv_fast_ieee_f32(ptr addrspace(1) %out, float %a, float %b) #1 {
528; GFX67-LABEL: s_fdiv_fast_ieee_f32:
529; GFX67:       ; %bb.0: ; %entry
530; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
531; GFX67-NEXT:    s_mov_b32 s7, 0xf000
532; GFX67-NEXT:    s_mov_b32 s6, -1
533; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
534; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
535; GFX67-NEXT:    s_mov_b32 s4, s0
536; GFX67-NEXT:    s_mov_b32 s5, s1
537; GFX67-NEXT:    v_mul_f32_e32 v0, s2, v0
538; GFX67-NEXT:    buffer_store_dword v0, off, s[4:7], 0
539; GFX67-NEXT:    s_endpgm
540;
541; GFX8-LABEL: s_fdiv_fast_ieee_f32:
542; GFX8:       ; %bb.0: ; %entry
543; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
544; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
546; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v0
547; GFX8-NEXT:    v_mov_b32_e32 v0, s0
548; GFX8-NEXT:    v_mov_b32_e32 v1, s1
549; GFX8-NEXT:    flat_store_dword v[0:1], v2
550; GFX8-NEXT:    s_endpgm
551;
552; GFX10-LABEL: s_fdiv_fast_ieee_f32:
553; GFX10:       ; %bb.0: ; %entry
554; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
555; GFX10-NEXT:    v_mov_b32_e32 v1, 0
556; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
557; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
558; GFX10-NEXT:    v_mul_f32_e32 v0, s2, v0
559; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
560; GFX10-NEXT:    s_endpgm
561;
562; GFX11-LABEL: s_fdiv_fast_ieee_f32:
563; GFX11:       ; %bb.0: ; %entry
564; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
565; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
566; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
567; GFX11-NEXT:    s_waitcnt_depctr 0xfff
568; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
569; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
570; GFX11-NEXT:    s_endpgm
571;
572; EG-LABEL: s_fdiv_fast_ieee_f32:
573; EG:       ; %bb.0: ; %entry
574; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
575; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
576; EG-NEXT:    CF_END
577; EG-NEXT:    PAD
578; EG-NEXT:    ALU clause starting at 4:
579; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
580; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].Z,
581; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
582; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
583entry:
584  %fdiv = fdiv fast float %a, %b
585  store float %fdiv, ptr addrspace(1) %out
586  ret void
587}
588
; s_fdiv_f32_fast_math: f32 fdiv of two kernel arguments with the `fast`
; flag, in attribute group #0 (its definition is not visible in this part of
; the file). The IR body matches s_fdiv_fast_ieee_f32; only the attribute
; group differs.
; Expected GCN code is a bare v_rcp_f32 of b followed by a multiply by a
; (fused into v_dual_mov_b32 / v_dual_mul_f32 on GFX11). EG expects RECIP_IEEE
; followed by MUL_IEEE.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
589define amdgpu_kernel void @s_fdiv_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 {
590; GFX67-LABEL: s_fdiv_f32_fast_math:
591; GFX67:       ; %bb.0: ; %entry
592; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
593; GFX67-NEXT:    s_mov_b32 s7, 0xf000
594; GFX67-NEXT:    s_mov_b32 s6, -1
595; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
596; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
597; GFX67-NEXT:    s_mov_b32 s4, s0
598; GFX67-NEXT:    s_mov_b32 s5, s1
599; GFX67-NEXT:    v_mul_f32_e32 v0, s2, v0
600; GFX67-NEXT:    buffer_store_dword v0, off, s[4:7], 0
601; GFX67-NEXT:    s_endpgm
602;
603; GFX8-LABEL: s_fdiv_f32_fast_math:
604; GFX8:       ; %bb.0: ; %entry
605; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
606; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
607; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
608; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v0
609; GFX8-NEXT:    v_mov_b32_e32 v0, s0
610; GFX8-NEXT:    v_mov_b32_e32 v1, s1
611; GFX8-NEXT:    flat_store_dword v[0:1], v2
612; GFX8-NEXT:    s_endpgm
613;
614; GFX10-LABEL: s_fdiv_f32_fast_math:
615; GFX10:       ; %bb.0: ; %entry
616; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
617; GFX10-NEXT:    v_mov_b32_e32 v1, 0
618; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
619; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
620; GFX10-NEXT:    v_mul_f32_e32 v0, s2, v0
621; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
622; GFX10-NEXT:    s_endpgm
623;
624; GFX11-LABEL: s_fdiv_f32_fast_math:
625; GFX11:       ; %bb.0: ; %entry
626; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
627; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
628; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
629; GFX11-NEXT:    s_waitcnt_depctr 0xfff
630; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
631; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
632; GFX11-NEXT:    s_endpgm
633;
634; EG-LABEL: s_fdiv_f32_fast_math:
635; EG:       ; %bb.0: ; %entry
636; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
637; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
638; EG-NEXT:    CF_END
639; EG-NEXT:    PAD
640; EG-NEXT:    ALU clause starting at 4:
641; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
642; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].Z,
643; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
644; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
645entry:
646  %fdiv = fdiv fast float %a, %b
647  store float %fdiv, ptr addrspace(1) %out
648  ret void
649}
650
; s_fdiv_ulp25_f32_fast_math: f32 fdiv of two kernel arguments with both the
; `fast` flag and `!fpmath !0`, in attribute group #0 (neither !0 nor #0 is
; defined in this part of the file).
; The expected code has the same shape as the plain `fast` tests: v_rcp_f32
; of b followed by a multiply by a on GCN (fused into v_dual_mov_b32 /
; v_dual_mul_f32 on GFX11), and RECIP_IEEE followed by MUL_IEEE on EG.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
651define amdgpu_kernel void @s_fdiv_ulp25_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 {
652; GFX67-LABEL: s_fdiv_ulp25_f32_fast_math:
653; GFX67:       ; %bb.0: ; %entry
654; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
655; GFX67-NEXT:    s_mov_b32 s7, 0xf000
656; GFX67-NEXT:    s_mov_b32 s6, -1
657; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
658; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
659; GFX67-NEXT:    s_mov_b32 s4, s0
660; GFX67-NEXT:    s_mov_b32 s5, s1
661; GFX67-NEXT:    v_mul_f32_e32 v0, s2, v0
662; GFX67-NEXT:    buffer_store_dword v0, off, s[4:7], 0
663; GFX67-NEXT:    s_endpgm
664;
665; GFX8-LABEL: s_fdiv_ulp25_f32_fast_math:
666; GFX8:       ; %bb.0: ; %entry
667; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
668; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
669; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
670; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v0
671; GFX8-NEXT:    v_mov_b32_e32 v0, s0
672; GFX8-NEXT:    v_mov_b32_e32 v1, s1
673; GFX8-NEXT:    flat_store_dword v[0:1], v2
674; GFX8-NEXT:    s_endpgm
675;
676; GFX10-LABEL: s_fdiv_ulp25_f32_fast_math:
677; GFX10:       ; %bb.0: ; %entry
678; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
679; GFX10-NEXT:    v_mov_b32_e32 v1, 0
680; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
681; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
682; GFX10-NEXT:    v_mul_f32_e32 v0, s2, v0
683; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
684; GFX10-NEXT:    s_endpgm
685;
686; GFX11-LABEL: s_fdiv_ulp25_f32_fast_math:
687; GFX11:       ; %bb.0: ; %entry
688; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
689; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
690; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
691; GFX11-NEXT:    s_waitcnt_depctr 0xfff
692; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
693; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
694; GFX11-NEXT:    s_endpgm
695;
696; EG-LABEL: s_fdiv_ulp25_f32_fast_math:
697; EG:       ; %bb.0: ; %entry
698; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
699; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
700; EG-NEXT:    CF_END
701; EG-NEXT:    PAD
702; EG-NEXT:    ALU clause starting at 4:
703; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
704; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].Z,
705; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
706; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
707entry:
708  %fdiv = fdiv fast float %a, %b, !fpmath !0
709  store float %fdiv, ptr addrspace(1) %out
710  ret void
711}
712
; s_fdiv_f32_arcp_daz: divides the scalar kernel arguments %a / %b using only
; the 'arcp' fast-math flag (no !fpmath metadata) and stores the result to %out.
; The amdgcn checks (GFX6 through GFX11) expect the long expansion: two
; v_div_scale_f32, a v_rcp_f32, an FMA refinement chain bracketed by a pair of
; mode writes (s_setreg_imm32_b32 on HW_REG_MODE, or s_denorm_mode), and then
; v_div_fmas_f32 plus v_div_fixup_f32. The r600 checks expect only RECIP_IEEE
; followed by MUL_IEEE.
; NOTE(review): the "daz" suffix presumably refers to the denormal mode chosen
; by attribute group #0, which is defined outside this part of the file - confirm.
713define amdgpu_kernel void @s_fdiv_f32_arcp_daz(ptr addrspace(1) %out, float %a, float %b) #0 {
714; GFX6-FASTFMA-LABEL: s_fdiv_f32_arcp_daz:
715; GFX6-FASTFMA:       ; %bb.0: ; %entry
716; GFX6-FASTFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
717; GFX6-FASTFMA-NEXT:    s_mov_b32 s7, 0xf000
718; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, -1
719; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
720; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s2
721; GFX6-FASTFMA-NEXT:    s_mov_b32 s4, s0
722; GFX6-FASTFMA-NEXT:    s_mov_b32 s5, s1
723; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[0:1], s3, s3, v1
724; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
725; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s3
726; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
727; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
728; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
729; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
730; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v0, v3
731; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v4, v0
732; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
733; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v2, v4, v0
734; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
735; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
736; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s3, v1
737; GFX6-FASTFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
738; GFX6-FASTFMA-NEXT:    s_endpgm
739;
740; GFX6-SLOWFMA-LABEL: s_fdiv_f32_arcp_daz:
741; GFX6-SLOWFMA:       ; %bb.0: ; %entry
742; GFX6-SLOWFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
743; GFX6-SLOWFMA-NEXT:    s_mov_b32 s7, 0xf000
744; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, -1
745; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
746; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v0, s2
747; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
748; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v2, s3
749; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
750; GFX6-SLOWFMA-NEXT:    s_mov_b32 s4, s0
751; GFX6-SLOWFMA-NEXT:    s_mov_b32 s5, s1
752; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
753; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
754; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
755; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
756; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
757; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
758; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
759; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
760; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
761; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
762; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s3, v0
763; GFX6-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[4:7], 0
764; GFX6-SLOWFMA-NEXT:    s_endpgm
765;
766; GFX7-LABEL: s_fdiv_f32_arcp_daz:
767; GFX7:       ; %bb.0: ; %entry
768; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
769; GFX7-NEXT:    s_mov_b32 s7, 0xf000
770; GFX7-NEXT:    s_mov_b32 s6, -1
771; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX7-NEXT:    v_mov_b32_e32 v1, s2
773; GFX7-NEXT:    s_mov_b32 s4, s0
774; GFX7-NEXT:    s_mov_b32 s5, s1
775; GFX7-NEXT:    v_div_scale_f32 v2, s[0:1], s3, s3, v1
776; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
777; GFX7-NEXT:    v_mov_b32_e32 v0, s3
778; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
779; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
780; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
781; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
782; GFX7-NEXT:    v_mul_f32_e32 v4, v0, v3
783; GFX7-NEXT:    v_fma_f32 v5, -v2, v4, v0
784; GFX7-NEXT:    v_fma_f32 v4, v5, v3, v4
785; GFX7-NEXT:    v_fma_f32 v0, -v2, v4, v0
786; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
787; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
788; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s3, v1
789; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0
790; GFX7-NEXT:    s_endpgm
791;
792; GFX8-LABEL: s_fdiv_f32_arcp_daz:
793; GFX8:       ; %bb.0: ; %entry
794; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
795; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
796; GFX8-NEXT:    v_mov_b32_e32 v0, s2
797; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s3, s3, v0
798; GFX8-NEXT:    v_mov_b32_e32 v2, s3
799; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s2, v2, s2
800; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
801; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
802; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
803; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
804; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
805; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
806; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
807; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
808; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
809; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
810; GFX8-NEXT:    v_div_fixup_f32 v2, v1, s3, v0
811; GFX8-NEXT:    v_mov_b32_e32 v0, s0
812; GFX8-NEXT:    v_mov_b32_e32 v1, s1
813; GFX8-NEXT:    flat_store_dword v[0:1], v2
814; GFX8-NEXT:    s_endpgm
815;
816; GFX10-LABEL: s_fdiv_f32_arcp_daz:
817; GFX10:       ; %bb.0: ; %entry
818; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
819; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
820; GFX10-NEXT:    v_div_scale_f32 v0, s4, s3, s3, s2
821; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
822; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
823; GFX10-NEXT:    s_denorm_mode 15
824; GFX10-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
825; GFX10-NEXT:    v_fmac_f32_e32 v1, v3, v1
826; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
827; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
828; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
829; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
830; GFX10-NEXT:    s_denorm_mode 12
831; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
832; GFX10-NEXT:    v_mov_b32_e32 v1, 0
833; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
834; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
835; GFX10-NEXT:    s_endpgm
836;
837; GFX11-LABEL: s_fdiv_f32_arcp_daz:
838; GFX11:       ; %bb.0: ; %entry
839; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
840; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
841; GFX11-NEXT:    v_div_scale_f32 v0, null, s3, s3, s2
842; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, s2, s3, s2
843; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
844; GFX11-NEXT:    s_denorm_mode 15
845; GFX11-NEXT:    s_waitcnt_depctr 0xfff
846; GFX11-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
847; GFX11-NEXT:    v_fmac_f32_e32 v1, v3, v1
848; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
849; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
850; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
851; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
852; GFX11-NEXT:    s_denorm_mode 12
853; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
854; GFX11-NEXT:    v_mov_b32_e32 v1, 0
855; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s3, s2
856; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
857; GFX11-NEXT:    s_endpgm
858;
859; EG-LABEL: s_fdiv_f32_arcp_daz:
860; EG:       ; %bb.0: ; %entry
861; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
862; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
863; EG-NEXT:    CF_END
864; EG-NEXT:    PAD
865; EG-NEXT:    ALU clause starting at 4:
866; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
867; EG-NEXT:     MUL_IEEE T0.X, KC0[2].Z, PS,
868; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
869; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
870entry:
871  %fdiv = fdiv arcp float %a, %b
872  store float %fdiv, ptr addrspace(1) %out
873  ret void
874}
875
; s_fdiv_f32_arcp_ninf: divides the scalar kernel arguments %a / %b with both
; the 'arcp' and 'ninf' fast-math flags and stores the result to %out.
; Every target is expected to emit just a reciprocal followed by a multiply
; (v_rcp_f32 + v_mul_f32 on amdgcn, RECIP_IEEE + MUL_IEEE on r600); none of the
; checks contain a v_div_scale_f32 / v_div_fixup_f32 sequence. GFX6 and GFX7
; produce identical code here, so they share the GFX67 prefix.
876define amdgpu_kernel void @s_fdiv_f32_arcp_ninf(ptr addrspace(1) %out, float %a, float %b) #0 {
877; GFX67-LABEL: s_fdiv_f32_arcp_ninf:
878; GFX67:       ; %bb.0: ; %entry
879; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
880; GFX67-NEXT:    s_mov_b32 s7, 0xf000
881; GFX67-NEXT:    s_mov_b32 s6, -1
882; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
883; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
884; GFX67-NEXT:    s_mov_b32 s4, s0
885; GFX67-NEXT:    s_mov_b32 s5, s1
886; GFX67-NEXT:    v_mul_f32_e32 v0, s2, v0
887; GFX67-NEXT:    buffer_store_dword v0, off, s[4:7], 0
888; GFX67-NEXT:    s_endpgm
889;
890; GFX8-LABEL: s_fdiv_f32_arcp_ninf:
891; GFX8:       ; %bb.0: ; %entry
892; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
893; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
894; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
895; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v0
896; GFX8-NEXT:    v_mov_b32_e32 v0, s0
897; GFX8-NEXT:    v_mov_b32_e32 v1, s1
898; GFX8-NEXT:    flat_store_dword v[0:1], v2
899; GFX8-NEXT:    s_endpgm
900;
901; GFX10-LABEL: s_fdiv_f32_arcp_ninf:
902; GFX10:       ; %bb.0: ; %entry
903; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
904; GFX10-NEXT:    v_mov_b32_e32 v1, 0
905; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
906; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
907; GFX10-NEXT:    v_mul_f32_e32 v0, s2, v0
908; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
909; GFX10-NEXT:    s_endpgm
910;
911; GFX11-LABEL: s_fdiv_f32_arcp_ninf:
912; GFX11:       ; %bb.0: ; %entry
913; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
914; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
915; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
916; GFX11-NEXT:    s_waitcnt_depctr 0xfff
917; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0
918; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
919; GFX11-NEXT:    s_endpgm
920;
921; EG-LABEL: s_fdiv_f32_arcp_ninf:
922; EG:       ; %bb.0: ; %entry
923; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
924; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
925; EG-NEXT:    CF_END
926; EG-NEXT:    PAD
927; EG-NEXT:    ALU clause starting at 4:
928; EG-NEXT:     RECIP_IEEE * T0.X, KC0[2].W,
929; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].Z,
930; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
931; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
932entry:
933  %fdiv = fdiv arcp ninf float %a, %b
934  store float %fdiv, ptr addrspace(1) %out
935  ret void
936}
937
; s_fdiv_v2f32: divides the <2 x float> kernel arguments %a / %b with no
; fast-math flags and no !fpmath metadata, then stores the vector to %out.
; The amdgcn checks expect the v_div_scale_f32 / v_rcp_f32 / FMA chain /
; v_div_fmas_f32 / v_div_fixup_f32 sequence to appear twice, once per vector
; element, each FMA chain bracketed by its own pair of mode writes, and the
; two results written with a single 64-bit store. The r600 checks expect one
; RECIP_IEEE + MUL_IEEE pair per element.
938define amdgpu_kernel void @s_fdiv_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
939; GFX6-FASTFMA-LABEL: s_fdiv_v2f32:
940; GFX6-FASTFMA:       ; %bb.0: ; %entry
941; GFX6-FASTFMA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xb
942; GFX6-FASTFMA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
943; GFX6-FASTFMA-NEXT:    s_mov_b32 s3, 0xf000
944; GFX6-FASTFMA-NEXT:    s_mov_b32 s2, -1
945; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
946; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s9
947; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], s11, s11, v1
948; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
949; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s11
950; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s9, v0, s9
951; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
952; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
953; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
954; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v0, v3
955; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v4, v0
956; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
957; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v2, v4, v0
958; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
959; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v2, s8
960; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
961; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], s10, s10, v2
962; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
963; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v1, v0, s11, v1
964; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s10
965; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s8, v0, s8
966; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
967; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
968; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
969; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v0, v4
970; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v5, v0
971; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
972; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v3, v5, v0
973; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
974; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v4, v5
975; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s10, v2
976; GFX6-FASTFMA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
977; GFX6-FASTFMA-NEXT:    s_endpgm
978;
979; GFX6-SLOWFMA-LABEL: s_fdiv_v2f32:
980; GFX6-SLOWFMA:       ; %bb.0: ; %entry
981; GFX6-SLOWFMA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
982; GFX6-SLOWFMA-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
983; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
984; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v0, s1
985; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[6:7], s3, s3, v0
986; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v2, s3
987; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s1, v2, s1
988; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v4, s0
989; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
990; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
991; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v3, 1.0
992; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
993; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v3
994; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v1, v5, v2
995; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
996; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v5, v2
997; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
998; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[6:7], s2, s2, v4
999; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v5
1000; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v3, s2
1001; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, s0, v3, s0
1002; GFX6-SLOWFMA-NEXT:    s_mov_b32 s7, 0xf000
1003; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, -1
1004; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v2
1005; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v1, v1, s3, v0
1006; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1007; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v2, v5, 1.0
1008; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, v0, v5, v5
1009; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v0
1010; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
1011; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v0, v5
1012; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
1013; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1014; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v2, v0, v5
1015; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v0, s2, v4
1016; GFX6-SLOWFMA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1017; GFX6-SLOWFMA-NEXT:    s_endpgm
1018;
1019; GFX7-LABEL: s_fdiv_v2f32:
1020; GFX7:       ; %bb.0: ; %entry
1021; GFX7-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0xb
1022; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
1023; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1024; GFX7-NEXT:    s_mov_b32 s2, -1
1025; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1026; GFX7-NEXT:    v_mov_b32_e32 v1, s9
1027; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], s11, s11, v1
1028; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
1029; GFX7-NEXT:    v_mov_b32_e32 v0, s11
1030; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s9, v0, s9
1031; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1032; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
1033; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
1034; GFX7-NEXT:    v_mul_f32_e32 v4, v0, v3
1035; GFX7-NEXT:    v_fma_f32 v5, -v2, v4, v0
1036; GFX7-NEXT:    v_fma_f32 v4, v5, v3, v4
1037; GFX7-NEXT:    v_fma_f32 v0, -v2, v4, v0
1038; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1039; GFX7-NEXT:    v_mov_b32_e32 v2, s8
1040; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
1041; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], s10, s10, v2
1042; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
1043; GFX7-NEXT:    v_div_fixup_f32 v1, v0, s11, v1
1044; GFX7-NEXT:    v_mov_b32_e32 v0, s10
1045; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s8, v0, s8
1046; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1047; GFX7-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
1048; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
1049; GFX7-NEXT:    v_mul_f32_e32 v5, v0, v4
1050; GFX7-NEXT:    v_fma_f32 v6, -v3, v5, v0
1051; GFX7-NEXT:    v_fma_f32 v5, v6, v4, v5
1052; GFX7-NEXT:    v_fma_f32 v0, -v3, v5, v0
1053; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1054; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v4, v5
1055; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s10, v2
1056; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1057; GFX7-NEXT:    s_endpgm
1058;
1059; GFX8-LABEL: s_fdiv_v2f32:
1060; GFX8:       ; %bb.0: ; %entry
1061; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1062; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1063; GFX8-NEXT:    v_mov_b32_e32 v0, s1
1064; GFX8-NEXT:    v_div_scale_f32 v1, s[6:7], s3, s3, v0
1065; GFX8-NEXT:    v_mov_b32_e32 v2, s3
1066; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s1, v2, s1
1067; GFX8-NEXT:    v_mov_b32_e32 v4, s0
1068; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
1069; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1070; GFX8-NEXT:    v_fma_f32 v5, -v1, v3, 1.0
1071; GFX8-NEXT:    v_fma_f32 v3, v5, v3, v3
1072; GFX8-NEXT:    v_mul_f32_e32 v5, v2, v3
1073; GFX8-NEXT:    v_fma_f32 v6, -v1, v5, v2
1074; GFX8-NEXT:    v_fma_f32 v5, v6, v3, v5
1075; GFX8-NEXT:    v_fma_f32 v1, -v1, v5, v2
1076; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1077; GFX8-NEXT:    v_div_scale_f32 v2, s[6:7], s2, s2, v4
1078; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v5
1079; GFX8-NEXT:    v_mov_b32_e32 v3, s2
1080; GFX8-NEXT:    v_div_scale_f32 v3, vcc, s0, v3, s0
1081; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1082; GFX8-NEXT:    v_rcp_f32_e32 v5, v2
1083; GFX8-NEXT:    v_div_fixup_f32 v1, v1, s3, v0
1084; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1085; GFX8-NEXT:    v_fma_f32 v0, -v2, v5, 1.0
1086; GFX8-NEXT:    v_fma_f32 v0, v0, v5, v5
1087; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v0
1088; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
1089; GFX8-NEXT:    v_fma_f32 v5, v6, v0, v5
1090; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
1091; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1092; GFX8-NEXT:    v_div_fmas_f32 v0, v2, v0, v5
1093; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1094; GFX8-NEXT:    v_mov_b32_e32 v3, s1
1095; GFX8-NEXT:    v_mov_b32_e32 v2, s0
1096; GFX8-NEXT:    v_div_fixup_f32 v0, v0, s2, v4
1097; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1098; GFX8-NEXT:    s_endpgm
1099;
1100; GFX10-LABEL: s_fdiv_v2f32:
1101; GFX10:       ; %bb.0: ; %entry
1102; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1103; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1104; GFX10-NEXT:    v_div_scale_f32 v0, s6, s3, s3, s1
1105; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, s1, s3, s1
1106; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
1107; GFX10-NEXT:    s_denorm_mode 15
1108; GFX10-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
1109; GFX10-NEXT:    v_fmac_f32_e32 v1, v3, v1
1110; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
1111; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
1112; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
1113; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
1114; GFX10-NEXT:    s_denorm_mode 12
1115; GFX10-NEXT:    v_div_scale_f32 v2, s6, s2, s2, s0
1116; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
1117; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
1118; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
1119; GFX10-NEXT:    v_div_fixup_f32 v1, v0, s3, s1
1120; GFX10-NEXT:    v_div_scale_f32 v0, vcc_lo, s0, s2, s0
1121; GFX10-NEXT:    s_denorm_mode 15
1122; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
1123; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
1124; GFX10-NEXT:    v_mul_f32_e32 v4, v0, v3
1125; GFX10-NEXT:    v_fma_f32 v5, -v2, v4, v0
1126; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v3
1127; GFX10-NEXT:    v_fma_f32 v0, -v2, v4, v0
1128; GFX10-NEXT:    s_denorm_mode 12
1129; GFX10-NEXT:    v_mov_b32_e32 v2, 0
1130; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
1131; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s2, s0
1132; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1133; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
1134; GFX10-NEXT:    s_endpgm
1135;
1136; GFX11-LABEL: s_fdiv_v2f32:
1137; GFX11:       ; %bb.0: ; %entry
1138; GFX11-NEXT:    s_clause 0x1
1139; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x2c
1140; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
1141; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1142; GFX11-NEXT:    v_div_scale_f32 v0, null, s3, s3, s1
1143; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, s1, s3, s1
1144; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
1145; GFX11-NEXT:    s_denorm_mode 15
1146; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1147; GFX11-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
1148; GFX11-NEXT:    v_fmac_f32_e32 v1, v3, v1
1149; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
1150; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
1151; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
1152; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
1153; GFX11-NEXT:    s_denorm_mode 12
1154; GFX11-NEXT:    v_div_scale_f32 v2, null, s2, s2, s0
1155; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
1156; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
1157; GFX11-NEXT:    v_div_fixup_f32 v1, v0, s3, s1
1158; GFX11-NEXT:    v_div_scale_f32 v0, vcc_lo, s0, s2, s0
1159; GFX11-NEXT:    s_denorm_mode 15
1160; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1161; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
1162; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
1163; GFX11-NEXT:    v_mul_f32_e32 v4, v0, v3
1164; GFX11-NEXT:    v_fma_f32 v5, -v2, v4, v0
1165; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v3
1166; GFX11-NEXT:    v_fma_f32 v0, -v2, v4, v0
1167; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1168; GFX11-NEXT:    s_denorm_mode 12
1169; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
1170; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s2, s0
1171; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[4:5]
1172; GFX11-NEXT:    s_endpgm
1173;
1174; EG-LABEL: s_fdiv_v2f32:
1175; EG:       ; %bb.0: ; %entry
1176; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
1177; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1178; EG-NEXT:    CF_END
1179; EG-NEXT:    PAD
1180; EG-NEXT:    ALU clause starting at 4:
1181; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Z,
1182; EG-NEXT:     MUL_IEEE T0.Y, KC0[3].X, PS,
1183; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Y,
1184; EG-NEXT:     MUL_IEEE T0.X, KC0[2].W, PS,
1185; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1186; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1187entry:
1188  %fdiv = fdiv <2 x float> %a, %b
1189  store <2 x float> %fdiv, ptr addrspace(1) %out
1190  ret void
1191}
1192
; s_fdiv_ulp25_v2f32: divides the <2 x float> kernel arguments %a / %b with the
; 'arcp' fast-math flag and !fpmath !0 attached, then stores the vector to %out.
; Each element is expected to lower to a reciprocal plus a multiply
; (v_rcp_f32 + v_mul_f32 on amdgcn, RECIP_IEEE + MUL_IEEE on r600), with both
; results written by a single 64-bit store.
; NOTE(review): !0 is defined outside this part of the file; going by the
; function name it presumably permits 2.5 ulp of error - confirm.
1193define amdgpu_kernel void @s_fdiv_ulp25_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
1194; GFX67-LABEL: s_fdiv_ulp25_v2f32:
1195; GFX67:       ; %bb.0: ; %entry
1196; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
1197; GFX67-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
1198; GFX67-NEXT:    s_mov_b32 s7, 0xf000
1199; GFX67-NEXT:    s_mov_b32 s6, -1
1200; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1201; GFX67-NEXT:    v_rcp_f32_e32 v0, s2
1202; GFX67-NEXT:    v_rcp_f32_e32 v1, s3
1203; GFX67-NEXT:    v_mul_f32_e32 v0, s0, v0
1204; GFX67-NEXT:    v_mul_f32_e32 v1, s1, v1
1205; GFX67-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1206; GFX67-NEXT:    s_endpgm
1207;
1208; GFX8-LABEL: s_fdiv_ulp25_v2f32:
1209; GFX8:       ; %bb.0: ; %entry
1210; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1211; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
1212; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1213; GFX8-NEXT:    v_rcp_f32_e32 v0, s2
1214; GFX8-NEXT:    v_rcp_f32_e32 v1, s3
1215; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1216; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1217; GFX8-NEXT:    v_mul_f32_e32 v0, s0, v0
1218; GFX8-NEXT:    v_mul_f32_e32 v1, s1, v1
1219; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1220; GFX8-NEXT:    s_endpgm
1221;
1222; GFX10-LABEL: s_fdiv_ulp25_v2f32:
1223; GFX10:       ; %bb.0: ; %entry
1224; GFX10-NEXT:    s_clause 0x1
1225; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1226; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
1227; GFX10-NEXT:    v_mov_b32_e32 v2, 0
1228; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1229; GFX10-NEXT:    v_rcp_f32_e32 v0, s2
1230; GFX10-NEXT:    v_rcp_f32_e32 v1, s3
1231; GFX10-NEXT:    v_mul_f32_e32 v0, s0, v0
1232; GFX10-NEXT:    v_mul_f32_e32 v1, s1, v1
1233; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
1234; GFX10-NEXT:    s_endpgm
1235;
1236; GFX11-LABEL: s_fdiv_ulp25_v2f32:
1237; GFX11:       ; %bb.0: ; %entry
1238; GFX11-NEXT:    s_clause 0x1
1239; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x2c
1240; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
1241; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1242; GFX11-NEXT:    v_rcp_f32_e32 v0, s2
1243; GFX11-NEXT:    v_rcp_f32_e32 v1, s3
1244; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1245; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1246; GFX11-NEXT:    v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1
1247; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[4:5]
1248; GFX11-NEXT:    s_endpgm
1249;
1250; EG-LABEL: s_fdiv_ulp25_v2f32:
1251; EG:       ; %bb.0: ; %entry
1252; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
1253; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1254; EG-NEXT:    CF_END
1255; EG-NEXT:    PAD
1256; EG-NEXT:    ALU clause starting at 4:
1257; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Z,
1258; EG-NEXT:     MUL_IEEE T0.Y, KC0[3].X, PS,
1259; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Y,
1260; EG-NEXT:     MUL_IEEE T0.X, KC0[2].W, PS,
1261; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1262; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1263entry:
1264  %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
1265  store <2 x float> %fdiv, ptr addrspace(1) %out
1266  ret void
1267}
1268
; s_fdiv_v2f32_fast_math: divides the <2 x float> kernel arguments %a / %b with
; the 'fast' flag (no !fpmath metadata) and stores the vector to %out.
; Each element is expected to lower to a reciprocal plus a multiply
; (v_rcp_f32 + v_mul_f32 on amdgcn, RECIP_IEEE + MUL_IEEE on r600), with both
; results written by a single 64-bit store; no v_div_scale_f32 /
; v_div_fixup_f32 sequence appears in the checks.
1269define amdgpu_kernel void @s_fdiv_v2f32_fast_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
1270; GFX67-LABEL: s_fdiv_v2f32_fast_math:
1271; GFX67:       ; %bb.0: ; %entry
1272; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
1273; GFX67-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
1274; GFX67-NEXT:    s_mov_b32 s7, 0xf000
1275; GFX67-NEXT:    s_mov_b32 s6, -1
1276; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1277; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
1278; GFX67-NEXT:    v_rcp_f32_e32 v2, s2
1279; GFX67-NEXT:    v_mul_f32_e32 v1, s1, v0
1280; GFX67-NEXT:    v_mul_f32_e32 v0, s0, v2
1281; GFX67-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1282; GFX67-NEXT:    s_endpgm
1283;
1284; GFX8-LABEL: s_fdiv_v2f32_fast_math:
1285; GFX8:       ; %bb.0: ; %entry
1286; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1287; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
1288; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1289; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
1290; GFX8-NEXT:    v_rcp_f32_e32 v2, s2
1291; GFX8-NEXT:    v_mul_f32_e32 v1, s1, v0
1292; GFX8-NEXT:    v_mul_f32_e32 v0, s0, v2
1293; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1294; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1295; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1296; GFX8-NEXT:    s_endpgm
1297;
1298; GFX10-LABEL: s_fdiv_v2f32_fast_math:
1299; GFX10:       ; %bb.0: ; %entry
1300; GFX10-NEXT:    s_clause 0x1
1301; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1302; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
1303; GFX10-NEXT:    v_mov_b32_e32 v3, 0
1304; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1305; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
1306; GFX10-NEXT:    v_rcp_f32_e32 v2, s2
1307; GFX10-NEXT:    v_mul_f32_e32 v1, s1, v0
1308; GFX10-NEXT:    v_mul_f32_e32 v0, s0, v2
1309; GFX10-NEXT:    global_store_dwordx2 v3, v[0:1], s[6:7]
1310; GFX10-NEXT:    s_endpgm
1311;
1312; GFX11-LABEL: s_fdiv_v2f32_fast_math:
1313; GFX11:       ; %bb.0: ; %entry
1314; GFX11-NEXT:    s_clause 0x1
1315; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x2c
1316; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
1317; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1318; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
1319; GFX11-NEXT:    v_rcp_f32_e32 v2, s2
1320; GFX11-NEXT:    v_mov_b32_e32 v3, 0
1321; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1322; GFX11-NEXT:    v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2
1323; GFX11-NEXT:    global_store_b64 v3, v[0:1], s[4:5]
1324; GFX11-NEXT:    s_endpgm
1325;
1326; EG-LABEL: s_fdiv_v2f32_fast_math:
1327; EG:       ; %bb.0: ; %entry
1328; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
1329; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1330; EG-NEXT:    CF_END
1331; EG-NEXT:    PAD
1332; EG-NEXT:    ALU clause starting at 4:
1333; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Z,
1334; EG-NEXT:     MUL_IEEE T0.Y, PS, KC0[3].X,
1335; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Y,
1336; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].W,
1337; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1338; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1339entry:
1340  %fdiv = fdiv fast <2 x float> %a, %b
1341  store <2 x float> %fdiv, ptr addrspace(1) %out
1342  ret void
1343}
1344
; s_fdiv_v2f32_arcp_math: divides the <2 x float> kernel arguments %a / %b with
; the 'arcp' and 'ninf' fast-math flags (no !fpmath metadata) and stores the
; vector to %out.
; Each element is expected to lower to a reciprocal plus a multiply
; (v_rcp_f32 + v_mul_f32 on amdgcn, RECIP_IEEE + MUL_IEEE on r600), with both
; results written by a single 64-bit store; no v_div_scale_f32 /
; v_div_fixup_f32 sequence appears in the checks.
1345define amdgpu_kernel void @s_fdiv_v2f32_arcp_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
1346; GFX67-LABEL: s_fdiv_v2f32_arcp_math:
1347; GFX67:       ; %bb.0: ; %entry
1348; GFX67-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
1349; GFX67-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
1350; GFX67-NEXT:    s_mov_b32 s7, 0xf000
1351; GFX67-NEXT:    s_mov_b32 s6, -1
1352; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1353; GFX67-NEXT:    v_rcp_f32_e32 v0, s3
1354; GFX67-NEXT:    v_rcp_f32_e32 v2, s2
1355; GFX67-NEXT:    v_mul_f32_e32 v1, s1, v0
1356; GFX67-NEXT:    v_mul_f32_e32 v0, s0, v2
1357; GFX67-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1358; GFX67-NEXT:    s_endpgm
1359;
1360; GFX8-LABEL: s_fdiv_v2f32_arcp_math:
1361; GFX8:       ; %bb.0: ; %entry
1362; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1363; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
1364; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1365; GFX8-NEXT:    v_rcp_f32_e32 v0, s3
1366; GFX8-NEXT:    v_rcp_f32_e32 v2, s2
1367; GFX8-NEXT:    v_mul_f32_e32 v1, s1, v0
1368; GFX8-NEXT:    v_mul_f32_e32 v0, s0, v2
1369; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1370; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1371; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
1372; GFX8-NEXT:    s_endpgm
1373;
1374; GFX10-LABEL: s_fdiv_v2f32_arcp_math:
1375; GFX10:       ; %bb.0: ; %entry
1376; GFX10-NEXT:    s_clause 0x1
1377; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
1378; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x24
1379; GFX10-NEXT:    v_mov_b32_e32 v3, 0
1380; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1381; GFX10-NEXT:    v_rcp_f32_e32 v0, s3
1382; GFX10-NEXT:    v_rcp_f32_e32 v2, s2
1383; GFX10-NEXT:    v_mul_f32_e32 v1, s1, v0
1384; GFX10-NEXT:    v_mul_f32_e32 v0, s0, v2
1385; GFX10-NEXT:    global_store_dwordx2 v3, v[0:1], s[6:7]
1386; GFX10-NEXT:    s_endpgm
1387;
1388; GFX11-LABEL: s_fdiv_v2f32_arcp_math:
1389; GFX11:       ; %bb.0: ; %entry
1390; GFX11-NEXT:    s_clause 0x1
1391; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x2c
1392; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x24
1393; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1394; GFX11-NEXT:    v_rcp_f32_e32 v0, s3
1395; GFX11-NEXT:    v_rcp_f32_e32 v2, s2
1396; GFX11-NEXT:    v_mov_b32_e32 v3, 0
1397; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1398; GFX11-NEXT:    v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2
1399; GFX11-NEXT:    global_store_b64 v3, v[0:1], s[4:5]
1400; GFX11-NEXT:    s_endpgm
1401;
1402; EG-LABEL: s_fdiv_v2f32_arcp_math:
1403; EG:       ; %bb.0: ; %entry
1404; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
1405; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
1406; EG-NEXT:    CF_END
1407; EG-NEXT:    PAD
1408; EG-NEXT:    ALU clause starting at 4:
1409; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Z,
1410; EG-NEXT:     MUL_IEEE T0.Y, PS, KC0[3].X,
1411; EG-NEXT:     RECIP_IEEE * T0.X, KC0[3].Y,
1412; EG-NEXT:     MUL_IEEE T0.X, PS, KC0[2].W,
1413; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1414; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
1415entry:
1416  %fdiv = fdiv arcp ninf <2 x float> %a, %b
1417  store <2 x float> %fdiv, ptr addrspace(1) %out
1418  ret void
1419}
1420
; Elementwise <4 x float> fdiv with no fast-math flags on the instruction.
; Both operands are loaded from %in (see the IR at the end of the function) and
; the quotient is stored to %out.
;
; The autogenerated checks below expect, for every lane on the amdgcn targets,
; a v_div_scale_f32 pair, v_rcp_f32, a sequence of FMA/multiply instructions,
; v_div_fmas_f32 and finally v_div_fixup_f32. The FMA sequence is bracketed by
; a mode switch: s_setreg_imm32_b32 on HW_REG_MODE for the pre-GFX10 targets
; and s_denorm_mode for GFX10 and GFX11.
; NOTE(review): the HW_REG_MODE field written here is presumably the FP32
; denormal control - confirm against the ISA documentation.
;
; For the r600 run line the checks expect one RECIP_IEEE followed by one
; MUL_IEEE per lane.
1421define amdgpu_kernel void @s_fdiv_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1422; GFX6-FASTFMA-LABEL: s_fdiv_v4f32:
1423; GFX6-FASTFMA:       ; %bb.0:
1424; GFX6-FASTFMA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
1425; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
1426; GFX6-FASTFMA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1427; GFX6-FASTFMA-NEXT:    s_mov_b32 s11, 0xf000
1428; GFX6-FASTFMA-NEXT:    s_mov_b32 s10, -1
1429; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
1430; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s3
1431; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[12:13], s7, s7, v1
1432; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
1433; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s7
1434; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s3, v0, s3
1435; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1436; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
1437; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
1438; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v0, v3
1439; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v4, v0
1440; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
1441; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v2, v4, v0
1442; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1443; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
1444; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v3, v0, s7, v1
1445; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s2
1446; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[12:13], s6, s6, v1
1447; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v2
1448; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s6
1449; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
1450; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1451; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
1452; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
1453; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v0, v4
1454; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v0
1455; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
1456; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v2, v5, v0
1457; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1458; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v4, v5
1459; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v2, v0, s6, v1
1460; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v1, s1
1461; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, s[2:3], s5, s5, v1
1462; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v5, v4
1463; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s5
1464; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s1, v0, s1
1465; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1466; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
1467; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
1468; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v0, v5
1469; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v4, v6, v0
1470; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
1471; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v4, v6, v0
1472; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1473; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v4, s0
1474; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v5, v6
1475; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, s[2:3], s4, s4, v4
1476; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v6, v5
1477; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v1, v0, s5, v1
1478; GFX6-FASTFMA-NEXT:    v_mov_b32_e32 v0, s4
1479; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, vcc, s0, v0, s0
1480; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1481; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
1482; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v6, v6
1483; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v7, v0, v6
1484; GFX6-FASTFMA-NEXT:    v_fma_f32 v8, -v5, v7, v0
1485; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, v8, v6, v7
1486; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v5, v7, v0
1487; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1488; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v6, v7
1489; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s4, v4
1490; GFX6-FASTFMA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1491; GFX6-FASTFMA-NEXT:    s_endpgm
1492;
1493; GFX6-SLOWFMA-LABEL: s_fdiv_v4f32:
1494; GFX6-SLOWFMA:       ; %bb.0:
1495; GFX6-SLOWFMA-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
1496; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
1497; GFX6-SLOWFMA-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1498; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
1499; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v0, s3
1500; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[10:11], s7, s7, v0
1501; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v2, s7
1502; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s3, v2, s3
1503; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v4, s2
1504; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v7, s1
1505; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v8, s0
1506; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
1507; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1508; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v3, 1.0
1509; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
1510; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v2, v3
1511; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v1, v5, v2
1512; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
1513; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v5, v2
1514; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1515; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[10:11], s6, s6, v4
1516; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v5
1517; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v3, s6
1518; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v5, vcc, s2, v3, s2
1519; GFX6-SLOWFMA-NEXT:    s_mov_b32 s11, 0xf000
1520; GFX6-SLOWFMA-NEXT:    s_mov_b32 s10, -1
1521; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v6, v2
1522; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v3, v1, s7, v0
1523; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1524; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v2, v6, 1.0
1525; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, v0, v6, v6
1526; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v1, v5, v0
1527; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v1, v5
1528; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, v6, v0, v1
1529; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v1, v5
1530; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1531; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v5, s[2:3], s5, s5, v7
1532; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v2, v0, v1
1533; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v1, s5
1534; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, vcc, s1, v1, s1
1535; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v6, v5
1536; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v2, v0, s6, v4
1537; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1538; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1539; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, v0, v6, v6
1540; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v1, v0
1541; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v5, v4, v1
1542; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v6, v0, v4
1543; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v5, v4, v1
1544; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1545; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v5, s[2:3], s4, s4, v8
1546; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v1, v0, v4
1547; GFX6-SLOWFMA-NEXT:    v_mov_b32_e32 v1, s4
1548; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, s0, v1, s0
1549; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v6, v5
1550; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v1, v0, s5, v7
1551; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1552; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1553; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, v0, v6, v6
1554; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v0
1555; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v5, v6, v4
1556; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v0, v6
1557; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v5, v6, v4
1558; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1559; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v4, v0, v6
1560; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v0, s4, v8
1561; GFX6-SLOWFMA-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1562; GFX6-SLOWFMA-NEXT:    s_endpgm
1563;
1564; GFX7-LABEL: s_fdiv_v4f32:
1565; GFX7:       ; %bb.0:
1566; GFX7-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
1567; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1568; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1569; GFX7-NEXT:    s_mov_b32 s11, 0xf000
1570; GFX7-NEXT:    s_mov_b32 s10, -1
1571; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1572; GFX7-NEXT:    v_mov_b32_e32 v1, s3
1573; GFX7-NEXT:    v_div_scale_f32 v2, s[12:13], s7, s7, v1
1574; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
1575; GFX7-NEXT:    v_mov_b32_e32 v0, s7
1576; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s3, v0, s3
1577; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1578; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
1579; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
1580; GFX7-NEXT:    v_mul_f32_e32 v4, v0, v3
1581; GFX7-NEXT:    v_fma_f32 v5, -v2, v4, v0
1582; GFX7-NEXT:    v_fma_f32 v4, v5, v3, v4
1583; GFX7-NEXT:    v_fma_f32 v0, -v2, v4, v0
1584; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1585; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v3, v4
1586; GFX7-NEXT:    v_div_fixup_f32 v3, v0, s7, v1
1587; GFX7-NEXT:    v_mov_b32_e32 v1, s2
1588; GFX7-NEXT:    v_div_scale_f32 v2, s[12:13], s6, s6, v1
1589; GFX7-NEXT:    v_rcp_f32_e32 v4, v2
1590; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1591; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s2, v0, s2
1592; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1593; GFX7-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
1594; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
1595; GFX7-NEXT:    v_mul_f32_e32 v5, v0, v4
1596; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v0
1597; GFX7-NEXT:    v_fma_f32 v5, v6, v4, v5
1598; GFX7-NEXT:    v_fma_f32 v0, -v2, v5, v0
1599; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1600; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v4, v5
1601; GFX7-NEXT:    v_div_fixup_f32 v2, v0, s6, v1
1602; GFX7-NEXT:    v_mov_b32_e32 v1, s1
1603; GFX7-NEXT:    v_div_scale_f32 v4, s[2:3], s5, s5, v1
1604; GFX7-NEXT:    v_rcp_f32_e32 v5, v4
1605; GFX7-NEXT:    v_mov_b32_e32 v0, s5
1606; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s1, v0, s1
1607; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1608; GFX7-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
1609; GFX7-NEXT:    v_fma_f32 v5, v6, v5, v5
1610; GFX7-NEXT:    v_mul_f32_e32 v6, v0, v5
1611; GFX7-NEXT:    v_fma_f32 v7, -v4, v6, v0
1612; GFX7-NEXT:    v_fma_f32 v6, v7, v5, v6
1613; GFX7-NEXT:    v_fma_f32 v0, -v4, v6, v0
1614; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1615; GFX7-NEXT:    v_mov_b32_e32 v4, s0
1616; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v5, v6
1617; GFX7-NEXT:    v_div_scale_f32 v5, s[2:3], s4, s4, v4
1618; GFX7-NEXT:    v_rcp_f32_e32 v6, v5
1619; GFX7-NEXT:    v_div_fixup_f32 v1, v0, s5, v1
1620; GFX7-NEXT:    v_mov_b32_e32 v0, s4
1621; GFX7-NEXT:    v_div_scale_f32 v0, vcc, s0, v0, s0
1622; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1623; GFX7-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
1624; GFX7-NEXT:    v_fma_f32 v6, v7, v6, v6
1625; GFX7-NEXT:    v_mul_f32_e32 v7, v0, v6
1626; GFX7-NEXT:    v_fma_f32 v8, -v5, v7, v0
1627; GFX7-NEXT:    v_fma_f32 v7, v8, v6, v7
1628; GFX7-NEXT:    v_fma_f32 v0, -v5, v7, v0
1629; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1630; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v6, v7
1631; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s4, v4
1632; GFX7-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1633; GFX7-NEXT:    s_endpgm
1634;
1635; GFX8-LABEL: s_fdiv_v4f32:
1636; GFX8:       ; %bb.0:
1637; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1638; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1639; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1640; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1641; GFX8-NEXT:    v_mov_b32_e32 v0, s3
1642; GFX8-NEXT:    v_div_scale_f32 v1, s[10:11], s7, s7, v0
1643; GFX8-NEXT:    v_mov_b32_e32 v2, s7
1644; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s3, v2, s3
1645; GFX8-NEXT:    v_mov_b32_e32 v4, s2
1646; GFX8-NEXT:    v_mov_b32_e32 v7, s1
1647; GFX8-NEXT:    v_mov_b32_e32 v8, s0
1648; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
1649; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1650; GFX8-NEXT:    v_fma_f32 v5, -v1, v3, 1.0
1651; GFX8-NEXT:    v_fma_f32 v3, v5, v3, v3
1652; GFX8-NEXT:    v_mul_f32_e32 v5, v2, v3
1653; GFX8-NEXT:    v_fma_f32 v6, -v1, v5, v2
1654; GFX8-NEXT:    v_fma_f32 v5, v6, v3, v5
1655; GFX8-NEXT:    v_fma_f32 v1, -v1, v5, v2
1656; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1657; GFX8-NEXT:    v_div_scale_f32 v2, s[10:11], s6, s6, v4
1658; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v5
1659; GFX8-NEXT:    v_mov_b32_e32 v3, s6
1660; GFX8-NEXT:    v_div_scale_f32 v5, vcc, s2, v3, s2
1661; GFX8-NEXT:    v_rcp_f32_e32 v6, v2
1662; GFX8-NEXT:    v_div_fixup_f32 v3, v1, s7, v0
1663; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1664; GFX8-NEXT:    v_fma_f32 v0, -v2, v6, 1.0
1665; GFX8-NEXT:    v_fma_f32 v0, v0, v6, v6
1666; GFX8-NEXT:    v_mul_f32_e32 v1, v5, v0
1667; GFX8-NEXT:    v_fma_f32 v6, -v2, v1, v5
1668; GFX8-NEXT:    v_fma_f32 v1, v6, v0, v1
1669; GFX8-NEXT:    v_fma_f32 v2, -v2, v1, v5
1670; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1671; GFX8-NEXT:    v_div_scale_f32 v5, s[2:3], s5, s5, v7
1672; GFX8-NEXT:    v_div_fmas_f32 v0, v2, v0, v1
1673; GFX8-NEXT:    v_mov_b32_e32 v1, s5
1674; GFX8-NEXT:    v_div_scale_f32 v1, vcc, s1, v1, s1
1675; GFX8-NEXT:    v_rcp_f32_e32 v6, v5
1676; GFX8-NEXT:    v_div_fixup_f32 v2, v0, s6, v4
1677; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1678; GFX8-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1679; GFX8-NEXT:    v_fma_f32 v0, v0, v6, v6
1680; GFX8-NEXT:    v_mul_f32_e32 v4, v1, v0
1681; GFX8-NEXT:    v_fma_f32 v6, -v5, v4, v1
1682; GFX8-NEXT:    v_fma_f32 v4, v6, v0, v4
1683; GFX8-NEXT:    v_fma_f32 v1, -v5, v4, v1
1684; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1685; GFX8-NEXT:    v_div_scale_f32 v5, s[2:3], s4, s4, v8
1686; GFX8-NEXT:    v_div_fmas_f32 v0, v1, v0, v4
1687; GFX8-NEXT:    v_mov_b32_e32 v1, s4
1688; GFX8-NEXT:    v_div_scale_f32 v4, vcc, s0, v1, s0
1689; GFX8-NEXT:    v_rcp_f32_e32 v6, v5
1690; GFX8-NEXT:    v_div_fixup_f32 v1, v0, s5, v7
1691; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1692; GFX8-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1693; GFX8-NEXT:    v_fma_f32 v0, v0, v6, v6
1694; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v0
1695; GFX8-NEXT:    v_fma_f32 v7, -v5, v6, v4
1696; GFX8-NEXT:    v_fma_f32 v6, v7, v0, v6
1697; GFX8-NEXT:    v_fma_f32 v4, -v5, v6, v4
1698; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1699; GFX8-NEXT:    v_div_fmas_f32 v0, v4, v0, v6
1700; GFX8-NEXT:    v_mov_b32_e32 v4, s8
1701; GFX8-NEXT:    v_mov_b32_e32 v5, s9
1702; GFX8-NEXT:    v_div_fixup_f32 v0, v0, s4, v8
1703; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1704; GFX8-NEXT:    s_endpgm
1705;
1706; GFX10-LABEL: s_fdiv_v4f32:
1707; GFX10:       ; %bb.0:
1708; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1709; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1710; GFX10-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1711; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1712; GFX10-NEXT:    v_div_scale_f32 v0, s10, s7, s7, s3
1713; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, s3, s7, s3
1714; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
1715; GFX10-NEXT:    s_denorm_mode 15
1716; GFX10-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
1717; GFX10-NEXT:    v_fmac_f32_e32 v1, v3, v1
1718; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
1719; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
1720; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
1721; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
1722; GFX10-NEXT:    s_denorm_mode 12
1723; GFX10-NEXT:    v_div_scale_f32 v2, s10, s6, s6, s2
1724; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
1725; GFX10-NEXT:    v_div_scale_f32 v1, vcc_lo, s2, s6, s2
1726; GFX10-NEXT:    v_rcp_f32_e32 v4, v2
1727; GFX10-NEXT:    v_div_fixup_f32 v3, v0, s7, s3
1728; GFX10-NEXT:    s_denorm_mode 15
1729; GFX10-NEXT:    v_fma_f32 v0, -v2, v4, 1.0
1730; GFX10-NEXT:    v_fmac_f32_e32 v4, v0, v4
1731; GFX10-NEXT:    v_mul_f32_e32 v0, v1, v4
1732; GFX10-NEXT:    v_fma_f32 v5, -v2, v0, v1
1733; GFX10-NEXT:    v_fmac_f32_e32 v0, v5, v4
1734; GFX10-NEXT:    v_fma_f32 v1, -v2, v0, v1
1735; GFX10-NEXT:    s_denorm_mode 12
1736; GFX10-NEXT:    v_div_scale_f32 v5, s3, s5, s5, s1
1737; GFX10-NEXT:    v_div_fmas_f32 v0, v1, v4, v0
1738; GFX10-NEXT:    v_div_scale_f32 v1, vcc_lo, s1, s5, s1
1739; GFX10-NEXT:    v_rcp_f32_e32 v6, v5
1740; GFX10-NEXT:    v_div_fixup_f32 v2, v0, s6, s2
1741; GFX10-NEXT:    s_denorm_mode 15
1742; GFX10-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1743; GFX10-NEXT:    v_fmac_f32_e32 v6, v0, v6
1744; GFX10-NEXT:    v_mul_f32_e32 v0, v1, v6
1745; GFX10-NEXT:    v_fma_f32 v4, -v5, v0, v1
1746; GFX10-NEXT:    v_fmac_f32_e32 v0, v4, v6
1747; GFX10-NEXT:    v_fma_f32 v1, -v5, v0, v1
1748; GFX10-NEXT:    s_denorm_mode 12
1749; GFX10-NEXT:    v_div_scale_f32 v4, s2, s4, s4, s0
1750; GFX10-NEXT:    v_div_fmas_f32 v0, v1, v6, v0
1751; GFX10-NEXT:    v_rcp_f32_e32 v5, v4
1752; GFX10-NEXT:    v_div_fixup_f32 v1, v0, s5, s1
1753; GFX10-NEXT:    v_div_scale_f32 v0, vcc_lo, s0, s4, s0
1754; GFX10-NEXT:    s_denorm_mode 15
1755; GFX10-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
1756; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v5
1757; GFX10-NEXT:    v_mul_f32_e32 v6, v0, v5
1758; GFX10-NEXT:    v_fma_f32 v7, -v4, v6, v0
1759; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v5
1760; GFX10-NEXT:    v_fma_f32 v0, -v4, v6, v0
1761; GFX10-NEXT:    s_denorm_mode 12
1762; GFX10-NEXT:    v_mov_b32_e32 v4, 0
1763; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v5, v6
1764; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s4, s0
1765; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
1766; GFX10-NEXT:    s_endpgm
1767;
1768; GFX11-LABEL: s_fdiv_v4f32:
1769; GFX11:       ; %bb.0:
1770; GFX11-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
1771; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1772; GFX11-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
1773; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1774; GFX11-NEXT:    v_div_scale_f32 v0, null, s7, s7, s3
1775; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, s3, s7, s3
1776; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
1777; GFX11-NEXT:    s_denorm_mode 15
1778; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1779; GFX11-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
1780; GFX11-NEXT:    v_fmac_f32_e32 v1, v3, v1
1781; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
1782; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
1783; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
1784; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
1785; GFX11-NEXT:    s_denorm_mode 12
1786; GFX11-NEXT:    v_div_scale_f32 v2, null, s6, s6, s2
1787; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
1788; GFX11-NEXT:    v_div_scale_f32 v1, vcc_lo, s2, s6, s2
1789; GFX11-NEXT:    v_rcp_f32_e32 v4, v2
1790; GFX11-NEXT:    v_div_fixup_f32 v3, v0, s7, s3
1791; GFX11-NEXT:    s_denorm_mode 15
1792; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1793; GFX11-NEXT:    v_fma_f32 v0, -v2, v4, 1.0
1794; GFX11-NEXT:    v_fmac_f32_e32 v4, v0, v4
1795; GFX11-NEXT:    v_mul_f32_e32 v0, v1, v4
1796; GFX11-NEXT:    v_fma_f32 v5, -v2, v0, v1
1797; GFX11-NEXT:    v_fmac_f32_e32 v0, v5, v4
1798; GFX11-NEXT:    v_fma_f32 v1, -v2, v0, v1
1799; GFX11-NEXT:    s_denorm_mode 12
1800; GFX11-NEXT:    v_div_scale_f32 v5, null, s5, s5, s1
1801; GFX11-NEXT:    v_div_fmas_f32 v0, v1, v4, v0
1802; GFX11-NEXT:    v_div_scale_f32 v1, vcc_lo, s1, s5, s1
1803; GFX11-NEXT:    v_rcp_f32_e32 v6, v5
1804; GFX11-NEXT:    v_div_fixup_f32 v2, v0, s6, s2
1805; GFX11-NEXT:    s_denorm_mode 15
1806; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1807; GFX11-NEXT:    v_fma_f32 v0, -v5, v6, 1.0
1808; GFX11-NEXT:    v_fmac_f32_e32 v6, v0, v6
1809; GFX11-NEXT:    v_mul_f32_e32 v0, v1, v6
1810; GFX11-NEXT:    v_fma_f32 v4, -v5, v0, v1
1811; GFX11-NEXT:    v_fmac_f32_e32 v0, v4, v6
1812; GFX11-NEXT:    v_fma_f32 v1, -v5, v0, v1
1813; GFX11-NEXT:    s_denorm_mode 12
1814; GFX11-NEXT:    v_div_scale_f32 v4, null, s4, s4, s0
1815; GFX11-NEXT:    v_div_fmas_f32 v0, v1, v6, v0
1816; GFX11-NEXT:    v_rcp_f32_e32 v5, v4
1817; GFX11-NEXT:    v_div_fixup_f32 v1, v0, s5, s1
1818; GFX11-NEXT:    v_div_scale_f32 v0, vcc_lo, s0, s4, s0
1819; GFX11-NEXT:    s_denorm_mode 15
1820; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1821; GFX11-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
1822; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v5
1823; GFX11-NEXT:    v_mul_f32_e32 v6, v0, v5
1824; GFX11-NEXT:    v_fma_f32 v7, -v4, v6, v0
1825; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v5
1826; GFX11-NEXT:    v_fma_f32 v0, -v4, v6, v0
1827; GFX11-NEXT:    s_denorm_mode 12
1828; GFX11-NEXT:    v_mov_b32_e32 v4, 0
1829; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v5, v6
1830; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s4, s0
1831; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[8:9]
1832; GFX11-NEXT:    s_endpgm
1833;
1834; EG-LABEL: s_fdiv_v4f32:
1835; EG:       ; %bb.0:
1836; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1837; EG-NEXT:    TEX 1 @6
1838; EG-NEXT:    ALU 9, @11, KC0[CB0:0-32], KC1[]
1839; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
1840; EG-NEXT:    CF_END
1841; EG-NEXT:    PAD
1842; EG-NEXT:    Fetch clause starting at 6:
1843; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
1844; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
1845; EG-NEXT:    ALU clause starting at 10:
1846; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1847; EG-NEXT:    ALU clause starting at 11:
1848; EG-NEXT:     RECIP_IEEE * T1.W, T1.W,
1849; EG-NEXT:     MUL_IEEE T0.W, T0.W, PS,
1850; EG-NEXT:     RECIP_IEEE * T1.Z, T1.Z,
1851; EG-NEXT:     MUL_IEEE T0.Z, T0.Z, PS,
1852; EG-NEXT:     RECIP_IEEE * T1.Y, T1.Y,
1853; EG-NEXT:     MUL_IEEE T0.Y, T0.Y, PS,
1854; EG-NEXT:     RECIP_IEEE * T1.X, T1.X,
1855; EG-NEXT:     MUL_IEEE T0.X, T0.X, PS,
1856; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1857; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; %a is the <4 x float> at %in; %b is the <4 x float> immediately after it
; (getelementptr index 1). The quotient %a / %b is stored to %out.
1858  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
1859  %a = load <4 x float>, ptr addrspace(1) %in
1860  %b = load <4 x float>, ptr addrspace(1) %b_ptr
1861  %result = fdiv <4 x float> %a, %b
1862  store <4 x float> %result, ptr addrspace(1) %out
1863  ret void
1864}
1865
; Elementwise <4 x float> fdiv carrying the `fast` flag.
;
; The autogenerated checks below expect the amdgcn targets to emit only one
; v_rcp_f32 of the divisor lane followed by one multiply with the dividend
; lane, with no v_div_scale / v_div_fmas / v_div_fixup sequence. On GFX11 some
; of the multiplies are paired into v_dual_* instructions. The two SI run lines
; and the hawaii run line share a single set of checks (the GFX67 prefix).
;
; For the r600 run line the checks expect RECIP_IEEE followed by MUL_IEEE per
; lane, with PS as the first multiply operand.
1866define amdgpu_kernel void @s_fdiv_v4f32_fast_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1867; GFX67-LABEL: s_fdiv_v4f32_fast_math:
1868; GFX67:       ; %bb.0:
1869; GFX67-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
1870; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1871; GFX67-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1872; GFX67-NEXT:    s_mov_b32 s11, 0xf000
1873; GFX67-NEXT:    s_mov_b32 s10, -1
1874; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1875; GFX67-NEXT:    v_rcp_f32_e32 v0, s7
1876; GFX67-NEXT:    v_rcp_f32_e32 v1, s6
1877; GFX67-NEXT:    v_rcp_f32_e32 v4, s5
1878; GFX67-NEXT:    v_rcp_f32_e32 v5, s4
1879; GFX67-NEXT:    v_mul_f32_e32 v3, s3, v0
1880; GFX67-NEXT:    v_mul_f32_e32 v2, s2, v1
1881; GFX67-NEXT:    v_mul_f32_e32 v1, s1, v4
1882; GFX67-NEXT:    v_mul_f32_e32 v0, s0, v5
1883; GFX67-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1884; GFX67-NEXT:    s_endpgm
1885;
1886; GFX8-LABEL: s_fdiv_v4f32_fast_math:
1887; GFX8:       ; %bb.0:
1888; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1889; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1890; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1891; GFX8-NEXT:    v_mov_b32_e32 v4, s8
1892; GFX8-NEXT:    v_mov_b32_e32 v5, s9
1893; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1894; GFX8-NEXT:    v_rcp_f32_e32 v0, s7
1895; GFX8-NEXT:    v_rcp_f32_e32 v1, s6
1896; GFX8-NEXT:    v_rcp_f32_e32 v6, s5
1897; GFX8-NEXT:    v_rcp_f32_e32 v7, s4
1898; GFX8-NEXT:    v_mul_f32_e32 v3, s3, v0
1899; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v1
1900; GFX8-NEXT:    v_mul_f32_e32 v1, s1, v6
1901; GFX8-NEXT:    v_mul_f32_e32 v0, s0, v7
1902; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
1903; GFX8-NEXT:    s_endpgm
1904;
1905; GFX10-LABEL: s_fdiv_v4f32_fast_math:
1906; GFX10:       ; %bb.0:
1907; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1908; GFX10-NEXT:    v_mov_b32_e32 v6, 0
1909; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1910; GFX10-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1911; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1912; GFX10-NEXT:    v_rcp_f32_e32 v0, s7
1913; GFX10-NEXT:    v_rcp_f32_e32 v1, s6
1914; GFX10-NEXT:    v_rcp_f32_e32 v4, s5
1915; GFX10-NEXT:    v_rcp_f32_e32 v5, s4
1916; GFX10-NEXT:    v_mul_f32_e32 v3, s3, v0
1917; GFX10-NEXT:    v_mul_f32_e32 v2, s2, v1
1918; GFX10-NEXT:    v_mul_f32_e32 v1, s1, v4
1919; GFX10-NEXT:    v_mul_f32_e32 v0, s0, v5
1920; GFX10-NEXT:    global_store_dwordx4 v6, v[0:3], s[8:9]
1921; GFX10-NEXT:    s_endpgm
1922;
1923; GFX11-LABEL: s_fdiv_v4f32_fast_math:
1924; GFX11:       ; %bb.0:
1925; GFX11-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
1926; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1927; GFX11-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
1928; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1929; GFX11-NEXT:    v_rcp_f32_e32 v0, s7
1930; GFX11-NEXT:    v_rcp_f32_e32 v1, s6
1931; GFX11-NEXT:    v_rcp_f32_e32 v4, s5
1932; GFX11-NEXT:    v_rcp_f32_e32 v5, s4
1933; GFX11-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, s3, v0
1934; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1935; GFX11-NEXT:    v_dual_mul_f32 v2, s2, v1 :: v_dual_mul_f32 v1, s1, v4
1936; GFX11-NEXT:    v_mul_f32_e32 v0, s0, v5
1937; GFX11-NEXT:    global_store_b128 v6, v[0:3], s[8:9]
1938; GFX11-NEXT:    s_endpgm
1939;
1940; EG-LABEL: s_fdiv_v4f32_fast_math:
1941; EG:       ; %bb.0:
1942; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
1943; EG-NEXT:    TEX 1 @6
1944; EG-NEXT:    ALU 9, @11, KC0[CB0:0-32], KC1[]
1945; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
1946; EG-NEXT:    CF_END
1947; EG-NEXT:    PAD
1948; EG-NEXT:    Fetch clause starting at 6:
1949; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
1950; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
1951; EG-NEXT:    ALU clause starting at 10:
1952; EG-NEXT:     MOV * T0.X, KC0[2].Z,
1953; EG-NEXT:    ALU clause starting at 11:
1954; EG-NEXT:     RECIP_IEEE * T1.W, T1.W,
1955; EG-NEXT:     MUL_IEEE T0.W, PS, T0.W,
1956; EG-NEXT:     RECIP_IEEE * T1.Z, T1.Z,
1957; EG-NEXT:     MUL_IEEE T0.Z, PS, T0.Z,
1958; EG-NEXT:     RECIP_IEEE * T1.Y, T1.Y,
1959; EG-NEXT:     MUL_IEEE T0.Y, PS, T0.Y,
1960; EG-NEXT:     RECIP_IEEE * T1.X, T1.X,
1961; EG-NEXT:     MUL_IEEE T0.X, PS, T0.X,
1962; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
1963; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; %a is the <4 x float> at %in; %b is the <4 x float> immediately after it
; (getelementptr index 1). The `fast` quotient %a / %b is stored to %out.
1964  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
1965  %a = load <4 x float>, ptr addrspace(1) %in
1966  %b = load <4 x float>, ptr addrspace(1) %b_ptr
1967  %result = fdiv fast <4 x float> %a, %b
1968  store <4 x float> %result, ptr addrspace(1) %out
1969  ret void
1970}
1971
; Elementwise <4 x float> fdiv carrying only the `arcp` and `ninf` flags.
;
; The autogenerated checks below expect the amdgcn targets to emit one
; v_rcp_f32 of the divisor lane followed by one multiply with the dividend
; lane, with no v_div_scale / v_div_fmas / v_div_fixup sequence. On GFX11 some
; of the multiplies are paired into v_dual_* instructions. The two SI run lines
; and the hawaii run line share a single set of checks (the GFX67 prefix).
;
; For the r600 run line the checks expect RECIP_IEEE followed by MUL_IEEE per
; lane, with PS as the first multiply operand.
1972define amdgpu_kernel void @s_fdiv_v4f32_arcp_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
1973; GFX67-LABEL: s_fdiv_v4f32_arcp_math:
1974; GFX67:       ; %bb.0:
1975; GFX67-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
1976; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1977; GFX67-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1978; GFX67-NEXT:    s_mov_b32 s11, 0xf000
1979; GFX67-NEXT:    s_mov_b32 s10, -1
1980; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
1981; GFX67-NEXT:    v_rcp_f32_e32 v0, s7
1982; GFX67-NEXT:    v_rcp_f32_e32 v1, s6
1983; GFX67-NEXT:    v_rcp_f32_e32 v4, s5
1984; GFX67-NEXT:    v_rcp_f32_e32 v5, s4
1985; GFX67-NEXT:    v_mul_f32_e32 v3, s3, v0
1986; GFX67-NEXT:    v_mul_f32_e32 v2, s2, v1
1987; GFX67-NEXT:    v_mul_f32_e32 v1, s1, v4
1988; GFX67-NEXT:    v_mul_f32_e32 v0, s0, v5
1989; GFX67-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
1990; GFX67-NEXT:    s_endpgm
1991;
1992; GFX8-LABEL: s_fdiv_v4f32_arcp_math:
1993; GFX8:       ; %bb.0:
1994; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
1995; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1996; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
1997; GFX8-NEXT:    v_mov_b32_e32 v4, s8
1998; GFX8-NEXT:    v_mov_b32_e32 v5, s9
1999; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2000; GFX8-NEXT:    v_rcp_f32_e32 v0, s7
2001; GFX8-NEXT:    v_rcp_f32_e32 v1, s6
2002; GFX8-NEXT:    v_rcp_f32_e32 v6, s5
2003; GFX8-NEXT:    v_rcp_f32_e32 v7, s4
2004; GFX8-NEXT:    v_mul_f32_e32 v3, s3, v0
2005; GFX8-NEXT:    v_mul_f32_e32 v2, s2, v1
2006; GFX8-NEXT:    v_mul_f32_e32 v1, s1, v6
2007; GFX8-NEXT:    v_mul_f32_e32 v0, s0, v7
2008; GFX8-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
2009; GFX8-NEXT:    s_endpgm
2010;
2011; GFX10-LABEL: s_fdiv_v4f32_arcp_math:
2012; GFX10:       ; %bb.0:
2013; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x24
2014; GFX10-NEXT:    v_mov_b32_e32 v6, 0
2015; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2016; GFX10-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
2017; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2018; GFX10-NEXT:    v_rcp_f32_e32 v0, s7
2019; GFX10-NEXT:    v_rcp_f32_e32 v1, s6
2020; GFX10-NEXT:    v_rcp_f32_e32 v4, s5
2021; GFX10-NEXT:    v_rcp_f32_e32 v5, s4
2022; GFX10-NEXT:    v_mul_f32_e32 v3, s3, v0
2023; GFX10-NEXT:    v_mul_f32_e32 v2, s2, v1
2024; GFX10-NEXT:    v_mul_f32_e32 v1, s1, v4
2025; GFX10-NEXT:    v_mul_f32_e32 v0, s0, v5
2026; GFX10-NEXT:    global_store_dwordx4 v6, v[0:3], s[8:9]
2027; GFX10-NEXT:    s_endpgm
2028;
2029; GFX11-LABEL: s_fdiv_v4f32_arcp_math:
2030; GFX11:       ; %bb.0:
2031; GFX11-NEXT:    s_load_b128 s[8:11], s[4:5], 0x24
2032; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2033; GFX11-NEXT:    s_load_b256 s[0:7], s[10:11], 0x0
2034; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2035; GFX11-NEXT:    v_rcp_f32_e32 v0, s7
2036; GFX11-NEXT:    v_rcp_f32_e32 v1, s6
2037; GFX11-NEXT:    v_rcp_f32_e32 v4, s5
2038; GFX11-NEXT:    v_rcp_f32_e32 v5, s4
2039; GFX11-NEXT:    v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, s3, v0
2040; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2041; GFX11-NEXT:    v_dual_mul_f32 v2, s2, v1 :: v_dual_mul_f32 v1, s1, v4
2042; GFX11-NEXT:    v_mul_f32_e32 v0, s0, v5
2043; GFX11-NEXT:    global_store_b128 v6, v[0:3], s[8:9]
2044; GFX11-NEXT:    s_endpgm
2045;
2046; EG-LABEL: s_fdiv_v4f32_arcp_math:
2047; EG:       ; %bb.0:
2048; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
2049; EG-NEXT:    TEX 1 @6
2050; EG-NEXT:    ALU 9, @11, KC0[CB0:0-32], KC1[]
2051; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
2052; EG-NEXT:    CF_END
2053; EG-NEXT:    PAD
2054; EG-NEXT:    Fetch clause starting at 6:
2055; EG-NEXT:     VTX_READ_128 T1.XYZW, T0.X, 16, #1
2056; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
2057; EG-NEXT:    ALU clause starting at 10:
2058; EG-NEXT:     MOV * T0.X, KC0[2].Z,
2059; EG-NEXT:    ALU clause starting at 11:
2060; EG-NEXT:     RECIP_IEEE * T1.W, T1.W,
2061; EG-NEXT:     MUL_IEEE T0.W, PS, T0.W,
2062; EG-NEXT:     RECIP_IEEE * T1.Z, T1.Z,
2063; EG-NEXT:     MUL_IEEE T0.Z, PS, T0.Z,
2064; EG-NEXT:     RECIP_IEEE * T1.Y, T1.Y,
2065; EG-NEXT:     MUL_IEEE T0.Y, PS, T0.Y,
2066; EG-NEXT:     RECIP_IEEE * T1.X, T1.X,
2067; EG-NEXT:     MUL_IEEE T0.X, PS, T0.X,
2068; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
2069; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; %a is the <4 x float> at %in; %b is the <4 x float> immediately after it
; (getelementptr index 1). The `arcp ninf` quotient %a / %b is stored to %out.
2070  %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1
2071  %a = load <4 x float>, ptr addrspace(1) %in
2072  %b = load <4 x float>, ptr addrspace(1) %b_ptr
2073  %result = fdiv arcp ninf <4 x float> %a, %b
2074  store <4 x float> %result, ptr addrspace(1) %out
2075  ret void
2076}
2077
2078define amdgpu_kernel void @s_fdiv_f32_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #0 {
2079; GFX6-FASTFMA-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2080; GFX6-FASTFMA:       ; %bb.0: ; %entry
2081; GFX6-FASTFMA-NEXT:    s_load_dword s6, s[4:5], 0xb
2082; GFX6-FASTFMA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2083; GFX6-FASTFMA-NEXT:    s_mov_b32 s3, 0xf000
2084; GFX6-FASTFMA-NEXT:    s_mov_b32 s2, -1
2085; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
2086; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, s[4:5], s6, s6, 1.0
2087; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v1, v0
2088; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s6, 1.0
2089; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2090; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2091; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, v3, v1, v1
2092; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v3, v2, v1
2093; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v0, v3, v2
2094; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v1, v3
2095; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v0, v3, v2
2096; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2097; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2098; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2099; GFX6-FASTFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2100; GFX6-FASTFMA-NEXT:    s_endpgm
2101;
2102; GFX6-SLOWFMA-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2103; GFX6-SLOWFMA:       ; %bb.0: ; %entry
2104; GFX6-SLOWFMA-NEXT:    s_load_dword s6, s[4:5], 0xb
2105; GFX6-SLOWFMA-NEXT:    s_mov_b32 s3, 0xf000
2106; GFX6-SLOWFMA-NEXT:    s_mov_b32 s2, -1
2107; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
2108; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v0, s[0:1], s6, s6, 1.0
2109; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s6, 1.0
2110; GFX6-SLOWFMA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2111; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v2, v0
2112; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2113; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
2114; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
2115; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v3, v1, v2
2116; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v0, v3, v1
2117; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v2, v3
2118; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v0, v3, v1
2119; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2120; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
2121; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2122; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
2123; GFX6-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2124; GFX6-SLOWFMA-NEXT:    s_endpgm
2125;
2126; GFX7-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2127; GFX7:       ; %bb.0: ; %entry
2128; GFX7-NEXT:    s_load_dword s6, s[4:5], 0xb
2129; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2130; GFX7-NEXT:    s_mov_b32 s3, 0xf000
2131; GFX7-NEXT:    s_mov_b32 s2, -1
2132; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2133; GFX7-NEXT:    v_div_scale_f32 v0, s[4:5], s6, s6, 1.0
2134; GFX7-NEXT:    v_rcp_f32_e32 v1, v0
2135; GFX7-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s6, 1.0
2136; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2137; GFX7-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2138; GFX7-NEXT:    v_fma_f32 v1, v3, v1, v1
2139; GFX7-NEXT:    v_mul_f32_e32 v3, v2, v1
2140; GFX7-NEXT:    v_fma_f32 v4, -v0, v3, v2
2141; GFX7-NEXT:    v_fma_f32 v3, v4, v1, v3
2142; GFX7-NEXT:    v_fma_f32 v0, -v0, v3, v2
2143; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2144; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2145; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2146; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2147; GFX7-NEXT:    s_endpgm
2148;
2149; GFX8-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2150; GFX8:       ; %bb.0: ; %entry
2151; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
2152; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2153; GFX8-NEXT:    v_div_scale_f32 v0, s[0:1], s2, s2, 1.0
2154; GFX8-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s2, 1.0
2155; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2156; GFX8-NEXT:    v_rcp_f32_e32 v2, v0
2157; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2158; GFX8-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
2159; GFX8-NEXT:    v_fma_f32 v2, v3, v2, v2
2160; GFX8-NEXT:    v_mul_f32_e32 v3, v1, v2
2161; GFX8-NEXT:    v_fma_f32 v4, -v0, v3, v1
2162; GFX8-NEXT:    v_fma_f32 v3, v4, v2, v3
2163; GFX8-NEXT:    v_fma_f32 v0, -v0, v3, v1
2164; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2165; GFX8-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
2166; GFX8-NEXT:    v_div_fixup_f32 v2, v0, s2, 1.0
2167; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2168; GFX8-NEXT:    v_mov_b32_e32 v0, s0
2169; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2170; GFX8-NEXT:    flat_store_dword v[0:1], v2
2171; GFX8-NEXT:    s_endpgm
2172;
2173; GFX10-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2174; GFX10:       ; %bb.0: ; %entry
2175; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
2176; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2177; GFX10-NEXT:    v_div_scale_f32 v0, s0, s2, s2, 1.0
2178; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0
2179; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2180; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
2181; GFX10-NEXT:    s_denorm_mode 15
2182; GFX10-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2183; GFX10-NEXT:    v_fmac_f32_e32 v1, v3, v1
2184; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
2185; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
2186; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
2187; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
2188; GFX10-NEXT:    s_denorm_mode 12
2189; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2190; GFX10-NEXT:    v_mov_b32_e32 v1, 0
2191; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s2, 1.0
2192; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2193; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
2194; GFX10-NEXT:    s_endpgm
2195;
2196; GFX11-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2197; GFX11:       ; %bb.0: ; %entry
2198; GFX11-NEXT:    s_clause 0x1
2199; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
2200; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2201; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2202; GFX11-NEXT:    v_div_scale_f32 v0, null, s2, s2, 1.0
2203; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0
2204; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
2205; GFX11-NEXT:    s_denorm_mode 15
2206; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2207; GFX11-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2208; GFX11-NEXT:    v_fmac_f32_e32 v1, v3, v1
2209; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
2210; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
2211; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
2212; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
2213; GFX11-NEXT:    s_denorm_mode 12
2214; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2215; GFX11-NEXT:    v_mov_b32_e32 v1, 0
2216; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s2, 1.0
2217; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
2218; GFX11-NEXT:    s_endpgm
2219;
2220; EG-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt:
2221; EG:       ; %bb.0: ; %entry
2222; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
2223; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
2224; EG-NEXT:    CF_END
2225; EG-NEXT:    PAD
2226; EG-NEXT:    ALU clause starting at 4:
2227; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
2228; EG-NEXT:     RECIP_IEEE * T1.X, KC0[2].Z,
2229; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
2230entry:
2231  %fdiv = fdiv float 1.000000e+00, %a
2232  store float %fdiv, ptr addrspace(1) %out
2233  ret void
2234}
2235
; Kernel test. Computes 1.0 / %a with a plain f32 fdiv (no fast-math flags and
; no !fpmath metadata) and stores the quotient through %out.
; Every GCN run line expects the full expansion, i.e. v_div_scale_f32 twice,
; v_rcp_f32, the FMA refinement chain, v_div_fmas_f32 and v_div_fixup_f32.
; Unlike the expectations for s_fdiv_f32_correctly_rounded_divide_sqrt earlier
; in this file, none of the sequences below wraps the FMA chain in
; s_setreg_imm32_b32 / s_denorm_mode instructions.
; The Evergreen (r600) run expects a single RECIP_IEEE.
; NOTE(review) attribute group #1 is defined outside this part of the file;
; presumably it enables f32 denormals, which would explain the absent mode
; switches - confirm against the attribute definitions.
2236define amdgpu_kernel void @s_fdiv_f32_denorms_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #1 {
2237; GFX6-FASTFMA-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2238; GFX6-FASTFMA:       ; %bb.0: ; %entry
2239; GFX6-FASTFMA-NEXT:    s_load_dword s6, s[4:5], 0xb
2240; GFX6-FASTFMA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2241; GFX6-FASTFMA-NEXT:    s_mov_b32 s3, 0xf000
2242; GFX6-FASTFMA-NEXT:    s_mov_b32 s2, -1
2243; GFX6-FASTFMA-NEXT:    s_waitcnt lgkmcnt(0)
2244; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v0, s[4:5], s6, s6, 1.0
2245; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v1, v0
2246; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s6, 1.0
2247; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2248; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, v3, v1, v1
2249; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v3, v2, v1
2250; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v0, v3, v2
2251; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v1, v3
2252; GFX6-FASTFMA-NEXT:    v_fma_f32 v0, -v0, v3, v2
2253; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2254; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2255; GFX6-FASTFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2256; GFX6-FASTFMA-NEXT:    s_endpgm
2257;
2258; GFX6-SLOWFMA-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2259; GFX6-SLOWFMA:       ; %bb.0: ; %entry
2260; GFX6-SLOWFMA-NEXT:    s_load_dword s6, s[4:5], 0xb
2261; GFX6-SLOWFMA-NEXT:    s_mov_b32 s3, 0xf000
2262; GFX6-SLOWFMA-NEXT:    s_mov_b32 s2, -1
2263; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
2264; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v0, s[0:1], s6, s6, 1.0
2265; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s6, 1.0
2266; GFX6-SLOWFMA-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2267; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v2, v0
2268; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
2269; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
2270; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v3, v1, v2
2271; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v0, v3, v1
2272; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v2, v3
2273; GFX6-SLOWFMA-NEXT:    v_fma_f32 v0, -v0, v3, v1
2274; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
2275; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2276; GFX6-SLOWFMA-NEXT:    s_waitcnt lgkmcnt(0)
2277; GFX6-SLOWFMA-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2278; GFX6-SLOWFMA-NEXT:    s_endpgm
2279;
2280; GFX7-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2281; GFX7:       ; %bb.0: ; %entry
2282; GFX7-NEXT:    s_load_dword s6, s[4:5], 0xb
2283; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
2284; GFX7-NEXT:    s_mov_b32 s3, 0xf000
2285; GFX7-NEXT:    s_mov_b32 s2, -1
2286; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2287; GFX7-NEXT:    v_div_scale_f32 v0, s[4:5], s6, s6, 1.0
2288; GFX7-NEXT:    v_rcp_f32_e32 v1, v0
2289; GFX7-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s6, 1.0
2290; GFX7-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
2291; GFX7-NEXT:    v_fma_f32 v1, v3, v1, v1
2292; GFX7-NEXT:    v_mul_f32_e32 v3, v2, v1
2293; GFX7-NEXT:    v_fma_f32 v4, -v0, v3, v2
2294; GFX7-NEXT:    v_fma_f32 v3, v4, v1, v3
2295; GFX7-NEXT:    v_fma_f32 v0, -v0, v3, v2
2296; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2297; GFX7-NEXT:    v_div_fixup_f32 v0, v0, s6, 1.0
2298; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2299; GFX7-NEXT:    s_endpgm
2300;
2301; GFX8-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2302; GFX8:       ; %bb.0: ; %entry
2303; GFX8-NEXT:    s_load_dword s2, s[4:5], 0x2c
2304; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2305; GFX8-NEXT:    v_div_scale_f32 v0, s[0:1], s2, s2, 1.0
2306; GFX8-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s2, 1.0
2307; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2308; GFX8-NEXT:    v_rcp_f32_e32 v2, v0
2309; GFX8-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
2310; GFX8-NEXT:    v_fma_f32 v2, v3, v2, v2
2311; GFX8-NEXT:    v_mul_f32_e32 v3, v1, v2
2312; GFX8-NEXT:    v_fma_f32 v4, -v0, v3, v1
2313; GFX8-NEXT:    v_fma_f32 v3, v4, v2, v3
2314; GFX8-NEXT:    v_fma_f32 v0, -v0, v3, v1
2315; GFX8-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
2316; GFX8-NEXT:    v_div_fixup_f32 v2, v0, s2, 1.0
2317; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2318; GFX8-NEXT:    v_mov_b32_e32 v0, s0
2319; GFX8-NEXT:    v_mov_b32_e32 v1, s1
2320; GFX8-NEXT:    flat_store_dword v[0:1], v2
2321; GFX8-NEXT:    s_endpgm
2322;
2323; GFX10-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2324; GFX10:       ; %bb.0: ; %entry
2325; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
2326; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2327; GFX10-NEXT:    v_div_scale_f32 v0, s0, s2, s2, 1.0
2328; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
2329; GFX10-NEXT:    v_rcp_f32_e32 v1, v0
2330; GFX10-NEXT:    v_fma_f32 v2, -v0, v1, 1.0
2331; GFX10-NEXT:    v_fmac_f32_e32 v1, v2, v1
2332; GFX10-NEXT:    v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0
2333; GFX10-NEXT:    v_mul_f32_e32 v3, v2, v1
2334; GFX10-NEXT:    v_fma_f32 v4, -v0, v3, v2
2335; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v1
2336; GFX10-NEXT:    v_fma_f32 v0, -v0, v3, v2
2337; GFX10-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2338; GFX10-NEXT:    v_mov_b32_e32 v1, 0
2339; GFX10-NEXT:    v_div_fixup_f32 v0, v0, s2, 1.0
2340; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
2341; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
2342; GFX10-NEXT:    s_endpgm
2343;
2344; GFX11-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2345; GFX11:       ; %bb.0: ; %entry
2346; GFX11-NEXT:    s_clause 0x1
2347; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
2348; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
2349; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2350; GFX11-NEXT:    v_div_scale_f32 v0, null, s2, s2, 1.0
2351; GFX11-NEXT:    v_rcp_f32_e32 v1, v0
2352; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2353; GFX11-NEXT:    v_fma_f32 v2, -v0, v1, 1.0
2354; GFX11-NEXT:    v_fmac_f32_e32 v1, v2, v1
2355; GFX11-NEXT:    v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0
2356; GFX11-NEXT:    v_mul_f32_e32 v3, v2, v1
2357; GFX11-NEXT:    v_fma_f32 v4, -v0, v3, v2
2358; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v1
2359; GFX11-NEXT:    v_fma_f32 v0, -v0, v3, v2
2360; GFX11-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
2361; GFX11-NEXT:    v_mov_b32_e32 v1, 0
2362; GFX11-NEXT:    v_div_fixup_f32 v0, v0, s2, 1.0
2363; GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
2364; GFX11-NEXT:    s_endpgm
2365;
2366; EG-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt:
2367; EG:       ; %bb.0: ; %entry
2368; EG-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
2369; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
2370; EG-NEXT:    CF_END
2371; EG-NEXT:    PAD
2372; EG-NEXT:    ALU clause starting at 4:
2373; EG-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
2374; EG-NEXT:     RECIP_IEEE * T1.X, KC0[2].Z,
2375; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; IR under test. The block label "entry" is echoed in the "%entry" comments of
; the expectations above, so it has to keep this name.
2376entry:
2377  %fdiv = fdiv float 1.000000e+00, %a
2378  store float %fdiv, ptr addrspace(1) %out
2379  ret void
2380}
2381
; Non-kernel function test. Returns %a / %b computed with a plain f32 fdiv
; (no fast-math flags and no !fpmath metadata).
; In every expected GCN sequence the hwreg(HW_REG_MODE, 4, 2) field is read
; with s_getreg_b32 before the FMA refinement chain, the chain runs after a
; mode switch (s_setreg_imm32_b32 with value 3 up to GFX8, s_denorm_mode 15 on
; GFX10 and GFX11), and the saved value is written back with s_setreg_b32 just
; before v_div_fmas_f32.
; The Evergreen (r600) run only expects CF_END followed by PAD.
; NOTE(review) attribute group #2 is defined outside this part of the file;
; presumably it selects a dynamic f32 denormal mode, hence the save/restore -
; confirm against the attribute definitions.
2382define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 {
2383; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_denorm:
2384; GFX6-FASTFMA:       ; %bb.0:
2385; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2386; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2387; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
2388; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2389; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2390; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2391; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2392; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
2393; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
2394; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
2395; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
2396; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
2397; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2398; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2399; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2400; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
2401;
2402; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_denorm:
2403; GFX6-SLOWFMA:       ; %bb.0:
2404; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2406; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2407; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2408; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
2409; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2410; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2411; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
2412; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
2413; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
2414; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
2415; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
2416; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2417; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2418; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2419; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
2420;
2421; GFX7-LABEL: v_fdiv_f32_dynamic_denorm:
2422; GFX7:       ; %bb.0:
2423; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2424; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2425; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
2426; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2427; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2428; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2429; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2430; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
2431; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
2432; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
2433; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
2434; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
2435; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2436; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2437; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2438; GFX7-NEXT:    s_setpc_b64 s[30:31]
2439;
2440; GFX8-LABEL: v_fdiv_f32_dynamic_denorm:
2441; GFX8:       ; %bb.0:
2442; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2443; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2444; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2445; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2446; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
2447; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2448; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2449; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
2450; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
2451; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
2452; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
2453; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
2454; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2455; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2456; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2457; GFX8-NEXT:    s_setpc_b64 s[30:31]
2458;
2459; GFX10-LABEL: v_fdiv_f32_dynamic_denorm:
2460; GFX10:       ; %bb.0:
2461; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2462; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
2463; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2464; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2465; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
2466; GFX10-NEXT:    s_denorm_mode 15
2467; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2468; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
2469; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
2470; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
2471; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
2472; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
2473; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2474; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2475; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2476; GFX10-NEXT:    s_setpc_b64 s[30:31]
2477;
2478; GFX11-LABEL: v_fdiv_f32_dynamic_denorm:
2479; GFX11:       ; %bb.0:
2480; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2481; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
2482; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2483; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
2484; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
2485; GFX11-NEXT:    s_denorm_mode 15
2486; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2487; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2488; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
2489; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
2490; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
2491; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
2492; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
2493; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
2494; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2495; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2496; GFX11-NEXT:    s_setpc_b64 s[30:31]
2497;
2498; EG-LABEL: v_fdiv_f32_dynamic_denorm:
2499; EG:       ; %bb.0:
2500; EG-NEXT:    CF_END
2501; EG-NEXT:    PAD
; IR under test. A single unannotated division whose result is returned directly.
2502  %fdiv = fdiv float %a, %b
2503  ret float %fdiv
2504}
2505
; Non-kernel function test. Returns %x / %y computed with a plain f32 fdiv
; (no fast-math flags and no !fpmath metadata).
; Every expected GCN sequence is the full expansion (v_div_scale_f32 twice,
; v_rcp_f32, FMA refinement chain, v_div_fmas_f32, v_div_fixup_f32) and
; contains no s_getreg / s_setreg / s_denorm_mode instruction at all.
; The Evergreen (r600) run only expects CF_END followed by PAD.
; NOTE(review) attribute group #1 is defined outside this part of the file;
; presumably it enables f32 denormals so no mode switch is required - confirm
; against the attribute definitions.
2506define float @v_fdiv_f32_ieee(float %x, float %y) #1 {
2507; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee:
2508; GFX6-FASTFMA:       ; %bb.0:
2509; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2510; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2511; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
2512; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
2513; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
2514; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2515; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
2516; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
2517; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
2518; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
2519; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2520; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2521; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
2522;
2523; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee:
2524; GFX6-SLOWFMA:       ; %bb.0:
2525; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2526; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2527; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2528; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
2529; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2530; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
2531; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
2532; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
2533; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
2534; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
2535; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2536; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2537; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
2538;
2539; GFX7-LABEL: v_fdiv_f32_ieee:
2540; GFX7:       ; %bb.0:
2541; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2542; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2543; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
2544; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
2545; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
2546; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2547; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
2548; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
2549; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
2550; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
2551; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2552; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2553; GFX7-NEXT:    s_setpc_b64 s[30:31]
2554;
2555; GFX8-LABEL: v_fdiv_f32_ieee:
2556; GFX8:       ; %bb.0:
2557; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2558; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2559; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2560; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
2561; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2562; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
2563; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
2564; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
2565; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
2566; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
2567; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2568; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2569; GFX8-NEXT:    s_setpc_b64 s[30:31]
2570;
2571; GFX10-LABEL: v_fdiv_f32_ieee:
2572; GFX10:       ; %bb.0:
2573; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
2575; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
2576; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
2577; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
2578; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2579; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
2580; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
2581; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
2582; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
2583; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2584; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2585; GFX10-NEXT:    s_setpc_b64 s[30:31]
2586;
2587; GFX11-LABEL: v_fdiv_f32_ieee:
2588; GFX11:       ; %bb.0:
2589; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2590; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
2591; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
2592; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2593; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
2594; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
2595; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2596; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
2597; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
2598; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
2599; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
2600; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2601; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2602; GFX11-NEXT:    s_setpc_b64 s[30:31]
2603;
2604; EG-LABEL: v_fdiv_f32_ieee:
2605; EG:       ; %bb.0:
2606; EG-NEXT:    CF_END
2607; EG-NEXT:    PAD
; IR under test. A single unannotated division whose result is returned directly.
2608  %div = fdiv float %x, %y
2609  ret float %div
2610}
2611
; Non-kernel function test. Returns %x / %y where the fdiv carries !fpmath !0.
; Instead of the v_div_scale / v_div_fmas / v_div_fixup expansion, every GCN
; run line expects a frexp-based sequence. Both operands are split with
; v_frexp_mant_f32 and v_frexp_exp_i32_f32, the denominator mantissa goes
; through v_rcp_f32, the mantissas are multiplied, and v_ldexp_f32 rescales the
; product by the difference of the two exponents.
; The checks under the GFX6 prefix (shared by the tahiti and pitcairn run
; lines) additionally compare |operand| against 0x7f800000 and use
; v_cndmask_b32 to keep the original operand instead of its frexp mantissa
; when that comparison fails. GFX7 and newer have no such guard.
; The Evergreen (r600) run only expects CF_END followed by PAD.
; NOTE(review) !0 and attribute group #1 are defined outside this part of the
; file; judging by the function name !0 presumably requests 2.5 ulp accuracy -
; confirm against the metadata definition.
2612define float @v_fdiv_f32_ieee_25ulp(float %x, float %y) #1 {
2613; GFX6-LABEL: v_fdiv_f32_ieee_25ulp:
2614; GFX6:       ; %bb.0:
2615; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
2617; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
2618; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2619; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2620; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
2621; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
2622; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2623; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2624; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
2625; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2626; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
2627; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2628; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2629; GFX6-NEXT:    s_setpc_b64 s[30:31]
2630;
2631; GFX7-LABEL: v_fdiv_f32_ieee_25ulp:
2632; GFX7:       ; %bb.0:
2633; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2634; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
2635; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
2636; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2637; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
2638; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
2639; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
2640; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
2641; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2642; GFX7-NEXT:    s_setpc_b64 s[30:31]
2643;
2644; GFX8-LABEL: v_fdiv_f32_ieee_25ulp:
2645; GFX8:       ; %bb.0:
2646; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2647; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
2648; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
2649; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2650; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
2651; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
2652; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
2653; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
2654; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
2655; GFX8-NEXT:    s_setpc_b64 s[30:31]
2656;
2657; GFX10-LABEL: v_fdiv_f32_ieee_25ulp:
2658; GFX10:       ; %bb.0:
2659; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2660; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
2661; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2662; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
2663; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2664; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
2665; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
2666; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
2667; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
2668; GFX10-NEXT:    s_setpc_b64 s[30:31]
2669;
2670; GFX11-LABEL: v_fdiv_f32_ieee_25ulp:
2671; GFX11:       ; %bb.0:
2672; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
2674; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2675; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
2676; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2677; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
2678; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
2679; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2680; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
2681; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
2682; GFX11-NEXT:    s_setpc_b64 s[30:31]
2683;
2684; EG-LABEL: v_fdiv_f32_ieee_25ulp:
2685; EG:       ; %bb.0:
2686; EG-NEXT:    CF_END
2687; EG-NEXT:    PAD
; IR under test. The !fpmath annotation is the only difference from the IR of
; v_fdiv_f32_ieee.
2688  %div = fdiv float %x, %y, !fpmath !0
2689  ret float %div
2690}
2691
; Non-kernel function test. Returns %x / %y computed with a plain f32 fdiv
; (no fast-math flags and no !fpmath metadata).
; The IR body and attribute group are the same as in v_fdiv_f32_dynamic_denorm
; (only the value names differ), and the expected instruction sequences match
; that test line for line. The hwreg(HW_REG_MODE, 4, 2) field is saved with
; s_getreg_b32, the FMA refinement chain runs after a mode switch
; (s_setreg_imm32_b32 with value 3 up to GFX8, s_denorm_mode 15 on GFX10 and
; GFX11), and the saved value is restored with s_setreg_b32 before
; v_div_fmas_f32.
; The Evergreen (r600) run only expects CF_END followed by PAD.
; NOTE(review) attribute group #2 is defined outside this part of the file;
; presumably it selects a dynamic f32 denormal mode - confirm against the
; attribute definitions.
2692define float @v_fdiv_f32_dynamic(float %x, float %y) #2 {
2693; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic:
2694; GFX6-FASTFMA:       ; %bb.0:
2695; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2696; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2697; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
2698; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2699; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2700; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2701; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2702; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
2703; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
2704; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
2705; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
2706; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
2707; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2708; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2709; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2710; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
2711;
2712; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic:
2713; GFX6-SLOWFMA:       ; %bb.0:
2714; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2715; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2716; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2717; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2718; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
2719; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2720; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2721; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
2722; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
2723; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
2724; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
2725; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
2726; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2727; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2728; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2729; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
2730;
2731; GFX7-LABEL: v_fdiv_f32_dynamic:
2732; GFX7:       ; %bb.0:
2733; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2734; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2735; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
2736; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2737; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2738; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2739; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2740; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
2741; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
2742; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
2743; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
2744; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
2745; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2746; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2747; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2748; GFX7-NEXT:    s_setpc_b64 s[30:31]
2749;
2750; GFX8-LABEL: v_fdiv_f32_dynamic:
2751; GFX8:       ; %bb.0:
2752; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2753; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2754; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2755; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2756; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
2757; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2758; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2759; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
2760; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
2761; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
2762; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
2763; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
2764; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2765; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2766; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2767; GFX8-NEXT:    s_setpc_b64 s[30:31]
2768;
2769; GFX10-LABEL: v_fdiv_f32_dynamic:
2770; GFX10:       ; %bb.0:
2771; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2772; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
2773; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2774; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
2775; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
2776; GFX10-NEXT:    s_denorm_mode 15
2777; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2778; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
2779; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
2780; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
2781; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
2782; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
2783; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
2784; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2785; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2786; GFX10-NEXT:    s_setpc_b64 s[30:31]
2787;
2788; GFX11-LABEL: v_fdiv_f32_dynamic:
2789; GFX11:       ; %bb.0:
2790; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2791; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
2792; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2793; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
2794; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
2795; GFX11-NEXT:    s_denorm_mode 15
2796; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2797; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2798; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
2799; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
2800; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
2801; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
2802; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
2803; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
2804; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2805; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2806; GFX11-NEXT:    s_setpc_b64 s[30:31]
2807;
2808; EG-LABEL: v_fdiv_f32_dynamic:
2809; EG:       ; %bb.0:
2810; EG-NEXT:    CF_END
2811; EG-NEXT:    PAD
; IR under test. A single unannotated division whose result is returned directly.
2812  %div = fdiv float %x, %y
2813  ret float %div
2814}
2815
; Returns %x / %y as a single f32 fdiv carrying !fpmath !0 (function
; attributes #2).
; The checked GCN output is a v_frexp_mant / v_frexp_exp + v_rcp_f32 +
; v_ldexp_f32 sequence with no hwreg(HW_REG_MODE, 4, 2) save or restore. The
; tahiti/pitcairn output additionally keeps the raw operand instead of its
; frexp mantissa when |operand| is not below 0x7f800000 (f32 +infinity bits).
; The r600 run only checks CF_END and PAD.
; NOTE(review): #2 and !0 are defined outside this chunk; the name suggests a
; dynamic denormal mode and a 2.5 ulp accuracy bound - confirm.
2816define float @v_fdiv_f32_dynamic_25ulp(float %x, float %y) #2 {
2817; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp:
2818; GFX6:       ; %bb.0:
2819; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2820; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
2821; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
2822; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
2823; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
2824; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
2825; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
2826; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2827; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2828; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
2829; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2830; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
2831; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
2832; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
2833; GFX6-NEXT:    s_setpc_b64 s[30:31]
2834;
2835; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp:
2836; GFX7:       ; %bb.0:
2837; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2838; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
2839; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
2840; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2841; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
2842; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
2843; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
2844; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
2845; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2846; GFX7-NEXT:    s_setpc_b64 s[30:31]
2847;
2848; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp:
2849; GFX8:       ; %bb.0:
2850; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2851; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
2852; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
2853; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2854; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
2855; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
2856; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
2857; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
2858; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
2859; GFX8-NEXT:    s_setpc_b64 s[30:31]
2860;
2861; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp:
2862; GFX10:       ; %bb.0:
2863; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2864; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
2865; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2866; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
2867; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2868; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
2869; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
2870; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
2871; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
2872; GFX10-NEXT:    s_setpc_b64 s[30:31]
2873;
2874; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp:
2875; GFX11:       ; %bb.0:
2876; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2877; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
2878; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
2879; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
2880; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2881; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
2882; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
2883; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2884; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
2885; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
2886; GFX11-NEXT:    s_setpc_b64 s[30:31]
2887;
2888; EG-LABEL: v_fdiv_f32_dynamic_25ulp:
2889; EG:       ; %bb.0:
2890; EG-NEXT:    CF_END
2891; EG-NEXT:    PAD
2892  %div = fdiv float %x, %y, !fpmath !0
2893  ret float %div
2894}
2895
; Returns %x / %y as a plain f32 fdiv (no !fpmath) under function
; attributes #0.
; The checked GCN output is the v_div_scale / v_rcp / FMA refinement /
; v_div_fmas / v_div_fixup expansion. The FMA chain is bracketed by immediate
; mode writes rather than a s_getreg_b32 save and restore: s_setreg_imm32_b32
; hwreg(HW_REG_MODE, 4, 2) with 3 then 0 on the tahiti, pitcairn, hawaii and
; fiji runs, and s_denorm_mode 15 then 12 on the gfx1010 and gfx1100 runs.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #0 is defined outside this chunk; the name suggests a
; denormals-are-zero FP32 mode - confirm.
2896define float @v_fdiv_f32_daz(float %x, float %y) #0 {
2897; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz:
2898; GFX6-FASTFMA:       ; %bb.0:
2899; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2900; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2901; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
2902; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2903; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2904; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2905; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
2906; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
2907; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
2908; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
2909; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
2910; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2911; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2912; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2913; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
2914;
2915; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz:
2916; GFX6-SLOWFMA:       ; %bb.0:
2917; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2919; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2920; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
2921; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2922; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2923; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
2924; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
2925; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
2926; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
2927; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
2928; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2929; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2930; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2931; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
2932;
2933; GFX7-LABEL: v_fdiv_f32_daz:
2934; GFX7:       ; %bb.0:
2935; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2936; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2937; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
2938; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
2939; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2940; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2941; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
2942; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
2943; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
2944; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
2945; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
2946; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2947; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2948; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2949; GFX7-NEXT:    s_setpc_b64 s[30:31]
2950;
2951; GFX8-LABEL: v_fdiv_f32_daz:
2952; GFX8:       ; %bb.0:
2953; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2954; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
2955; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
2956; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
2957; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
2958; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
2959; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
2960; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
2961; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
2962; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
2963; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
2964; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
2965; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
2966; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2967; GFX8-NEXT:    s_setpc_b64 s[30:31]
2968;
2969; GFX10-LABEL: v_fdiv_f32_daz:
2970; GFX10:       ; %bb.0:
2971; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
2973; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2974; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
2975; GFX10-NEXT:    s_denorm_mode 15
2976; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2977; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
2978; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
2979; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
2980; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
2981; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
2982; GFX10-NEXT:    s_denorm_mode 12
2983; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
2984; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
2985; GFX10-NEXT:    s_setpc_b64 s[30:31]
2986;
2987; GFX11-LABEL: v_fdiv_f32_daz:
2988; GFX11:       ; %bb.0:
2989; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2990; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
2991; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
2992; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
2993; GFX11-NEXT:    s_denorm_mode 15
2994; GFX11-NEXT:    s_waitcnt_depctr 0xfff
2995; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
2996; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
2997; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
2998; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
2999; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
3000; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
3001; GFX11-NEXT:    s_denorm_mode 12
3002; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3003; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3004; GFX11-NEXT:    s_setpc_b64 s[30:31]
3005;
3006; EG-LABEL: v_fdiv_f32_daz:
3007; EG:       ; %bb.0:
3008; EG-NEXT:    CF_END
3009; EG-NEXT:    PAD
3010  %div = fdiv float %x, %y
3011  ret float %div
3012}
3013
; Returns %x / %y as an f32 fdiv carrying !fpmath !0 under function
; attributes #0.
; The checked GCN output has no v_div_scale expansion and no mode-register
; writes: the denominator is multiplied by 0x2f800000 (2^-32) when its
; magnitude exceeds 0x6f800000 (2^96), or by 1.0 otherwise, then fed to
; v_rcp_f32, and the same factor is multiplied back into the quotient.
; The tahiti, pitcairn, hawaii and fiji runs share one set of checks here.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #0 and !0 are defined outside this chunk; the name suggests a
; denormals-are-zero FP32 mode with a 2.5 ulp accuracy bound - confirm.
3014define float @v_fdiv_f32_daz_25ulp(float %x, float %y) #0 {
3015; GFX678-LABEL: v_fdiv_f32_daz_25ulp:
3016; GFX678:       ; %bb.0:
3017; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3018; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
3019; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
3020; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
3021; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
3022; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v2
3023; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
3024; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
3025; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
3026; GFX678-NEXT:    s_setpc_b64 s[30:31]
3027;
3028; GFX10-LABEL: v_fdiv_f32_daz_25ulp:
3029; GFX10:       ; %bb.0:
3030; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3031; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
3032; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
3033; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v2
3034; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
3035; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
3036; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
3037; GFX10-NEXT:    s_setpc_b64 s[30:31]
3038;
3039; GFX11-LABEL: v_fdiv_f32_daz_25ulp:
3040; GFX11:       ; %bb.0:
3041; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3042; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
3043; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
3044; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
3045; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
3046; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3047; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
3048; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
3049; GFX11-NEXT:    s_setpc_b64 s[30:31]
3050;
3051; EG-LABEL: v_fdiv_f32_daz_25ulp:
3052; EG:       ; %bb.0:
3053; EG-NEXT:    CF_END
3054; EG-NEXT:    PAD
3055  %div = fdiv float %x, %y, !fpmath !0
3056  ret float %div
3057}
3058
3059; If we emit an fmul, make sure it fuses into the user.
; This function computes (%x / %y) + %z with 'contract' on both the fdiv and
; the fadd, under function attributes #1 and without !fpmath.
; The checked GCN output for this case is the full v_div_scale .. v_div_fixup
; expansion with no mode-register writes, followed by a separate v_add_f32.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #1 is defined outside this chunk; the name suggests IEEE
; denormal handling - confirm.
3060define float @v_fdiv_f32_ieee_contractable_user(float %x, float %y, float %z) #1 {
3061; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_contractable_user:
3062; GFX6-FASTFMA:       ; %bb.0:
3063; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3064; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3065; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
3066; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
3067; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
3068; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3069; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
3070; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
3071; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
3072; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
3073; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3074; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3075; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3076; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
3077;
3078; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_contractable_user:
3079; GFX6-SLOWFMA:       ; %bb.0:
3080; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3081; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3082; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3083; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
3084; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3085; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
3086; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
3087; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
3088; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
3089; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
3090; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3091; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3092; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3093; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
3094;
3095; GFX7-LABEL: v_fdiv_f32_ieee_contractable_user:
3096; GFX7:       ; %bb.0:
3097; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3098; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3099; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
3100; GFX7-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
3101; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
3102; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3103; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
3104; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
3105; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
3106; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
3107; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3108; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3109; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
3110; GFX7-NEXT:    s_setpc_b64 s[30:31]
3111;
3112; GFX8-LABEL: v_fdiv_f32_ieee_contractable_user:
3113; GFX8:       ; %bb.0:
3114; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3115; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3116; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3117; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
3118; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3119; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
3120; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
3121; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
3122; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
3123; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
3124; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3125; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3126; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
3127; GFX8-NEXT:    s_setpc_b64 s[30:31]
3128;
3129; GFX10-LABEL: v_fdiv_f32_ieee_contractable_user:
3130; GFX10:       ; %bb.0:
3131; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3132; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
3133; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
3134; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
3135; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
3136; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3137; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
3138; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
3139; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
3140; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
3141; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3142; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3143; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
3144; GFX10-NEXT:    s_setpc_b64 s[30:31]
3145;
3146; GFX11-LABEL: v_fdiv_f32_ieee_contractable_user:
3147; GFX11:       ; %bb.0:
3148; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3149; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
3150; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
3151; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3152; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
3153; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
3154; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3155; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
3156; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
3157; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
3158; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
3159; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3160; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3161; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
3162; GFX11-NEXT:    s_setpc_b64 s[30:31]
3163;
3164; EG-LABEL: v_fdiv_f32_ieee_contractable_user:
3165; EG:       ; %bb.0:
3166; EG-NEXT:    CF_END
3167; EG-NEXT:    PAD
3168  %div = fdiv contract float %x, %y
3169  %add = fadd contract float %div, %z
3170  ret float %add
3171}
3172
; Computes (%x / %y) + %z with 'contract' on both operations; the fdiv also
; carries !fpmath !0 (function attributes #1).
; The checked GCN output is the v_frexp_mant / v_frexp_exp + v_rcp_f32 +
; v_mul_f32 + v_ldexp_f32 sequence followed by a separate v_add_f32, so the
; value feeding the add comes from v_ldexp_f32 rather than directly from the
; multiply.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #1 and !0 are defined outside this chunk; the name suggests
; IEEE denormal handling with a 2.5 ulp accuracy bound - confirm.
3173define float @v_fdiv_f32_ieee_25ulp_contractable_user(float %x, float %y, float %z) #1 {
3174; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3175; GFX6:       ; %bb.0:
3176; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3177; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
3178; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v1
3179; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
3180; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
3181; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
3182; GFX6-NEXT:    v_frexp_mant_f32_e32 v4, v0
3183; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3184; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3185; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
3186; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3187; GFX6-NEXT:    v_mul_f32_e32 v3, v4, v3
3188; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3189; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
3190; GFX6-NEXT:    v_add_f32_e32 v0, v0, v2
3191; GFX6-NEXT:    s_setpc_b64 s[30:31]
3192;
3193; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3194; GFX7:       ; %bb.0:
3195; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3196; GFX7-NEXT:    v_frexp_mant_f32_e32 v3, v1
3197; GFX7-NEXT:    v_rcp_f32_e32 v3, v3
3198; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3199; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
3200; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
3201; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v3
3202; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
3203; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3204; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
3205; GFX7-NEXT:    s_setpc_b64 s[30:31]
3206;
3207; GFX8-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3208; GFX8:       ; %bb.0:
3209; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3210; GFX8-NEXT:    v_frexp_mant_f32_e32 v3, v1
3211; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
3212; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3213; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
3214; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
3215; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v3
3216; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
3217; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
3218; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
3219; GFX8-NEXT:    s_setpc_b64 s[30:31]
3220;
3221; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3222; GFX10:       ; %bb.0:
3223; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3224; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v1
3225; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3226; GFX10-NEXT:    v_frexp_mant_f32_e32 v4, v0
3227; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3228; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
3229; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3230; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
3231; GFX10-NEXT:    v_ldexp_f32 v0, v3, v0
3232; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
3233; GFX10-NEXT:    s_setpc_b64 s[30:31]
3234;
3235; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3236; GFX11:       ; %bb.0:
3237; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3238; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v1
3239; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3240; GFX11-NEXT:    v_frexp_mant_f32_e32 v4, v0
3241; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3242; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
3243; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3244; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3245; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v3
3246; GFX11-NEXT:    v_ldexp_f32 v0, v3, v0
3247; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
3248; GFX11-NEXT:    s_setpc_b64 s[30:31]
3249;
3250; EG-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user:
3251; EG:       ; %bb.0:
3252; EG-NEXT:    CF_END
3253; EG-NEXT:    PAD
3254  %div = fdiv contract float %x, %y, !fpmath !0
3255  %add = fadd contract float %div, %z
3256  ret float %add
3257}
3258
; Computes (%x / %y) + %z with 'contract' on both operations, under function
; attributes #2 and without !fpmath.
; The checked GCN output reads hwreg(HW_REG_MODE, 4, 2) with s_getreg_b32,
; overrides the mode around the FMA refinement chain (s_setreg_imm32_b32 ... 3
; on the tahiti, pitcairn, hawaii and fiji runs, s_denorm_mode 15 on the
; gfx1010 and gfx1100 runs), writes the saved value back with s_setreg_b32
; before v_div_fmas_f32, and finishes with a separate v_add_f32.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #2 is defined outside this chunk; the name suggests a dynamic
; (not statically known) denormal mode - confirm.
3259define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) #2 {
3260; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_contractable_user:
3261; GFX6-FASTFMA:       ; %bb.0:
3262; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3263; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3264; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
3265; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3266; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3267; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3268; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3269; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
3270; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
3271; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
3272; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
3273; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
3274; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3275; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3276; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3277; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3278; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
3279;
3280; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_contractable_user:
3281; GFX6-SLOWFMA:       ; %bb.0:
3282; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3283; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3284; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3285; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3286; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
3287; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3288; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3289; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
3290; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
3291; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
3292; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
3293; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
3294; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3295; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3296; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3297; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3298; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
3299;
3300; GFX7-LABEL: v_fdiv_f32_dynamic_contractable_user:
3301; GFX7:       ; %bb.0:
3302; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3303; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3304; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
3305; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3306; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3307; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3308; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3309; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
3310; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
3311; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
3312; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
3313; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
3314; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3315; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3316; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3317; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
3318; GFX7-NEXT:    s_setpc_b64 s[30:31]
3319;
3320; GFX8-LABEL: v_fdiv_f32_dynamic_contractable_user:
3321; GFX8:       ; %bb.0:
3322; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3324; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3325; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3326; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
3327; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3328; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3329; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
3330; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
3331; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
3332; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
3333; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
3334; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3335; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3336; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3337; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
3338; GFX8-NEXT:    s_setpc_b64 s[30:31]
3339;
3340; GFX10-LABEL: v_fdiv_f32_dynamic_contractable_user:
3341; GFX10:       ; %bb.0:
3342; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3343; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
3344; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3345; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3346; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
3347; GFX10-NEXT:    s_denorm_mode 15
3348; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3349; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
3350; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
3351; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
3352; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
3353; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
3354; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3355; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3356; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3357; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
3358; GFX10-NEXT:    s_setpc_b64 s[30:31]
3359;
3360; GFX11-LABEL: v_fdiv_f32_dynamic_contractable_user:
3361; GFX11:       ; %bb.0:
3362; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3363; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
3364; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3365; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
3366; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
3367; GFX11-NEXT:    s_denorm_mode 15
3368; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3369; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3370; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
3371; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
3372; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
3373; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
3374; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
3375; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
3376; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3377; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3378; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
3379; GFX11-NEXT:    s_setpc_b64 s[30:31]
3380;
3381; EG-LABEL: v_fdiv_f32_dynamic_contractable_user:
3382; EG:       ; %bb.0:
3383; EG-NEXT:    CF_END
3384; EG-NEXT:    PAD
3385  %div = fdiv contract float %x, %y
3386  %add = fadd contract float %div, %z
3387  ret float %add
3388}
3389
; Computes (%x / %y) + %z with 'contract' on both operations; the fdiv also
; carries !fpmath !0 (function attributes #2).
; The checked GCN output is the v_frexp_mant / v_frexp_exp + v_rcp_f32 +
; v_mul_f32 + v_ldexp_f32 sequence followed by a separate v_add_f32, with no
; hwreg(HW_REG_MODE, 4, 2) save or restore.
; The r600 run only checks CF_END and PAD.
; NOTE(review): #2 and !0 are defined outside this chunk; the name suggests a
; dynamic denormal mode with a 2.5 ulp accuracy bound - confirm.
3390define float @v_fdiv_f32_dynamic_25ulp_contractable_user(float %x, float %y, float %z) #2 {
3391; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3392; GFX6:       ; %bb.0:
3393; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3394; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
3395; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v1
3396; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
3397; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
3398; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
3399; GFX6-NEXT:    v_frexp_mant_f32_e32 v4, v0
3400; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3401; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3402; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
3403; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3404; GFX6-NEXT:    v_mul_f32_e32 v3, v4, v3
3405; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3406; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
3407; GFX6-NEXT:    v_add_f32_e32 v0, v0, v2
3408; GFX6-NEXT:    s_setpc_b64 s[30:31]
3409;
3410; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3411; GFX7:       ; %bb.0:
3412; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3413; GFX7-NEXT:    v_frexp_mant_f32_e32 v3, v1
3414; GFX7-NEXT:    v_rcp_f32_e32 v3, v3
3415; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3416; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
3417; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
3418; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v3
3419; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
3420; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3421; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
3422; GFX7-NEXT:    s_setpc_b64 s[30:31]
3423;
3424; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3425; GFX8:       ; %bb.0:
3426; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3427; GFX8-NEXT:    v_frexp_mant_f32_e32 v3, v1
3428; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
3429; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3430; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
3431; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
3432; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v3
3433; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
3434; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
3435; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
3436; GFX8-NEXT:    s_setpc_b64 s[30:31]
3437;
3438; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3439; GFX10:       ; %bb.0:
3440; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3441; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v1
3442; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3443; GFX10-NEXT:    v_frexp_mant_f32_e32 v4, v0
3444; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3445; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
3446; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3447; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
3448; GFX10-NEXT:    v_ldexp_f32 v0, v3, v0
3449; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
3450; GFX10-NEXT:    s_setpc_b64 s[30:31]
3451;
3452; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3453; GFX11:       ; %bb.0:
3454; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3455; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v1
3456; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3457; GFX11-NEXT:    v_frexp_mant_f32_e32 v4, v0
3458; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3459; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
3460; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3461; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3462; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v3
3463; GFX11-NEXT:    v_ldexp_f32 v0, v3, v0
3464; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
3465; GFX11-NEXT:    s_setpc_b64 s[30:31]
3466;
3467; EG-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user:
3468; EG:       ; %bb.0:
3469; EG-NEXT:    CF_END
3470; EG-NEXT:    PAD
3471  %div = fdiv contract float %x, %y, !fpmath !0
3472  %add = fadd contract float %div, %z
3473  ret float %add
3474}
3475
3476define float @v_fdiv_f32_daz_contractable_user(float %x, float %y, float %z) #0 {
3477; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_contractable_user:
3478; GFX6-FASTFMA:       ; %bb.0:
3479; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3480; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3481; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
3482; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3483; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3484; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3485; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
3486; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
3487; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
3488; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
3489; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
3490; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
3491; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3492; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3493; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3494; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
3495;
3496; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_contractable_user:
3497; GFX6-SLOWFMA:       ; %bb.0:
3498; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3499; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3500; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3501; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
3502; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3503; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3504; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
3505; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
3506; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
3507; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
3508; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
3509; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
3510; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3511; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3512; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
3513; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
3514;
3515; GFX7-LABEL: v_fdiv_f32_daz_contractable_user:
3516; GFX7:       ; %bb.0:
3517; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3519; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
3520; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
3521; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3522; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3523; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
3524; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
3525; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
3526; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
3527; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
3528; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
3529; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3530; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3531; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
3532; GFX7-NEXT:    s_setpc_b64 s[30:31]
3533;
3534; GFX8-LABEL: v_fdiv_f32_daz_contractable_user:
3535; GFX8:       ; %bb.0:
3536; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3537; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
3538; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3539; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
3540; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3541; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
3542; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
3543; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
3544; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
3545; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
3546; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
3547; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
3548; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
3549; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3550; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
3551; GFX8-NEXT:    s_setpc_b64 s[30:31]
3552;
3553; GFX10-LABEL: v_fdiv_f32_daz_contractable_user:
3554; GFX10:       ; %bb.0:
3555; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3556; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
3557; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3558; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
3559; GFX10-NEXT:    s_denorm_mode 15
3560; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3561; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
3562; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
3563; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
3564; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
3565; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
3566; GFX10-NEXT:    s_denorm_mode 12
3567; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3568; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3569; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
3570; GFX10-NEXT:    s_setpc_b64 s[30:31]
3571;
3572; GFX11-LABEL: v_fdiv_f32_daz_contractable_user:
3573; GFX11:       ; %bb.0:
3574; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3575; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
3576; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
3577; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
3578; GFX11-NEXT:    s_denorm_mode 15
3579; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3580; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
3581; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
3582; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
3583; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
3584; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
3585; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
3586; GFX11-NEXT:    s_denorm_mode 12
3587; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
3588; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
3589; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
3590; GFX11-NEXT:    s_setpc_b64 s[30:31]
3591;
3592; EG-LABEL: v_fdiv_f32_daz_contractable_user:
3593; EG:       ; %bb.0:
3594; EG-NEXT:    CF_END
3595; EG-NEXT:    PAD
3596  %div = fdiv contract float %x, %y
3597  %add = fadd contract float %div, %z
3598  ret float %add
3599}
3600
; Contractable user of a reduced-accuracy fdiv: the quotient (fdiv contract
; with !fpmath !0) is consumed by a contract fadd of %z.
; The expected code scales the denominator by 0x2f800000 (2^-32) when its
; magnitude exceeds 0x6f800000 (2^96) and by 1.0 otherwise, takes v_rcp_f32,
; multiplies by the numerator, and then applies the same scale factor and adds
; the third argument in one instruction: v_mad_f32 on the tahiti, pitcairn,
; hawaii, fiji and gfx1010 runs, v_fma_f32 on the gfx1100 run.
; The r600 run expects only CF_END and PAD.
; NOTE(review): !0 and attribute #0 are defined outside this chunk; going by
; the function name they presumably mean 2.5 ulp accuracy and flushed f32
; denormals - confirm against the end of the file.
3601define float @v_fdiv_f32_daz_25ulp_contractable_user(float %x, float %y, float %z) #0 {
3602; GFX678-LABEL: v_fdiv_f32_daz_25ulp_contractable_user:
3603; GFX678:       ; %bb.0:
3604; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3605; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
3606; GFX678-NEXT:    v_mov_b32_e32 v3, 0x2f800000
3607; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
3608; GFX678-NEXT:    v_cndmask_b32_e32 v3, 1.0, v3, vcc
3609; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v3
3610; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
3611; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
3612; GFX678-NEXT:    v_mad_f32 v0, v3, v0, v2
3613; GFX678-NEXT:    s_setpc_b64 s[30:31]
3614;
3615; GFX10-LABEL: v_fdiv_f32_daz_25ulp_contractable_user:
3616; GFX10:       ; %bb.0:
3617; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3618; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
3619; GFX10-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s4
3620; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
3621; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
3622; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
3623; GFX10-NEXT:    v_mad_f32 v0, v3, v0, v2
3624; GFX10-NEXT:    s_setpc_b64 s[30:31]
3625;
3626; GFX11-LABEL: v_fdiv_f32_daz_25ulp_contractable_user:
3627; GFX11:       ; %bb.0:
3628; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3629; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
3630; GFX11-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s0
3631; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v3
3632; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
3633; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3634; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
3635; GFX11-NEXT:    v_fma_f32 v0, v3, v0, v2
3636; GFX11-NEXT:    s_setpc_b64 s[30:31]
3637;
3638; EG-LABEL: v_fdiv_f32_daz_25ulp_contractable_user:
3639; EG:       ; %bb.0:
3640; EG-NEXT:    CF_END
3641; EG-NEXT:    PAD
3642  %div = fdiv contract float %x, %y, !fpmath !0
3643  %add = fadd contract float %div, %z
3644  ret float %add
3645}
3646
; fdiv carrying the nnan and ninf flags under attribute #1, with no !fpmath
; metadata. %z is accepted but never used.
; Every amdgcn run expects the full expansion: v_div_scale_f32 for both
; operands, v_rcp_f32, the FMA refinement chain, v_div_fmas_f32 and
; v_div_fixup_f32. None of the expected sequences reads or writes the MODE
; register (no s_getreg / s_setreg / s_denorm_mode).
; The r600 run expects only CF_END and PAD.
; NOTE(review): attribute #1 is defined outside this chunk; the "ieee" in the
; function name suggests it keeps f32 denormals enabled - confirm.
3647define float @v_fdiv_f32_ieee__nnan_ninf(float %x, float %y, float %z) #1 {
3648; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3649; GFX6-FASTFMA:       ; %bb.0:
3650; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3651; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3652; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
3653; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
3654; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
3655; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3656; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
3657; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
3658; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
3659; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
3660; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3661; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3662; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
3663;
3664; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3665; GFX6-SLOWFMA:       ; %bb.0:
3666; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3667; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3668; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
3669; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
3670; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
3671; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
3672; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
3673; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
3674; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
3675; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
3676; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
3677; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3678; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
3679;
3680; GFX7-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3681; GFX7:       ; %bb.0:
3682; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3683; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3684; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
3685; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
3686; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
3687; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3688; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
3689; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
3690; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
3691; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
3692; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3693; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3694; GFX7-NEXT:    s_setpc_b64 s[30:31]
3695;
3696; GFX8-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3697; GFX8:       ; %bb.0:
3698; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3699; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3700; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
3701; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
3702; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
3703; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
3704; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
3705; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
3706; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
3707; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
3708; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
3709; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3710; GFX8-NEXT:    s_setpc_b64 s[30:31]
3711;
3712; GFX10-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3713; GFX10:       ; %bb.0:
3714; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3715; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
3716; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
3717; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
3718; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
3719; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
3720; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
3721; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
3722; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
3723; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
3724; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3725; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3726; GFX10-NEXT:    s_setpc_b64 s[30:31]
3727;
3728; GFX11-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3729; GFX11:       ; %bb.0:
3730; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3731; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
3732; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
3733; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3734; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
3735; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
3736; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
3737; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
3738; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
3739; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
3740; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
3741; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3742; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3743; GFX11-NEXT:    s_setpc_b64 s[30:31]
3744;
3745; EG-LABEL: v_fdiv_f32_ieee__nnan_ninf:
3746; EG:       ; %bb.0:
3747; EG-NEXT:    CF_END
3748; EG-NEXT:    PAD
3749  %div = fdiv nnan ninf float %x, %y
3750  ret float %div
3751}
3752
; fdiv nnan ninf with !fpmath !0 under attribute #1. %z is accepted but never
; used.
; The expected code splits both operands with v_frexp_mant_f32 and
; v_frexp_exp_i32_f32, multiplies the numerator mantissa by v_rcp_f32 of the
; denominator mantissa, and rebuilds the result with v_ldexp_f32 using the
; difference of the two exponents.
; On the tahiti/pitcairn runs each v_frexp_mant_f32 result is additionally
; replaced by the original operand unless |operand| < 0x7f800000 (the f32
; +infinity bit pattern); the other targets expect no such select.
; The r600 run expects only CF_END and PAD.
; NOTE(review): !0 and attribute #1 are defined outside this chunk; the name
; suggests 2.5 ulp accuracy with f32 denormals enabled - confirm.
3753define float @v_fdiv_f32_ieee_25ulp__nnan_ninf(float %x, float %y, float %z) #1 {
3754; GFX6-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3755; GFX6:       ; %bb.0:
3756; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
3758; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
3759; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
3760; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
3761; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
3762; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
3763; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3764; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3765; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
3766; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3767; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
3768; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3769; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
3770; GFX6-NEXT:    s_setpc_b64 s[30:31]
3771;
3772; GFX7-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3773; GFX7:       ; %bb.0:
3774; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3775; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
3776; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
3777; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3778; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
3779; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
3780; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
3781; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
3782; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3783; GFX7-NEXT:    s_setpc_b64 s[30:31]
3784;
3785; GFX8-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3786; GFX8:       ; %bb.0:
3787; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3788; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
3789; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
3790; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3791; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
3792; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
3793; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
3794; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
3795; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
3796; GFX8-NEXT:    s_setpc_b64 s[30:31]
3797;
3798; GFX10-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3799; GFX10:       ; %bb.0:
3800; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3801; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
3802; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3803; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
3804; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3805; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
3806; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3807; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
3808; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
3809; GFX10-NEXT:    s_setpc_b64 s[30:31]
3810;
3811; GFX11-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3812; GFX11:       ; %bb.0:
3813; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3814; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
3815; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3816; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
3817; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3818; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
3819; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
3820; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3821; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
3822; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
3823; GFX11-NEXT:    s_setpc_b64 s[30:31]
3824;
3825; EG-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf:
3826; EG:       ; %bb.0:
3827; EG-NEXT:    CF_END
3828; EG-NEXT:    PAD
3829  %div = fdiv nnan ninf float %x, %y, !fpmath !0
3830  ret float %div
3831}
3832
; fdiv nnan ninf under attribute #2, with no !fpmath metadata. %z is accepted
; but never used.
; Every amdgcn run expects the div_scale / rcp / FMA refinement / div_fmas /
; div_fixup expansion, wrapped in a save-and-restore of the 2-bit field
; hwreg(HW_REG_MODE, 4, 2): it is read into an SGPR with s_getreg_b32,
; overwritten with 3 via s_setreg_imm32_b32 (s_denorm_mode 15 on the gfx1010
; and gfx1100 runs) before the FMA chain, and written back from the saved SGPR
; with s_setreg_b32 just before v_div_fmas_f32.
; The r600 run expects only CF_END and PAD.
; NOTE(review): attribute #2 is defined outside this chunk; the "dynamic" in
; the function name suggests a dynamic f32 denormal mode - confirm.
3833define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 {
3834; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3835; GFX6-FASTFMA:       ; %bb.0:
3836; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3837; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3838; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
3839; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3840; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3841; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3842; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
3843; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
3844; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
3845; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
3846; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
3847; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
3848; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3849; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3850; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3851; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
3852;
3853; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3854; GFX6-SLOWFMA:       ; %bb.0:
3855; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3856; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3857; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
3858; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3859; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
3860; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3861; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
3862; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
3863; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
3864; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
3865; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
3866; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
3867; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3868; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
3869; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3870; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
3871;
3872; GFX7-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3873; GFX7:       ; %bb.0:
3874; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3875; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3876; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
3877; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
3878; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3879; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3880; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
3881; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
3882; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
3883; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
3884; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
3885; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
3886; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3887; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3888; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3889; GFX7-NEXT:    s_setpc_b64 s[30:31]
3890;
3891; GFX8-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3892; GFX8:       ; %bb.0:
3893; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3894; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
3895; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
3896; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3897; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
3898; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
3899; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
3900; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
3901; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
3902; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
3903; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
3904; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
3905; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3906; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
3907; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3908; GFX8-NEXT:    s_setpc_b64 s[30:31]
3909;
3910; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3911; GFX10:       ; %bb.0:
3912; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3913; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
3914; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
3915; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
3916; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
3917; GFX10-NEXT:    s_denorm_mode 15
3918; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
3919; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
3920; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
3921; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
3922; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
3923; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
3924; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
3925; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3926; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3927; GFX10-NEXT:    s_setpc_b64 s[30:31]
3928;
3929; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3930; GFX11:       ; %bb.0:
3931; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3932; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
3933; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
3934; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
3935; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
3936; GFX11-NEXT:    s_denorm_mode 15
3937; GFX11-NEXT:    s_waitcnt_depctr 0xfff
3938; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
3939; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
3940; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
3941; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
3942; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
3943; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
3944; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
3945; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
3946; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
3947; GFX11-NEXT:    s_setpc_b64 s[30:31]
3948;
3949; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf:
3950; EG:       ; %bb.0:
3951; EG-NEXT:    CF_END
3952; EG-NEXT:    PAD
3953  %div = fdiv nnan ninf float %x, %y
3954  ret float %div
3955}
3956
; fdiv nnan ninf with !fpmath !0 under attribute #2. %z is accepted but never
; used.
; The expected code is the frexp-based sequence: v_frexp_mant_f32 and
; v_frexp_exp_i32_f32 on both operands, v_rcp_f32 of the denominator mantissa,
; a mantissa multiply, and v_ldexp_f32 with the exponent difference. None of
; the expected sequences reads or writes the MODE register.
; On the tahiti/pitcairn runs each v_frexp_mant_f32 result is additionally
; replaced by the original operand unless |operand| < 0x7f800000 (the f32
; +infinity bit pattern).
; The r600 run expects only CF_END and PAD.
; NOTE(review): !0 and attribute #2 are defined outside this chunk; the name
; suggests 2.5 ulp accuracy with a dynamic f32 denormal mode - confirm.
3957define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z) #2 {
3958; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
3959; GFX6:       ; %bb.0:
3960; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3961; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
3962; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
3963; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
3964; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
3965; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
3966; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
3967; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
3968; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3969; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
3970; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
3971; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
3972; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
3973; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
3974; GFX6-NEXT:    s_setpc_b64 s[30:31]
3975;
3976; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
3977; GFX7:       ; %bb.0:
3978; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3979; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
3980; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
3981; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3982; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
3983; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
3984; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
3985; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
3986; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
3987; GFX7-NEXT:    s_setpc_b64 s[30:31]
3988;
3989; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
3990; GFX8:       ; %bb.0:
3991; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
3993; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
3994; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
3995; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
3996; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
3997; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
3998; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
3999; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
4000; GFX8-NEXT:    s_setpc_b64 s[30:31]
4001;
4002; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
4003; GFX10:       ; %bb.0:
4004; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4005; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
4006; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4007; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
4008; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4009; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
4010; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4011; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
4012; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
4013; GFX10-NEXT:    s_setpc_b64 s[30:31]
4014;
4015; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
4016; GFX11:       ; %bb.0:
4017; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4018; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
4019; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4020; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
4021; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4022; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
4023; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4024; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4025; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
4026; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
4027; GFX11-NEXT:    s_setpc_b64 s[30:31]
4028;
4029; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf:
4030; EG:       ; %bb.0:
4031; EG-NEXT:    CF_END
4032; EG-NEXT:    PAD
4033  %div = fdiv nnan ninf float %x, %y, !fpmath !0
4034  ret float %div
4035}
4036
; fdiv nnan ninf under attribute #0, with no !fpmath metadata. %z is accepted
; but never used.
; Every amdgcn run expects the div_scale / rcp / FMA refinement / div_fmas /
; div_fixup expansion with the 2-bit field hwreg(HW_REG_MODE, 4, 2) written
; to 3 before the FMA chain and to 0 afterwards using s_setreg_imm32_b32
; (s_denorm_mode 15 then s_denorm_mode 12 on the gfx1010 and gfx1100 runs).
; The previous field value is not saved: no s_getreg_b32 is expected.
; The r600 run expects only CF_END and PAD.
; NOTE(review): attribute #0 is defined outside this chunk; the "daz" in the
; function name suggests f32 denormals are flushed by default - confirm.
4037define float @v_fdiv_f32_daz__nnan_ninf(float %x, float %y, float %z) #0 {
4038; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz__nnan_ninf:
4039; GFX6-FASTFMA:       ; %bb.0:
4040; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4041; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
4042; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
4043; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4044; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4045; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
4046; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
4047; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
4048; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
4049; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
4050; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
4051; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4052; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4053; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4054; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4055;
4056; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz__nnan_ninf:
4057; GFX6-SLOWFMA:       ; %bb.0:
4058; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4059; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
4060; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
4061; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
4062; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4063; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
4064; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
4065; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
4066; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
4067; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
4068; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
4069; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4070; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
4071; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4072; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
4073;
4074; GFX7-LABEL: v_fdiv_f32_daz__nnan_ninf:
4075; GFX7:       ; %bb.0:
4076; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4077; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
4078; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
4079; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4080; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4081; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
4082; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
4083; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
4084; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
4085; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
4086; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
4087; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4088; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4089; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4090; GFX7-NEXT:    s_setpc_b64 s[30:31]
4091;
4092; GFX8-LABEL: v_fdiv_f32_daz__nnan_ninf:
4093; GFX8:       ; %bb.0:
4094; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4095; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
4096; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
4097; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
4098; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4099; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
4100; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
4101; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
4102; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
4103; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
4104; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
4105; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4106; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
4107; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4108; GFX8-NEXT:    s_setpc_b64 s[30:31]
4109;
4110; GFX10-LABEL: v_fdiv_f32_daz__nnan_ninf:
4111; GFX10:       ; %bb.0:
4112; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4113; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
4114; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
4115; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
4116; GFX10-NEXT:    s_denorm_mode 15
4117; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
4118; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
4119; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
4120; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
4121; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
4122; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
4123; GFX10-NEXT:    s_denorm_mode 12
4124; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4125; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4126; GFX10-NEXT:    s_setpc_b64 s[30:31]
4127;
4128; GFX11-LABEL: v_fdiv_f32_daz__nnan_ninf:
4129; GFX11:       ; %bb.0:
4130; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4131; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
4132; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
4133; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
4134; GFX11-NEXT:    s_denorm_mode 15
4135; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4136; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
4137; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
4138; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
4139; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
4140; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
4141; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
4142; GFX11-NEXT:    s_denorm_mode 12
4143; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4144; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
4145; GFX11-NEXT:    s_setpc_b64 s[30:31]
4146;
4147; EG-LABEL: v_fdiv_f32_daz__nnan_ninf:
4148; EG:       ; %bb.0:
4149; EG-NEXT:    CF_END
4150; EG-NEXT:    PAD
4151  %div = fdiv nnan ninf float %x, %y
4152  ret float %div
4153}
4154
; fdiv nnan ninf with !fpmath !0 under attribute #0. %z is accepted but never
; used.
; The expected code selects a scale factor of 0x2f800000 (2^-32) when the
; denominator magnitude exceeds 0x6f800000 (2^96) and 1.0 otherwise,
; multiplies the denominator by it, takes v_rcp_f32, multiplies by the
; numerator, and finally multiplies the result by the same scale factor.
; None of the expected sequences reads or writes the MODE register.
; The r600 run expects only CF_END and PAD.
; NOTE(review): !0 and attribute #0 are defined outside this chunk; the name
; suggests 2.5 ulp accuracy with flushed f32 denormals - confirm.
4155define float @v_fdiv_f32_daz_25ulp__nnan_ninf(float %x, float %y, float %z) #0 {
4156; GFX678-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf:
4157; GFX678:       ; %bb.0:
4158; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4159; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
4160; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
4161; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
4162; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
4163; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v2
4164; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
4165; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
4166; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
4167; GFX678-NEXT:    s_setpc_b64 s[30:31]
4168;
4169; GFX10-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf:
4170; GFX10:       ; %bb.0:
4171; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4172; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
4173; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
4174; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v2
4175; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
4176; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
4177; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
4178; GFX10-NEXT:    s_setpc_b64 s[30:31]
4179;
4180; GFX11-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf:
4181; GFX11:       ; %bb.0:
4182; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4183; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
4184; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
4185; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
4186; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
4187; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4188; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
4189; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
4190; GFX11-NEXT:    s_setpc_b64 s[30:31]
4191;
4192; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf:
4193; EG:       ; %bb.0:
4194; EG-NEXT:    CF_END
4195; EG-NEXT:    PAD
4196  %div = fdiv nnan ninf float %x, %y, !fpmath !0
4197  ret float %div
4198}
4199
; Full-precision fdiv (nnan ninf contract, no !fpmath) whose only user is a
; contractable fadd. In every GCN check body below, the division is expanded
; into v_div_scale_f32 (twice), v_rcp_f32, a v_fma_f32/v_fmac_f32 chain,
; v_div_fmas_f32 and v_div_fixup_f32, and the fadd stays a separate
; v_add_f32 with %z (v2); nothing is fused into the add and no mode-register
; instructions appear. The r600 check body contains only CF_END and PAD.
; Attribute group #1 is defined outside this part of the file; the "ieee" in
; the function name presumably refers to its denormal mode (confirm there).
4200define float @v_fdiv_f32_ieee__nnan_ninf_contractable_user(float %x, float %y, float %z) #1 {
4201; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4202; GFX6-FASTFMA:       ; %bb.0:
4203; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4204; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4205; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
4206; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
4207; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
4208; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4209; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
4210; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
4211; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
4212; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
4213; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4214; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4215; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4216; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4217;
4218; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4219; GFX6-SLOWFMA:       ; %bb.0:
4220; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4221; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4222; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4223; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
4224; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4225; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
4226; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
4227; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
4228; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
4229; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
4230; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4231; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4232; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4233; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
4234;
4235; GFX7-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4236; GFX7:       ; %bb.0:
4237; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4238; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4239; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
4240; GFX7-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
4241; GFX7-NEXT:    v_fma_f32 v4, v5, v4, v4
4242; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4243; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
4244; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
4245; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
4246; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
4247; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4248; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4249; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
4250; GFX7-NEXT:    s_setpc_b64 s[30:31]
4251;
4252; GFX8-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4253; GFX8:       ; %bb.0:
4254; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4255; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4256; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4257; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
4258; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4259; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
4260; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
4261; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
4262; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
4263; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
4264; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4265; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4266; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
4267; GFX8-NEXT:    s_setpc_b64 s[30:31]
4268;
4269; GFX10-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4270; GFX10:       ; %bb.0:
4271; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4272; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
4273; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
4274; GFX10-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
4275; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v4
4276; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4277; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
4278; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
4279; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
4280; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
4281; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4282; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4283; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
4284; GFX10-NEXT:    s_setpc_b64 s[30:31]
4285;
4286; GFX11-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4287; GFX11:       ; %bb.0:
4288; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4289; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
4290; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
4291; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4292; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
4293; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
4294; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4295; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
4296; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
4297; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
4298; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
4299; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4300; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4301; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
4302; GFX11-NEXT:    s_setpc_b64 s[30:31]
4303;
4304; EG-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user:
4305; EG:       ; %bb.0:
4306; EG-NEXT:    CF_END
4307; EG-NEXT:    PAD
4308  %div = fdiv nnan ninf contract float %x, %y
4309  %add = fadd contract float %div, %z
4310  ret float %add
4311}
4312
; fdiv (nnan ninf contract) tagged with !fpmath !0, feeding a contractable
; fadd. The GCN check bodies below split both operands with v_frexp_mant_f32
; and v_frexp_exp_i32_f32, take v_rcp_f32 of the divisor mantissa, multiply
; by the dividend mantissa, and rebuild the result with v_ldexp_f32 using the
; exponent difference; the fadd stays a separate v_add_f32. The tahiti and
; pitcairn body additionally selects, per operand, between the raw value and
; its mantissa via a compare of the absolute value against 0x7f800000.
; !0 is defined outside this part of the file; the "25ulp" in the function
; name suggests a 2.5 ulp accuracy bound (confirm against the metadata).
4313define float @v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #1 {
4314; GFX6-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4315; GFX6:       ; %bb.0:
4316; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4317; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
4318; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v1
4319; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
4320; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
4321; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
4322; GFX6-NEXT:    v_frexp_mant_f32_e32 v4, v0
4323; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
4324; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4325; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
4326; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4327; GFX6-NEXT:    v_mul_f32_e32 v3, v4, v3
4328; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
4329; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
4330; GFX6-NEXT:    v_add_f32_e32 v0, v0, v2
4331; GFX6-NEXT:    s_setpc_b64 s[30:31]
4332;
4333; GFX7-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4334; GFX7:       ; %bb.0:
4335; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4336; GFX7-NEXT:    v_frexp_mant_f32_e32 v3, v1
4337; GFX7-NEXT:    v_rcp_f32_e32 v3, v3
4338; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4339; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
4340; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
4341; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v3
4342; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
4343; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
4344; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
4345; GFX7-NEXT:    s_setpc_b64 s[30:31]
4346;
4347; GFX8-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4348; GFX8:       ; %bb.0:
4349; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4350; GFX8-NEXT:    v_frexp_mant_f32_e32 v3, v1
4351; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
4352; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4353; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
4354; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
4355; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v3
4356; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
4357; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
4358; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
4359; GFX8-NEXT:    s_setpc_b64 s[30:31]
4360;
4361; GFX10-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4362; GFX10:       ; %bb.0:
4363; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4364; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v1
4365; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4366; GFX10-NEXT:    v_frexp_mant_f32_e32 v4, v0
4367; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4368; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
4369; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4370; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
4371; GFX10-NEXT:    v_ldexp_f32 v0, v3, v0
4372; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
4373; GFX10-NEXT:    s_setpc_b64 s[30:31]
4374;
4375; GFX11-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4376; GFX11:       ; %bb.0:
4377; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4378; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v1
4379; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4380; GFX11-NEXT:    v_frexp_mant_f32_e32 v4, v0
4381; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4382; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
4383; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4384; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4385; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v3
4386; GFX11-NEXT:    v_ldexp_f32 v0, v3, v0
4387; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
4388; GFX11-NEXT:    s_setpc_b64 s[30:31]
4389;
4390; EG-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user:
4391; EG:       ; %bb.0:
4392; EG-NEXT:    CF_END
4393; EG-NEXT:    PAD
4394  %div = fdiv nnan ninf contract float %x, %y, !fpmath !0
4395  %add = fadd contract float %div, %z
4396  ret float %add
4397}
4398
; Full-precision fdiv (nnan ninf contract, no !fpmath) with a contractable
; fadd user, compiled under attribute group #2. Every GCN check body below
; reads hwreg(HW_REG_MODE, 4, 2) into an SGPR with s_getreg_b32, then changes
; the mode before the fma chain (s_setreg_imm32_b32 ... 3 on the tahiti,
; pitcairn, hawaii and fiji bodies; s_denorm_mode 15 on gfx1010 and gfx1100)
; and writes the saved SGPR back with s_setreg_b32 just before
; v_div_fmas_f32. The fadd remains a separate v_add_f32.
; Attribute group #2 is defined outside this part of the file; "dynamic" in
; the name presumably means the denormal mode is not statically known, which
; would explain the save/restore (confirm against the attribute definition).
4399define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y, float %z) #2 {
4400; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4401; GFX6-FASTFMA:       ; %bb.0:
4402; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4403; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4404; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
4405; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4406; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4407; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4408; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4409; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
4410; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
4411; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
4412; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
4413; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
4414; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4415; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4416; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4417; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4418; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4419;
4420; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4421; GFX6-SLOWFMA:       ; %bb.0:
4422; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4423; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4424; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4425; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4426; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
4427; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4428; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4429; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
4430; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
4431; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
4432; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
4433; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
4434; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4435; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4436; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4437; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4438; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
4439;
4440; GFX7-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4441; GFX7:       ; %bb.0:
4442; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4443; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4444; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
4445; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4446; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4447; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4448; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4449; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
4450; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
4451; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
4452; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
4453; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
4454; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4455; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4456; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4457; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
4458; GFX7-NEXT:    s_setpc_b64 s[30:31]
4459;
4460; GFX8-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4461; GFX8:       ; %bb.0:
4462; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4463; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4464; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4465; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4466; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
4467; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4468; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4469; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
4470; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
4471; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
4472; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
4473; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
4474; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4475; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4476; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4477; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
4478; GFX8-NEXT:    s_setpc_b64 s[30:31]
4479;
4480; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4481; GFX10:       ; %bb.0:
4482; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4483; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
4484; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4485; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4486; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
4487; GFX10-NEXT:    s_denorm_mode 15
4488; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4489; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
4490; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
4491; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
4492; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
4493; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
4494; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4495; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4496; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4497; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
4498; GFX10-NEXT:    s_setpc_b64 s[30:31]
4499;
4500; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4501; GFX11:       ; %bb.0:
4502; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4503; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
4504; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4505; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
4506; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
4507; GFX11-NEXT:    s_denorm_mode 15
4508; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4509; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4510; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
4511; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
4512; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
4513; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
4514; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
4515; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
4516; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4517; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4518; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
4519; GFX11-NEXT:    s_setpc_b64 s[30:31]
4520;
4521; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user:
4522; EG:       ; %bb.0:
4523; EG-NEXT:    CF_END
4524; EG-NEXT:    PAD
4525  %div = fdiv nnan ninf contract float %x, %y
4526  %add = fadd contract float %div, %z
4527  ret float %add
4528}
4529
; fdiv (nnan ninf contract) tagged with !fpmath !0 and feeding a contractable
; fadd, compiled under attribute group #2. The GCN check bodies below use the
; v_frexp_mant_f32 / v_frexp_exp_i32_f32 / v_rcp_f32 / v_mul_f32 /
; v_ldexp_f32 sequence followed by a separate v_add_f32, and contain no
; s_getreg_b32, s_setreg_* or s_denorm_mode instructions. The tahiti and
; pitcairn body also guards each v_frexp_mant_f32 result with a compare
; against 0x7f800000 and a v_cndmask_b32.
; #2 and !0 are defined outside this part of the file; the name suggests a
; dynamic denormal mode with a 2.5 ulp accuracy bound (confirm there).
4530define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #2 {
4531; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4532; GFX6:       ; %bb.0:
4533; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4534; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
4535; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v1
4536; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
4537; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
4538; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
4539; GFX6-NEXT:    v_frexp_mant_f32_e32 v4, v0
4540; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
4541; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4542; GFX6-NEXT:    v_cndmask_b32_e32 v4, v0, v4, vcc
4543; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4544; GFX6-NEXT:    v_mul_f32_e32 v3, v4, v3
4545; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
4546; GFX6-NEXT:    v_ldexp_f32_e32 v0, v3, v0
4547; GFX6-NEXT:    v_add_f32_e32 v0, v0, v2
4548; GFX6-NEXT:    s_setpc_b64 s[30:31]
4549;
4550; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4551; GFX7:       ; %bb.0:
4552; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4553; GFX7-NEXT:    v_frexp_mant_f32_e32 v3, v1
4554; GFX7-NEXT:    v_rcp_f32_e32 v3, v3
4555; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4556; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
4557; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
4558; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v3
4559; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
4560; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
4561; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
4562; GFX7-NEXT:    s_setpc_b64 s[30:31]
4563;
4564; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4565; GFX8:       ; %bb.0:
4566; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4567; GFX8-NEXT:    v_frexp_mant_f32_e32 v3, v1
4568; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
4569; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4570; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v4, v0
4571; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
4572; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v3
4573; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v4, v1
4574; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
4575; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
4576; GFX8-NEXT:    s_setpc_b64 s[30:31]
4577;
4578; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4579; GFX10:       ; %bb.0:
4580; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4581; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v1
4582; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4583; GFX10-NEXT:    v_frexp_mant_f32_e32 v4, v0
4584; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4585; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
4586; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4587; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
4588; GFX10-NEXT:    v_ldexp_f32 v0, v3, v0
4589; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
4590; GFX10-NEXT:    s_setpc_b64 s[30:31]
4591;
4592; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4593; GFX11:       ; %bb.0:
4594; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4595; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v1
4596; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4597; GFX11-NEXT:    v_frexp_mant_f32_e32 v4, v0
4598; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4599; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
4600; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4601; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4602; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v3
4603; GFX11-NEXT:    v_ldexp_f32 v0, v3, v0
4604; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
4605; GFX11-NEXT:    s_setpc_b64 s[30:31]
4606;
4607; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user:
4608; EG:       ; %bb.0:
4609; EG-NEXT:    CF_END
4610; EG-NEXT:    PAD
4611  %div = fdiv nnan ninf contract float %x, %y, !fpmath !0
4612  %add = fadd contract float %div, %z
4613  ret float %add
4614}
4615
; Full-precision fdiv (nnan ninf contract, no !fpmath) with a contractable
; fadd user, compiled under attribute group #0. Every GCN check body below
; brackets the fma chain with two immediate mode writes and no s_getreg_b32:
; s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2) with 3 before and 0 after on
; the tahiti, pitcairn, hawaii and fiji bodies, and s_denorm_mode 15 before
; and s_denorm_mode 12 after on gfx1010 and gfx1100. The fadd remains a
; separate v_add_f32.
; Attribute group #0 is defined outside this part of the file; "daz" in the
; name presumably means denormals-are-zero for f32, which would match the
; constant value written back (confirm against the attribute definition).
4616define float @v_fdiv_f32_daz__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 {
4617; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4618; GFX6-FASTFMA:       ; %bb.0:
4619; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4620; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4621; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v4, v3
4622; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4623; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4624; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4625; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v6, v4, v4
4626; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v6, v5, v4
4627; GFX6-FASTFMA-NEXT:    v_fma_f32 v7, -v3, v6, v5
4628; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, v7, v4, v6
4629; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v3, v6, v5
4630; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4631; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4632; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4633; GFX6-FASTFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4634; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4635;
4636; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4637; GFX6-SLOWFMA:       ; %bb.0:
4638; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4639; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4640; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4641; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v5, v3
4642; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4643; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4644; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v5, v5
4645; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v6, v4, v5
4646; GFX6-SLOWFMA-NEXT:    v_fma_f32 v7, -v3, v6, v4
4647; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, v7, v5, v6
4648; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, -v3, v6, v4
4649; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4650; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4651; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4652; GFX6-SLOWFMA-NEXT:    v_add_f32_e32 v0, v0, v2
4653; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
4654;
4655; GFX7-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4656; GFX7:       ; %bb.0:
4657; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658; GFX7-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4659; GFX7-NEXT:    v_rcp_f32_e32 v4, v3
4660; GFX7-NEXT:    v_div_scale_f32 v5, vcc, v0, v1, v0
4661; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4662; GFX7-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4663; GFX7-NEXT:    v_fma_f32 v4, v6, v4, v4
4664; GFX7-NEXT:    v_mul_f32_e32 v6, v5, v4
4665; GFX7-NEXT:    v_fma_f32 v7, -v3, v6, v5
4666; GFX7-NEXT:    v_fma_f32 v6, v7, v4, v6
4667; GFX7-NEXT:    v_fma_f32 v3, -v3, v6, v5
4668; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4669; GFX7-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4670; GFX7-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4671; GFX7-NEXT:    v_add_f32_e32 v0, v0, v2
4672; GFX7-NEXT:    s_setpc_b64 s[30:31]
4673;
4674; GFX8-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4675; GFX8:       ; %bb.0:
4676; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4677; GFX8-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v0
4678; GFX8-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
4679; GFX8-NEXT:    v_rcp_f32_e32 v5, v3
4680; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4681; GFX8-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
4682; GFX8-NEXT:    v_fma_f32 v5, v6, v5, v5
4683; GFX8-NEXT:    v_mul_f32_e32 v6, v4, v5
4684; GFX8-NEXT:    v_fma_f32 v7, -v3, v6, v4
4685; GFX8-NEXT:    v_fma_f32 v6, v7, v5, v6
4686; GFX8-NEXT:    v_fma_f32 v3, -v3, v6, v4
4687; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
4688; GFX8-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
4689; GFX8-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4690; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
4691; GFX8-NEXT:    s_setpc_b64 s[30:31]
4692;
4693; GFX10-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4694; GFX10:       ; %bb.0:
4695; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4696; GFX10-NEXT:    v_div_scale_f32 v3, s4, v1, v1, v0
4697; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4698; GFX10-NEXT:    v_rcp_f32_e32 v4, v3
4699; GFX10-NEXT:    s_denorm_mode 15
4700; GFX10-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4701; GFX10-NEXT:    v_fmac_f32_e32 v4, v6, v4
4702; GFX10-NEXT:    v_mul_f32_e32 v6, v5, v4
4703; GFX10-NEXT:    v_fma_f32 v7, -v3, v6, v5
4704; GFX10-NEXT:    v_fmac_f32_e32 v6, v7, v4
4705; GFX10-NEXT:    v_fma_f32 v3, -v3, v6, v5
4706; GFX10-NEXT:    s_denorm_mode 12
4707; GFX10-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4708; GFX10-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4709; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
4710; GFX10-NEXT:    s_setpc_b64 s[30:31]
4711;
4712; GFX11-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4713; GFX11:       ; %bb.0:
4714; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4715; GFX11-NEXT:    v_div_scale_f32 v3, null, v1, v1, v0
4716; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
4717; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
4718; GFX11-NEXT:    s_denorm_mode 15
4719; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4720; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
4721; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
4722; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
4723; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
4724; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
4725; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
4726; GFX11-NEXT:    s_denorm_mode 12
4727; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
4728; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v1, v0
4729; GFX11-NEXT:    v_add_f32_e32 v0, v0, v2
4730; GFX11-NEXT:    s_setpc_b64 s[30:31]
4731;
4732; EG-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user:
4733; EG:       ; %bb.0:
4734; EG-NEXT:    CF_END
4735; EG-NEXT:    PAD
4736  %div = fdiv nnan ninf contract float %x, %y
4737  %add = fadd contract float %div, %z
4738  ret float %add
4739}
4740
; fdiv (nnan ninf contract) tagged with !fpmath !0 and feeding a contractable
; fadd, compiled under attribute group #0. The GCN check bodies below compare
; |%y| (v1) against 0x6f800000 and select a scale of either 0x2f800000 or
; 1.0, multiply the divisor by that scale, take v_rcp_f32, and multiply by
; the dividend. Unlike the other contractable-user tests visible here, the
; final multiply by the scale is combined with the add of %z (v2) in a single
; instruction: v_mad_f32 in the shared tahiti/pitcairn/hawaii/fiji body and
; on gfx1010, v_fma_f32 on gfx1100. No mode-register instructions appear.
; #0 and !0 are defined outside this part of the file; the name suggests
; denormals-are-zero with a 2.5 ulp accuracy bound (confirm there).
4741define float @v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 {
4742; GFX678-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user:
4743; GFX678:       ; %bb.0:
4744; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4745; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
4746; GFX678-NEXT:    v_mov_b32_e32 v3, 0x2f800000
4747; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
4748; GFX678-NEXT:    v_cndmask_b32_e32 v3, 1.0, v3, vcc
4749; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v3
4750; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
4751; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
4752; GFX678-NEXT:    v_mad_f32 v0, v3, v0, v2
4753; GFX678-NEXT:    s_setpc_b64 s[30:31]
4754;
4755; GFX10-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user:
4756; GFX10:       ; %bb.0:
4757; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
4759; GFX10-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s4
4760; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v3
4761; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
4762; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
4763; GFX10-NEXT:    v_mad_f32 v0, v3, v0, v2
4764; GFX10-NEXT:    s_setpc_b64 s[30:31]
4765;
4766; GFX11-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user:
4767; GFX11:       ; %bb.0:
4768; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4769; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
4770; GFX11-NEXT:    v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s0
4771; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v3
4772; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
4773; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4774; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
4775; GFX11-NEXT:    v_fma_f32 v0, v3, v0, v2
4776; GFX11-NEXT:    s_setpc_b64 s[30:31]
4777;
4778; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user:
4779; EG:       ; %bb.0:
4780; EG-NEXT:    CF_END
4781; EG-NEXT:    PAD
4782  %div = fdiv nnan ninf contract float %x, %y, !fpmath !0
4783  %add = fadd contract float %div, %z
4784  ret float %add
4785}
4786
; fdiv with no fast-math flags and no !fpmath whose dividend is (fneg %x).
; In every GCN check body below, the negation shows up only as a -v0 source
; modifier on the v_div_scale_f32 and v_div_fixup_f32 instructions; there is
; no standalone negate instruction, and the rest of the sequence is the
; v_rcp_f32 / fma chain / v_div_fmas_f32 expansion with no mode-register
; instructions. The r600 check body contains only CF_END and PAD.
; Attribute group #1 is defined outside this part of the file; the "ieee" in
; the function name presumably refers to its denormal mode (confirm there).
4787define float @v_fdiv_neglhs_f32_ieee(float %x, float %y) #1 {
4788; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_ieee:
4789; GFX6-FASTFMA:       ; %bb.0:
4790; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4791; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4792; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
4793; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
4794; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
4795; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
4796; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
4797; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
4798; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
4799; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
4800; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4801; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4802; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4803;
4804; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_ieee:
4805; GFX6-SLOWFMA:       ; %bb.0:
4806; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4807; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4808; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
4809; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
4810; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
4811; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
4812; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
4813; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
4814; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
4815; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
4816; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
4817; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4818; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
4819;
4820; GFX7-LABEL: v_fdiv_neglhs_f32_ieee:
4821; GFX7:       ; %bb.0:
4822; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4823; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4824; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
4825; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
4826; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
4827; GFX7-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
4828; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
4829; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
4830; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
4831; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
4832; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4833; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4834; GFX7-NEXT:    s_setpc_b64 s[30:31]
4835;
4836; GFX8-LABEL: v_fdiv_neglhs_f32_ieee:
4837; GFX8:       ; %bb.0:
4838; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4839; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4840; GFX8-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
4841; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
4842; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
4843; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
4844; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
4845; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
4846; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
4847; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
4848; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
4849; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4850; GFX8-NEXT:    s_setpc_b64 s[30:31]
4851;
4852; GFX10-LABEL: v_fdiv_neglhs_f32_ieee:
4853; GFX10:       ; %bb.0:
4854; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4855; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, -v0
4856; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
4857; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
4858; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
4859; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
4860; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
4861; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
4862; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
4863; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
4864; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4865; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4866; GFX10-NEXT:    s_setpc_b64 s[30:31]
4867;
4868; GFX11-LABEL: v_fdiv_neglhs_f32_ieee:
4869; GFX11:       ; %bb.0:
4870; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4871; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, -v0
4872; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
4873; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4874; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
4875; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
4876; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
4877; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
4878; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
4879; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
4880; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
4881; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4882; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4883; GFX11-NEXT:    s_setpc_b64 s[30:31]
4884;
4885; EG-LABEL: v_fdiv_neglhs_f32_ieee:
4886; EG:       ; %bb.0:
4887; EG-NEXT:    CF_END
4888; EG-NEXT:    PAD
4889  %neg.x = fneg float %x
4890  %div = fdiv float %neg.x, %y
4891  ret float %div
4892}
4893
; Negated-numerator fdiv tagged with !fpmath !0 under attribute group #1 (both
; are defined outside this chunk; the _25ulp suffix suggests a 2.5 ulp bound,
; to be confirmed against the !0 node).
; The amdgcn checks expect a frexp_mant/rcp/mul/ldexp expansion with the fneg
; applied as a -v0 source operand. The tahiti/pitcairn (GFX6) checks also
; expect each v_frexp_mant result to be guarded by a v_cmp_lt of the absolute
; value against 0x7f800000 and a v_cndmask. The r600 checks expect only
; CF_END and PAD.
4894define float @v_fdiv_neglhs_f32_ieee_25ulp(float %x, float %y) #1 {
4895; GFX6-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4896; GFX6:       ; %bb.0:
4897; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4898; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
4899; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
4900; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
4901; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
4902; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
4903; GFX6-NEXT:    v_frexp_mant_f32_e64 v3, -v0
4904; GFX6-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
4905; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4906; GFX6-NEXT:    v_cndmask_b32_e64 v3, -v0, v3, s[4:5]
4907; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4908; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
4909; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
4910; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
4911; GFX6-NEXT:    s_setpc_b64 s[30:31]
4912;
4913; GFX7-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4914; GFX7:       ; %bb.0:
4915; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4916; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
4917; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
4918; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4919; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
4920; GFX7-NEXT:    v_frexp_mant_f32_e64 v0, -v0
4921; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
4922; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
4923; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
4924; GFX7-NEXT:    s_setpc_b64 s[30:31]
4925;
4926; GFX8-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4927; GFX8:       ; %bb.0:
4928; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4929; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
4930; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
4931; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4932; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
4933; GFX8-NEXT:    v_frexp_mant_f32_e64 v0, -v0
4934; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
4935; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
4936; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
4937; GFX8-NEXT:    s_setpc_b64 s[30:31]
4938;
4939; GFX10-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4940; GFX10:       ; %bb.0:
4941; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4942; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
4943; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4944; GFX10-NEXT:    v_frexp_mant_f32_e64 v3, -v0
4945; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4946; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
4947; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4948; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
4949; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
4950; GFX10-NEXT:    s_setpc_b64 s[30:31]
4951;
4952; GFX11-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4953; GFX11:       ; %bb.0:
4954; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4955; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
4956; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
4957; GFX11-NEXT:    v_frexp_mant_f32_e64 v3, -v0
4958; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
4959; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
4960; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
4961; GFX11-NEXT:    s_waitcnt_depctr 0xfff
4962; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
4963; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
4964; GFX11-NEXT:    s_setpc_b64 s[30:31]
4965;
4966; EG-LABEL: v_fdiv_neglhs_f32_ieee_25ulp:
4967; EG:       ; %bb.0:
4968; EG-NEXT:    CF_END
4969; EG-NEXT:    PAD
4970  %neg.x = fneg float %x
4971  %div = fdiv float %neg.x, %y, !fpmath !0
4972  ret float %div
4973}
4974
; Negated-numerator fdiv with no !fpmath metadata under attribute group #2
; (defined outside this chunk).
; The amdgcn checks expect the v_div_scale / v_rcp / FMA refinement /
; v_div_fmas / v_div_fixup sequence with -v0 as the numerator operand. They
; also expect hwreg(HW_REG_MODE, 4, 2) to be saved with s_getreg_b32 ahead of
; the FMA chain and written back from that SGPR with s_setreg_b32 after it.
; The r600 checks expect only CF_END and PAD.
4975define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 {
4976; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic:
4977; GFX6-FASTFMA:       ; %bb.0:
4978; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4979; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4980; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
4981; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
4982; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
4983; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
4984; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
4985; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
4986; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
4987; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
4988; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
4989; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
4990; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
4991; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
4992; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
4993; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
4994;
4995; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_dynamic:
4996; GFX6-SLOWFMA:       ; %bb.0:
4997; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4998; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
4999; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
5000; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5001; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
5002; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5003; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5004; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
5005; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
5006; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
5007; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
5008; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
5009; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5010; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5011; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5012; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5013;
5014; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic:
5015; GFX7:       ; %bb.0:
5016; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5017; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5018; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
5019; GFX7-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
5020; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5021; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5022; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5023; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
5024; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
5025; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
5026; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
5027; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
5028; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5029; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5030; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5031; GFX7-NEXT:    s_setpc_b64 s[30:31]
5032;
5033; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic:
5034; GFX8:       ; %bb.0:
5035; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5036; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5037; GFX8-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
5038; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5039; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
5040; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5041; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5042; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
5043; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
5044; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
5045; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
5046; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
5047; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5048; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5049; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5050; GFX8-NEXT:    s_setpc_b64 s[30:31]
5051;
5052; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic:
5053; GFX10:       ; %bb.0:
5054; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5055; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, -v0
5056; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
5057; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5058; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
5059; GFX10-NEXT:    s_denorm_mode 15
5060; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5061; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
5062; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
5063; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
5064; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
5065; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
5066; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5067; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5068; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5069; GFX10-NEXT:    s_setpc_b64 s[30:31]
5070;
5071; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic:
5072; GFX11:       ; %bb.0:
5073; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5074; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, -v0
5075; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
5076; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
5077; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
5078; GFX11-NEXT:    s_denorm_mode 15
5079; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5080; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5081; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
5082; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
5083; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
5084; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
5085; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
5086; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
5087; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5088; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5089; GFX11-NEXT:    s_setpc_b64 s[30:31]
5090;
5091; EG-LABEL: v_fdiv_neglhs_f32_dynamic:
5092; EG:       ; %bb.0:
5093; EG-NEXT:    CF_END
5094; EG-NEXT:    PAD
5095  %neg.x = fneg float %x
5096  %div = fdiv float %neg.x, %y
5097  ret float %div
5098}
5099
; Negated-numerator fdiv tagged with !fpmath !0 under attribute group #2 (both
; are defined outside this chunk).
; The amdgcn checks expect the frexp_mant/rcp/mul/ldexp expansion with -v0 as
; a source operand and contain no read or write of HW_REG_MODE, unlike the
; checks for v_fdiv_neglhs_f32_dynamic. The GFX6 checks also expect a
; v_cmp_lt against 0x7f800000 plus v_cndmask around each v_frexp_mant.
; The r600 checks expect only CF_END and PAD.
5100define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) #2 {
5101; GFX6-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5102; GFX6:       ; %bb.0:
5103; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5104; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
5105; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
5106; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
5107; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
5108; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
5109; GFX6-NEXT:    v_frexp_mant_f32_e64 v3, -v0
5110; GFX6-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
5111; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5112; GFX6-NEXT:    v_cndmask_b32_e64 v3, -v0, v3, s[4:5]
5113; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5114; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
5115; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
5116; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
5117; GFX6-NEXT:    s_setpc_b64 s[30:31]
5118;
5119; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5120; GFX7:       ; %bb.0:
5121; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5122; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
5123; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
5124; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5125; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5126; GFX7-NEXT:    v_frexp_mant_f32_e64 v0, -v0
5127; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
5128; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
5129; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
5130; GFX7-NEXT:    s_setpc_b64 s[30:31]
5131;
5132; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5133; GFX8:       ; %bb.0:
5134; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5135; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
5136; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
5137; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5138; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5139; GFX8-NEXT:    v_frexp_mant_f32_e64 v0, -v0
5140; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
5141; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
5142; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
5143; GFX8-NEXT:    s_setpc_b64 s[30:31]
5144;
5145; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5146; GFX10:       ; %bb.0:
5147; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5148; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
5149; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5150; GFX10-NEXT:    v_frexp_mant_f32_e64 v3, -v0
5151; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5152; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
5153; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5154; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
5155; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
5156; GFX10-NEXT:    s_setpc_b64 s[30:31]
5157;
5158; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5159; GFX11:       ; %bb.0:
5160; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5161; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
5162; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5163; GFX11-NEXT:    v_frexp_mant_f32_e64 v3, -v0
5164; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5165; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
5166; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5167; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5168; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
5169; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
5170; GFX11-NEXT:    s_setpc_b64 s[30:31]
5171;
5172; EG-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp:
5173; EG:       ; %bb.0:
5174; EG-NEXT:    CF_END
5175; EG-NEXT:    PAD
5176  %neg.x = fneg float %x
5177  %div = fdiv float %neg.x, %y, !fpmath !0
5178  ret float %div
5179}
5180
; Negated-numerator fdiv with no !fpmath metadata under attribute group #0
; (defined outside this chunk).
; The amdgcn checks expect the v_div_scale / v_rcp / FMA refinement /
; v_div_fmas / v_div_fixup sequence with -v0 as the numerator operand,
; bracketed by constant mode writes instead of a save/restore:
; s_setreg_imm32_b32 of 3 and then 0 into hwreg(HW_REG_MODE, 4, 2) on the
; GFX6/7/8 targets, and s_denorm_mode 15 and then 12 on GFX10/11.
; The r600 checks expect only CF_END and PAD.
5181define float @v_fdiv_neglhs_f32_daz(float %x, float %y) #0 {
5182; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_daz:
5183; GFX6-FASTFMA:       ; %bb.0:
5184; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5185; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5186; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
5187; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
5188; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5189; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5190; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
5191; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
5192; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
5193; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
5194; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
5195; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5196; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5197; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5198; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
5199;
5200; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_daz:
5201; GFX6-SLOWFMA:       ; %bb.0:
5202; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5203; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5204; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
5205; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
5206; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5207; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5208; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
5209; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
5210; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
5211; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
5212; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
5213; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5214; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5215; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5216; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5217;
5218; GFX7-LABEL: v_fdiv_neglhs_f32_daz:
5219; GFX7:       ; %bb.0:
5220; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5221; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5222; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
5223; GFX7-NEXT:    v_div_scale_f32 v4, vcc, -v0, v1, -v0
5224; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5225; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5226; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
5227; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
5228; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
5229; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
5230; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
5231; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5232; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5233; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5234; GFX7-NEXT:    s_setpc_b64 s[30:31]
5235;
5236; GFX8-LABEL: v_fdiv_neglhs_f32_daz:
5237; GFX8:       ; %bb.0:
5238; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5239; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, -v0
5240; GFX8-NEXT:    v_div_scale_f32 v3, vcc, -v0, v1, -v0
5241; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
5242; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5243; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5244; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
5245; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
5246; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
5247; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
5248; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
5249; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5250; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5251; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5252; GFX8-NEXT:    s_setpc_b64 s[30:31]
5253;
5254; GFX10-LABEL: v_fdiv_neglhs_f32_daz:
5255; GFX10:       ; %bb.0:
5256; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5257; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, -v0
5258; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
5259; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
5260; GFX10-NEXT:    s_denorm_mode 15
5261; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5262; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
5263; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
5264; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
5265; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
5266; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
5267; GFX10-NEXT:    s_denorm_mode 12
5268; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5269; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5270; GFX10-NEXT:    s_setpc_b64 s[30:31]
5271;
5272; GFX11-LABEL: v_fdiv_neglhs_f32_daz:
5273; GFX11:       ; %bb.0:
5274; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5275; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, -v0
5276; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0
5277; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
5278; GFX11-NEXT:    s_denorm_mode 15
5279; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5280; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5281; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
5282; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
5283; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
5284; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
5285; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
5286; GFX11-NEXT:    s_denorm_mode 12
5287; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5288; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, -v0
5289; GFX11-NEXT:    s_setpc_b64 s[30:31]
5290;
5291; EG-LABEL: v_fdiv_neglhs_f32_daz:
5292; EG:       ; %bb.0:
5293; EG-NEXT:    CF_END
5294; EG-NEXT:    PAD
5295  %neg.x = fneg float %x
5296  %div = fdiv float %neg.x, %y
5297  ret float %div
5298}
5299
; Negated-numerator fdiv tagged with !fpmath !0 under attribute group #0 (both
; are defined outside this chunk).
; The amdgcn checks expect no frexp/ldexp here: |v1| is compared with
; 0x6f800000 to pick either 0x2f800000 or 1.0 as a factor, the denominator is
; multiplied by that factor before v_rcp_f32, and the product with -v0 is
; multiplied by the same factor again. The tahiti, pitcairn, hawaii and fiji
; runs share one set of checks under the GFX678 prefix.
; The r600 checks expect only CF_END and PAD.
5300define float @v_fdiv_neglhs_f32_daz_25ulp(float %x, float %y) #0 {
5301; GFX678-LABEL: v_fdiv_neglhs_f32_daz_25ulp:
5302; GFX678:       ; %bb.0:
5303; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5304; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
5305; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
5306; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
5307; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
5308; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v2
5309; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
5310; GFX678-NEXT:    v_mul_f32_e64 v0, -v0, v1
5311; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
5312; GFX678-NEXT:    s_setpc_b64 s[30:31]
5313;
5314; GFX10-LABEL: v_fdiv_neglhs_f32_daz_25ulp:
5315; GFX10:       ; %bb.0:
5316; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5317; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
5318; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
5319; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v2
5320; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
5321; GFX10-NEXT:    v_mul_f32_e64 v0, -v0, v1
5322; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
5323; GFX10-NEXT:    s_setpc_b64 s[30:31]
5324;
5325; GFX11-LABEL: v_fdiv_neglhs_f32_daz_25ulp:
5326; GFX11:       ; %bb.0:
5327; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
5329; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
5330; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
5331; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
5332; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5333; GFX11-NEXT:    v_mul_f32_e64 v0, -v0, v1
5334; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
5335; GFX11-NEXT:    s_setpc_b64 s[30:31]
5336;
5337; EG-LABEL: v_fdiv_neglhs_f32_daz_25ulp:
5338; EG:       ; %bb.0:
5339; EG-NEXT:    CF_END
5340; EG-NEXT:    PAD
5341  %neg.x = fneg float %x
5342  %div = fdiv float %neg.x, %y, !fpmath !0
5343  ret float %div
5344}
5345
; fdiv by a negated denominator with no !fpmath metadata under attribute
; group #1 (defined outside this chunk).
; The amdgcn checks expect the v_div_scale / v_rcp / FMA refinement /
; v_div_fmas / v_div_fixup sequence with the fneg applied as -v1 on the
; v_div_scale and v_div_fixup operands, and contain no HW_REG_MODE or
; s_denorm_mode instructions.
; The r600 checks expect only CF_END and PAD.
5346define float @v_fdiv_negrhs_f32_ieee(float %x, float %y) #1 {
5347; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_ieee:
5348; GFX6-FASTFMA:       ; %bb.0:
5349; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5350; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5351; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
5352; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
5353; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
5354; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5355; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
5356; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
5357; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
5358; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
5359; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5360; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5361; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
5362;
5363; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_ieee:
5364; GFX6-SLOWFMA:       ; %bb.0:
5365; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5366; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5367; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5368; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
5369; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5370; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
5371; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
5372; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
5373; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
5374; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
5375; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5376; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5377; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5378;
5379; GFX7-LABEL: v_fdiv_negrhs_f32_ieee:
5380; GFX7:       ; %bb.0:
5381; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5382; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5383; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
5384; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
5385; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
5386; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5387; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
5388; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
5389; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
5390; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
5391; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5392; GFX7-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5393; GFX7-NEXT:    s_setpc_b64 s[30:31]
5394;
5395; GFX8-LABEL: v_fdiv_negrhs_f32_ieee:
5396; GFX8:       ; %bb.0:
5397; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5398; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5399; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5400; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
5401; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5402; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
5403; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
5404; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
5405; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
5406; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
5407; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5408; GFX8-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5409; GFX8-NEXT:    s_setpc_b64 s[30:31]
5410;
5411; GFX10-LABEL: v_fdiv_negrhs_f32_ieee:
5412; GFX10:       ; %bb.0:
5413; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5414; GFX10-NEXT:    v_div_scale_f32 v2, s4, -v1, -v1, v0
5415; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
5416; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
5417; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
5418; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5419; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
5420; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
5421; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
5422; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
5423; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5424; GFX10-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5425; GFX10-NEXT:    s_setpc_b64 s[30:31]
5426;
5427; GFX11-LABEL: v_fdiv_negrhs_f32_ieee:
5428; GFX11:       ; %bb.0:
5429; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5430; GFX11-NEXT:    v_div_scale_f32 v2, null, -v1, -v1, v0
5431; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
5432; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5433; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
5434; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
5435; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5436; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
5437; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
5438; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
5439; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
5440; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5441; GFX11-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5442; GFX11-NEXT:    s_setpc_b64 s[30:31]
5443;
5444; EG-LABEL: v_fdiv_negrhs_f32_ieee:
5445; EG:       ; %bb.0:
5446; EG-NEXT:    CF_END
5447; EG-NEXT:    PAD
5448  %neg.y = fneg float %y
5449  %div = fdiv float %x, %neg.y
5450  ret float %div
5451}
5452
; fdiv by a negated denominator tagged with !fpmath !0 under attribute group
; #1 (both are defined outside this chunk).
; The amdgcn checks expect the frexp_mant/rcp/mul/ldexp expansion. The fneg
; shows up as -v1 on v_frexp_mant_f32_e64 (and on v_cndmask in the GFX6
; checks), while v_frexp_exp_i32_f32 reads v1 without a modifier.
; The r600 checks expect only CF_END and PAD.
5453define float @v_fdiv_negrhs_f32_ieee_25ulp(float %x, float %y) #1 {
5454; GFX6-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5455; GFX6:       ; %bb.0:
5456; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5457; GFX6-NEXT:    s_mov_b32 s6, 0x7f800000
5458; GFX6-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5459; GFX6-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
5460; GFX6-NEXT:    v_cndmask_b32_e64 v2, -v1, v2, s[4:5]
5461; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
5462; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
5463; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s6
5464; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5465; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
5466; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5467; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
5468; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
5469; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
5470; GFX6-NEXT:    s_setpc_b64 s[30:31]
5471;
5472; GFX7-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5473; GFX7:       ; %bb.0:
5474; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5475; GFX7-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5476; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
5477; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5478; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5479; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
5480; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
5481; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
5482; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
5483; GFX7-NEXT:    s_setpc_b64 s[30:31]
5484;
5485; GFX8-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5486; GFX8:       ; %bb.0:
5487; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5488; GFX8-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5489; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
5490; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5491; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5492; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
5493; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
5494; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
5495; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
5496; GFX8-NEXT:    s_setpc_b64 s[30:31]
5497;
5498; GFX10-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5499; GFX10:       ; %bb.0:
5500; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501; GFX10-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5502; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5503; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
5504; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5505; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
5506; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5507; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
5508; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
5509; GFX10-NEXT:    s_setpc_b64 s[30:31]
5510;
5511; GFX11-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5512; GFX11:       ; %bb.0:
5513; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5514; GFX11-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5515; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5516; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
5517; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5518; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
5519; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5520; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5521; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
5522; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
5523; GFX11-NEXT:    s_setpc_b64 s[30:31]
5524;
5525; EG-LABEL: v_fdiv_negrhs_f32_ieee_25ulp:
5526; EG:       ; %bb.0:
5527; EG-NEXT:    CF_END
5528; EG-NEXT:    PAD
5529  %neg.y = fneg float %y
5530  %div = fdiv float %x, %neg.y, !fpmath !0
5531  ret float %div
5532}
5533
; fdiv by a negated denominator with no !fpmath metadata under attribute
; group #2 (defined outside this chunk).
; The amdgcn checks expect the v_div_scale / v_rcp / FMA refinement /
; v_div_fmas / v_div_fixup sequence with the fneg applied as -v1 on the
; v_div_scale and v_div_fixup operands. They also expect
; hwreg(HW_REG_MODE, 4, 2) to be saved with s_getreg_b32 ahead of the FMA
; chain and written back from that SGPR with s_setreg_b32 after it.
; The r600 checks expect only CF_END and PAD.
5534define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 {
5535; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic:
5536; GFX6-FASTFMA:       ; %bb.0:
5537; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5538; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5539; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
5540; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5541; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5542; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5543; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5544; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
5545; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
5546; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
5547; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
5548; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
5549; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5550; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5551; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5552; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
5553;
5554; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_dynamic:
5555; GFX6-SLOWFMA:       ; %bb.0:
5556; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5557; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5558; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5559; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5560; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
5561; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5562; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5563; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
5564; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
5565; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
5566; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
5567; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
5568; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5569; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5570; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5571; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5572;
5573; GFX7-LABEL: v_fdiv_negrhs_f32_dynamic:
5574; GFX7:       ; %bb.0:
5575; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5576; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5577; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
5578; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5579; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5580; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5581; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5582; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
5583; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
5584; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
5585; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
5586; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
5587; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5588; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5589; GFX7-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5590; GFX7-NEXT:    s_setpc_b64 s[30:31]
5591;
5592; GFX8-LABEL: v_fdiv_negrhs_f32_dynamic:
5593; GFX8:       ; %bb.0:
5594; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5595; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5596; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5597; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5598; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
5599; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5600; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5601; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
5602; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
5603; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
5604; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
5605; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
5606; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5607; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5608; GFX8-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5609; GFX8-NEXT:    s_setpc_b64 s[30:31]
5610;
5611; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic:
5612; GFX10:       ; %bb.0:
5613; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5614; GFX10-NEXT:    v_div_scale_f32 v2, s4, -v1, -v1, v0
5615; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5616; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
5617; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
5618; GFX10-NEXT:    s_denorm_mode 15
5619; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5620; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
5621; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
5622; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
5623; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
5624; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
5625; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
5626; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5627; GFX10-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5628; GFX10-NEXT:    s_setpc_b64 s[30:31]
5629;
5630; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic:
5631; GFX11:       ; %bb.0:
5632; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5633; GFX11-NEXT:    v_div_scale_f32 v2, null, -v1, -v1, v0
5634; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5635; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
5636; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
5637; GFX11-NEXT:    s_denorm_mode 15
5638; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5639; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5640; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
5641; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
5642; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
5643; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
5644; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
5645; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
5646; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5647; GFX11-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5648; GFX11-NEXT:    s_setpc_b64 s[30:31]
5649;
5650; EG-LABEL: v_fdiv_negrhs_f32_dynamic:
5651; EG:       ; %bb.0:
5652; EG-NEXT:    CF_END
5653; EG-NEXT:    PAD
5654  %neg.y = fneg float %y
5655  %div = fdiv float %x, %neg.y
5656  ret float %div
5657}
5658
; Divides %x by the negation of %y, with !fpmath !0 attached to the fdiv and
; attribute group #2 on the function.
; NOTE(review): !0 and #2 are defined outside this part of the file; going by
; the function name they presumably mean 2.5 ulp accuracy and a dynamic
; denormal mode -- confirm against the definitions.
;
; What the autogenerated checks below pin down:
;  * every amdgcn target splits both operands with v_frexp_mant_f32 and
;    v_frexp_exp_i32_f32, multiplies the numerator mantissa by v_rcp_f32 of
;    the denominator mantissa, and rebuilds the result with v_ldexp_f32 using
;    the difference of the two exponents;
;  * the fneg shows up only as the -v1 source modifier on v_frexp_mant_f32
;    (and on the v_cndmask in the tahiti/pitcairn output); the exponent
;    extraction reads plain v1;
;  * the tahiti/pitcairn output additionally compares |v0| and |v1| against
;    0x7f800000 (the f32 +infinity bit pattern) and uses v_cndmask to choose
;    between the raw input and its frexp mantissa;
;  * no MODE register access (s_getreg, s_setreg, s_denorm_mode) is expected;
;  * the r600 checks contain only CF_END and PAD.
5659define float @v_fdiv_negrhs_f32_dynamic_25ulp(float %x, float %y) #2 {
5660; GFX6-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5661; GFX6:       ; %bb.0:
5662; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5663; GFX6-NEXT:    s_mov_b32 s6, 0x7f800000
5664; GFX6-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5665; GFX6-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
5666; GFX6-NEXT:    v_cndmask_b32_e64 v2, -v1, v2, s[4:5]
5667; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
5668; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
5669; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s6
5670; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5671; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
5672; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5673; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
5674; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
5675; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
5676; GFX6-NEXT:    s_setpc_b64 s[30:31]
5677;
5678; GFX7-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5679; GFX7:       ; %bb.0:
5680; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5681; GFX7-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5682; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
5683; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5684; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5685; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
5686; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
5687; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
5688; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
5689; GFX7-NEXT:    s_setpc_b64 s[30:31]
5690;
5691; GFX8-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5692; GFX8:       ; %bb.0:
5693; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5694; GFX8-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5695; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
5696; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5697; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
5698; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
5699; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
5700; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
5701; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
5702; GFX8-NEXT:    s_setpc_b64 s[30:31]
5703;
5704; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5705; GFX10:       ; %bb.0:
5706; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5707; GFX10-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5708; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5709; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
5710; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5711; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
5712; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5713; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
5714; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
5715; GFX10-NEXT:    s_setpc_b64 s[30:31]
5716;
5717; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5718; GFX11:       ; %bb.0:
5719; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5720; GFX11-NEXT:    v_frexp_mant_f32_e64 v2, -v1
5721; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
5722; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
5723; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
5724; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
5725; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
5726; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5727; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
5728; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
5729; GFX11-NEXT:    s_setpc_b64 s[30:31]
5730;
5731; EG-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp:
5732; EG:       ; %bb.0:
5733; EG-NEXT:    CF_END
5734; EG-NEXT:    PAD
5735  %neg.y = fneg float %y
5736  %div = fdiv float %x, %neg.y, !fpmath !0
5737  ret float %div
5738}
5739
; Divides %x by the negation of %y with no !fpmath metadata, under attribute
; group #0.
; NOTE(review): #0 is defined outside this part of the file; the _daz suffix
; suggests it flushes f32 denormals -- confirm against the definition.
;
; The checks expect the full-precision expansion on amdgcn: two
; v_div_scale_f32 (the fneg is folded in as the -v1 source modifier),
; v_rcp_f32, a chain of v_fma_f32 / v_fmac_f32 refinement steps, then
; v_div_fmas_f32 and v_div_fixup_f32.
; The refinement chain is bracketed by MODE register writes: on tahiti,
; pitcairn, hawaii and fiji s_setreg_imm32_b32 writes 3 to
; hwreg(HW_REG_MODE, 4, 2) before the chain and 0 after it; on gfx1010 and
; gfx1100 the same positions hold s_denorm_mode 15 and s_denorm_mode 12.
; The restore value is an immediate, so no s_getreg_b32 is expected here.
; NOTE(review): offset 4 / width 2 of MODE is presumably the f32 denormal
; control -- confirm against the ISA manual.
; The two tahiti/pitcairn prefixes (fast and slow FMA) expect the same
; instructions and differ only in instruction order and VGPR numbering.
; The r600 checks contain only CF_END and PAD.
5740define float @v_fdiv_negrhs_f32_daz(float %x, float %y) #0 {
5741; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_daz:
5742; GFX6-FASTFMA:       ; %bb.0:
5743; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5744; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5745; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
5746; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5747; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5748; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5749; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
5750; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
5751; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
5752; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
5753; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
5754; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5755; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5756; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5757; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
5758;
5759; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_daz:
5760; GFX6-SLOWFMA:       ; %bb.0:
5761; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5762; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5763; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5764; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
5765; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5766; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5767; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
5768; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
5769; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
5770; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
5771; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
5772; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5773; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5774; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5775; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5776;
5777; GFX7-LABEL: v_fdiv_negrhs_f32_daz:
5778; GFX7:       ; %bb.0:
5779; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5780; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5781; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
5782; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, -v1, v0
5783; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5784; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5785; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
5786; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
5787; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
5788; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
5789; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
5790; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5791; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5792; GFX7-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5793; GFX7-NEXT:    s_setpc_b64 s[30:31]
5794;
5795; GFX8-LABEL: v_fdiv_negrhs_f32_daz:
5796; GFX8:       ; %bb.0:
5797; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5798; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], -v1, -v1, v0
5799; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, -v1, v0
5800; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
5801; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
5802; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
5803; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
5804; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
5805; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
5806; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
5807; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
5808; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
5809; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
5810; GFX8-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5811; GFX8-NEXT:    s_setpc_b64 s[30:31]
5812;
5813; GFX10-LABEL: v_fdiv_negrhs_f32_daz:
5814; GFX10:       ; %bb.0:
5815; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5816; GFX10-NEXT:    v_div_scale_f32 v2, s4, -v1, -v1, v0
5817; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5818; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
5819; GFX10-NEXT:    s_denorm_mode 15
5820; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5821; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
5822; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
5823; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
5824; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
5825; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
5826; GFX10-NEXT:    s_denorm_mode 12
5827; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5828; GFX10-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5829; GFX10-NEXT:    s_setpc_b64 s[30:31]
5830;
5831; GFX11-LABEL: v_fdiv_negrhs_f32_daz:
5832; GFX11:       ; %bb.0:
5833; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5834; GFX11-NEXT:    v_div_scale_f32 v2, null, -v1, -v1, v0
5835; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, -v1, v0
5836; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
5837; GFX11-NEXT:    s_denorm_mode 15
5838; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5839; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
5840; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
5841; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
5842; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
5843; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
5844; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
5845; GFX11-NEXT:    s_denorm_mode 12
5846; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
5847; GFX11-NEXT:    v_div_fixup_f32 v0, v2, -v1, v0
5848; GFX11-NEXT:    s_setpc_b64 s[30:31]
5849;
5850; EG-LABEL: v_fdiv_negrhs_f32_daz:
5851; EG:       ; %bb.0:
5852; EG-NEXT:    CF_END
5853; EG-NEXT:    PAD
5854  %neg.y = fneg float %y
5855  %div = fdiv float %x, %neg.y
5856  ret float %div
5857}
5858
; Divides %x by the negation of %y with !fpmath !0 on the fdiv, under
; attribute group #0.
; NOTE(review): !0 and #0 are defined outside this part of the file; the name
; suggests 2.5 ulp accuracy with f32 denormals flushed -- confirm.
;
; Expected amdgcn lowering, per the checks:
;  * compare |v1| against 0x6f800000 (the f32 bit pattern of 2^96); when |v1|
;    is larger pick the scale factor 0x2f800000 (2^-32), otherwise 1.0;
;  * multiply -v1 by that scale, take v_rcp_f32 of the product, multiply v0
;    by the reciprocal, then multiply by the scale once more;
;  * no MODE register access is expected;
;  * tahiti, pitcairn, hawaii and fiji share a single check prefix and hold
;    the two constants in registers; gfx1010 and gfx1100 use them as literals
;    in the compare and select, and gfx1100 adds s_waitcnt_depctr after the
;    v_rcp_f32.
; The r600 checks contain only CF_END and PAD.
5859define float @v_fdiv_negrhs_f32_daz_25ulp(float %x, float %y) #0 {
5860; GFX678-LABEL: v_fdiv_negrhs_f32_daz_25ulp:
5861; GFX678:       ; %bb.0:
5862; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5863; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
5864; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
5865; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
5866; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
5867; GFX678-NEXT:    v_mul_f32_e64 v1, -v1, v2
5868; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
5869; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
5870; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
5871; GFX678-NEXT:    s_setpc_b64 s[30:31]
5872;
5873; GFX10-LABEL: v_fdiv_negrhs_f32_daz_25ulp:
5874; GFX10:       ; %bb.0:
5875; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5876; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
5877; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
5878; GFX10-NEXT:    v_mul_f32_e64 v1, -v1, v2
5879; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
5880; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
5881; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
5882; GFX10-NEXT:    s_setpc_b64 s[30:31]
5883;
5884; GFX11-LABEL: v_fdiv_negrhs_f32_daz_25ulp:
5885; GFX11:       ; %bb.0:
5886; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5887; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
5888; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
5889; GFX11-NEXT:    v_mul_f32_e64 v1, -v1, v2
5890; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
5891; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5892; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
5893; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
5894; GFX11-NEXT:    s_setpc_b64 s[30:31]
5895;
5896; EG-LABEL: v_fdiv_negrhs_f32_daz_25ulp:
5897; EG:       ; %bb.0:
5898; EG-NEXT:    CF_END
5899; EG-NEXT:    PAD
5900  %neg.y = fneg float %y
5901  %div = fdiv float %x, %neg.y, !fpmath !0
5902  ret float %div
5903}
5904
; Divides %x by the constant 12345.0 with no !fpmath metadata, under attribute
; group #1.
; NOTE(review): #1 is defined outside this part of the file; the _ieee suffix
; suggests denormals are preserved -- confirm against the definition.
;
; The checks expect the full-precision expansion on amdgcn: two
; v_div_scale_f32, v_rcp_f32, a chain of v_fma_f32 / v_fmac_f32 refinement
; steps, then v_div_fmas_f32 and v_div_fixup_f32.
; The divisor appears as 0x4640e400, the f32 bit pattern of 12345.0.  On
; tahiti, pitcairn, hawaii and fiji it is first moved into s6; on gfx1010 and
; gfx1100 it is used directly as a literal operand.
; No MODE register access (s_getreg, s_setreg, s_denorm_mode) is expected
; around the refinement chain in this variant.
; The r600 checks contain only CF_END and PAD.
5905define float @v_fdiv_f32_constrhs0_ieee(float %x) #1 {
5906; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_ieee:
5907; GFX6-FASTFMA:       ; %bb.0:
5908; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5909; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
5910; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
5911; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
5912; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
5913; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
5914; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
5915; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
5916; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
5917; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
5918; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
5919; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
5920; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
5921; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
5922;
5923; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_ieee:
5924; GFX6-SLOWFMA:       ; %bb.0:
5925; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5926; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
5927; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
5928; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
5929; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
5930; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
5931; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
5932; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
5933; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
5934; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
5935; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
5936; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
5937; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
5938; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
5939;
5940; GFX7-LABEL: v_fdiv_f32_constrhs0_ieee:
5941; GFX7:       ; %bb.0:
5942; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5943; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
5944; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
5945; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
5946; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
5947; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
5948; GFX7-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
5949; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
5950; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
5951; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
5952; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
5953; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
5954; GFX7-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
5955; GFX7-NEXT:    s_setpc_b64 s[30:31]
5956;
5957; GFX8-LABEL: v_fdiv_f32_constrhs0_ieee:
5958; GFX8:       ; %bb.0:
5959; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5960; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
5961; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
5962; GFX8-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
5963; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
5964; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
5965; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
5966; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
5967; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
5968; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
5969; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
5970; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
5971; GFX8-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
5972; GFX8-NEXT:    s_setpc_b64 s[30:31]
5973;
5974; GFX10-LABEL: v_fdiv_f32_constrhs0_ieee:
5975; GFX10:       ; %bb.0:
5976; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5977; GFX10-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
5978; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
5979; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
5980; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
5981; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
5982; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
5983; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
5984; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
5985; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
5986; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
5987; GFX10-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
5988; GFX10-NEXT:    s_setpc_b64 s[30:31]
5989;
5990; GFX11-LABEL: v_fdiv_f32_constrhs0_ieee:
5991; GFX11:       ; %bb.0:
5992; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5993; GFX11-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
5994; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
5995; GFX11-NEXT:    s_waitcnt_depctr 0xfff
5996; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
5997; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
5998; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
5999; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6000; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6001; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6002; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6003; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6004; GFX11-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
6005; GFX11-NEXT:    s_setpc_b64 s[30:31]
6006;
6007; EG-LABEL: v_fdiv_f32_constrhs0_ieee:
6008; EG:       ; %bb.0:
6009; EG-NEXT:    CF_END
6010; EG-NEXT:    PAD
6011  %div = fdiv float %x, 12345.0
6012  ret float %div
6013}
6014
; Divides %x by the constant 12345.0 with !fpmath !0 on the fdiv, under
; attribute group #1.
; NOTE(review): !0 and #1 are defined outside this part of the file; the name
; suggests 2.5 ulp accuracy with denormals preserved -- confirm.
;
; Expected amdgcn lowering, per the checks:
;  * %x is split with v_frexp_mant_f32 / v_frexp_exp_i32_f32, the mantissa is
;    multiplied by the literal 0x3fa9e0f0, and v_ldexp_f32 rebuilds the
;    result from the adjusted exponent;
;  * 0x3fa9e0f0 is approximately 1.3272 as an f32, which matches the
;    reciprocal of 12345 / 2^14, so the divisor's mantissa reciprocal has been
;    constant-folded;
;  * hawaii, fiji, gfx1010 and gfx1100 also fold the divisor's exponent into
;    an immediate add of -14 (gfx1100 pairs it with the multiply in one
;    v_dual instruction);
;  * tahiti/pitcairn instead run v_frexp_exp_i32_f32 on the literal
;    0x4640e400 (12345.0) and subtract the result, and additionally guard the
;    mantissa of %x with a compare against 0x7f800000 (f32 +infinity) plus
;    v_cndmask;
;  * no MODE register access is expected.
; The r600 checks contain only CF_END and PAD.
6015define float @v_fdiv_f32_constrhs0_ieee_25ulp(float %x) #1 {
6016; GFX6-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6017; GFX6:       ; %bb.0:
6018; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6019; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
6020; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v0
6021; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
6022; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, 0x4640e400
6023; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6024; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6025; GFX6-NEXT:    v_mul_f32_e32 v2, 0x3fa9e0f0, v2
6026; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
6027; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
6028; GFX6-NEXT:    s_setpc_b64 s[30:31]
6029;
6030; GFX7-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6031; GFX7:       ; %bb.0:
6032; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6033; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v0
6034; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
6035; GFX7-NEXT:    v_mul_f32_e32 v0, 0x3fa9e0f0, v0
6036; GFX7-NEXT:    v_add_i32_e32 v1, vcc, -14, v1
6037; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
6038; GFX7-NEXT:    s_setpc_b64 s[30:31]
6039;
6040; GFX8-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6041; GFX8:       ; %bb.0:
6042; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6043; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v0
6044; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
6045; GFX8-NEXT:    v_mul_f32_e32 v0, 0x3fa9e0f0, v0
6046; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -14, v1
6047; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
6048; GFX8-NEXT:    s_setpc_b64 s[30:31]
6049;
6050; GFX10-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6051; GFX10:       ; %bb.0:
6052; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6053; GFX10-NEXT:    v_frexp_mant_f32_e32 v1, v0
6054; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6055; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3fa9e0f0, v1
6056; GFX10-NEXT:    v_add_nc_u32_e32 v0, -14, v0
6057; GFX10-NEXT:    v_ldexp_f32 v0, v1, v0
6058; GFX10-NEXT:    s_setpc_b64 s[30:31]
6059;
6060; GFX11-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6061; GFX11:       ; %bb.0:
6062; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6063; GFX11-NEXT:    v_frexp_mant_f32_e32 v1, v0
6064; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6065; GFX11-NEXT:    v_dual_mul_f32 v1, 0x3fa9e0f0, v1 :: v_dual_add_nc_u32 v0, -14, v0
6066; GFX11-NEXT:    v_ldexp_f32 v0, v1, v0
6067; GFX11-NEXT:    s_setpc_b64 s[30:31]
6068;
6069; EG-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp:
6070; EG:       ; %bb.0:
6071; EG-NEXT:    CF_END
6072; EG-NEXT:    PAD
6073  %div = fdiv float %x, 12345.0, !fpmath !0
6074  ret float %div
6075}
6076
; Divides %x by the constant 12345.0 with no !fpmath metadata, under attribute
; group #2.
; NOTE(review): #2 is defined outside this part of the file; the _dynamic
; suffix suggests the denormal mode is not known at compile time -- confirm.
;
; The checks expect the full-precision expansion on amdgcn: two
; v_div_scale_f32, v_rcp_f32, a chain of v_fma_f32 / v_fmac_f32 refinement
; steps, then v_div_fmas_f32 and v_div_fixup_f32.  The divisor appears as
; 0x4640e400 (the f32 bit pattern of 12345.0), in s6 on tahiti, pitcairn,
; hawaii and fiji and as a literal on gfx1010 and gfx1100.
; The refinement chain is wrapped in a save/modify/restore of the MODE
; register field hwreg(HW_REG_MODE, 4, 2):
;  * s_getreg_b32 first copies the current field into an SGPR;
;  * s_setreg_imm32_b32 writes 3 (older targets) or s_denorm_mode 15
;    (gfx1010, gfx1100) switches the mode before the chain;
;  * s_setreg_b32 writes the saved SGPR back after the last v_fma_f32.
; NOTE(review): offset 4 / width 2 of MODE is presumably the f32 denormal
; control -- confirm against the ISA manual.
; The r600 checks contain only CF_END and PAD.
6077define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 {
6078; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic:
6079; GFX6-FASTFMA:       ; %bb.0:
6080; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6081; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6082; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6083; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
6084; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
6085; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6086; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6087; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6088; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
6089; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
6090; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
6091; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
6092; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
6093; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6094; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6095; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6096; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6097;
6098; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_dynamic:
6099; GFX6-SLOWFMA:       ; %bb.0:
6100; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6101; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6102; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6103; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
6104; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6105; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
6106; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6107; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6108; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6109; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
6110; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
6111; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
6112; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
6113; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6114; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6115; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6116; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6117;
6118; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic:
6119; GFX7:       ; %bb.0:
6120; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6121; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
6122; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6123; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
6124; GFX7-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
6125; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6126; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6127; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6128; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
6129; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
6130; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
6131; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
6132; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
6133; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6134; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6135; GFX7-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6136; GFX7-NEXT:    s_setpc_b64 s[30:31]
6137;
6138; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic:
6139; GFX8:       ; %bb.0:
6140; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6141; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
6142; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6143; GFX8-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
6144; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6145; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
6146; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6147; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6148; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
6149; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
6150; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
6151; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
6152; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
6153; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6154; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6155; GFX8-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6156; GFX8-NEXT:    s_setpc_b64 s[30:31]
6157;
6158; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic:
6159; GFX10:       ; %bb.0:
6160; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6161; GFX10-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
6162; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
6163; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6164; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
6165; GFX10-NEXT:    s_denorm_mode 15
6166; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6167; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
6168; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
6169; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
6170; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
6171; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
6172; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6173; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6174; GFX10-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
6175; GFX10-NEXT:    s_setpc_b64 s[30:31]
6176;
6177; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic:
6178; GFX11:       ; %bb.0:
6179; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6180; GFX11-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
6181; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
6182; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
6183; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
6184; GFX11-NEXT:    s_denorm_mode 15
6185; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6186; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6187; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
6188; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6189; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6190; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6191; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6192; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
6193; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6194; GFX11-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
6195; GFX11-NEXT:    s_setpc_b64 s[30:31]
6196;
6197; EG-LABEL: v_fdiv_f32_constrhs0_dynamic:
6198; EG:       ; %bb.0:
6199; EG-NEXT:    CF_END
6200; EG-NEXT:    PAD
6201  %div = fdiv float %x, 12345.0
6202  ret float %div
6203}
6204
; Divides %x by the constant 12345.0 with !fpmath !0 on the fdiv, under
; attribute group #2.
; NOTE(review): !0 and #2 are defined outside this part of the file; the name
; suggests 2.5 ulp accuracy with a dynamic denormal mode -- confirm.
;
; Expected amdgcn lowering, per the checks:
;  * %x is split with v_frexp_mant_f32 / v_frexp_exp_i32_f32, the mantissa is
;    multiplied by the literal 0x3fa9e0f0 (approximately 1.3272 as an f32,
;    matching the reciprocal of 12345 / 2^14), and v_ldexp_f32 rebuilds the
;    result from the adjusted exponent;
;  * hawaii, fiji, gfx1010 and gfx1100 adjust the exponent with an immediate
;    add of -14 (gfx1100 pairs it with the multiply in one v_dual
;    instruction);
;  * tahiti/pitcairn instead run v_frexp_exp_i32_f32 on the literal
;    0x4640e400 (12345.0) and subtract the result, and additionally guard the
;    mantissa of %x with a compare against 0x7f800000 (f32 +infinity) plus
;    v_cndmask;
;  * no s_getreg_b32 / s_setreg_b32 / s_denorm_mode is expected, i.e. this
;    expansion does not touch the MODE register.
; The r600 checks contain only CF_END and PAD.
6205define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #2 {
6206; GFX6-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6207; GFX6:       ; %bb.0:
6208; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6209; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
6210; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v0
6211; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
6212; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, 0x4640e400
6213; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
6214; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6215; GFX6-NEXT:    v_mul_f32_e32 v2, 0x3fa9e0f0, v2
6216; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
6217; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
6218; GFX6-NEXT:    s_setpc_b64 s[30:31]
6219;
6220; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6221; GFX7:       ; %bb.0:
6222; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6223; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v0
6224; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
6225; GFX7-NEXT:    v_mul_f32_e32 v0, 0x3fa9e0f0, v0
6226; GFX7-NEXT:    v_add_i32_e32 v1, vcc, -14, v1
6227; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
6228; GFX7-NEXT:    s_setpc_b64 s[30:31]
6229;
6230; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6231; GFX8:       ; %bb.0:
6232; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6233; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v0
6234; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
6235; GFX8-NEXT:    v_mul_f32_e32 v0, 0x3fa9e0f0, v0
6236; GFX8-NEXT:    v_add_u32_e32 v1, vcc, -14, v1
6237; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
6238; GFX8-NEXT:    s_setpc_b64 s[30:31]
6239;
6240; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6241; GFX10:       ; %bb.0:
6242; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6243; GFX10-NEXT:    v_frexp_mant_f32_e32 v1, v0
6244; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6245; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3fa9e0f0, v1
6246; GFX10-NEXT:    v_add_nc_u32_e32 v0, -14, v0
6247; GFX10-NEXT:    v_ldexp_f32 v0, v1, v0
6248; GFX10-NEXT:    s_setpc_b64 s[30:31]
6249;
6250; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6251; GFX11:       ; %bb.0:
6252; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6253; GFX11-NEXT:    v_frexp_mant_f32_e32 v1, v0
6254; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6255; GFX11-NEXT:    v_dual_mul_f32 v1, 0x3fa9e0f0, v1 :: v_dual_add_nc_u32 v0, -14, v0
6256; GFX11-NEXT:    v_ldexp_f32 v0, v1, v0
6257; GFX11-NEXT:    s_setpc_b64 s[30:31]
6258;
6259; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp:
6260; EG:       ; %bb.0:
6261; EG-NEXT:    CF_END
6262; EG-NEXT:    PAD
6263  %div = fdiv float %x, 12345.0, !fpmath !0
6264  ret float %div
6265}
6266
6267define float @v_fdiv_f32_constrhs0_daz(float %x) #0 {
6268; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_daz:
6269; GFX6-FASTFMA:       ; %bb.0:
6270; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6271; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6272; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6273; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
6274; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
6275; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6276; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6277; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
6278; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
6279; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
6280; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
6281; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
6282; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6283; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6284; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6285; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6286;
6287; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_daz:
6288; GFX6-SLOWFMA:       ; %bb.0:
6289; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6290; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6291; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6292; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
6293; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
6294; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6295; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6296; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6297; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
6298; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
6299; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
6300; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
6301; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6302; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6303; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6304; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6305;
6306; GFX7-LABEL: v_fdiv_f32_constrhs0_daz:
6307; GFX7:       ; %bb.0:
6308; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6309; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
6310; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6311; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
6312; GFX7-NEXT:    v_div_scale_f32 v3, vcc, v0, s6, v0
6313; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6314; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6315; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
6316; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
6317; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
6318; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
6319; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
6320; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6321; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6322; GFX7-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6323; GFX7-NEXT:    s_setpc_b64 s[30:31]
6324;
6325; GFX8-LABEL: v_fdiv_f32_constrhs0_daz:
6326; GFX8:       ; %bb.0:
6327; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6328; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
6329; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], s6, s6, v0
6330; GFX8-NEXT:    v_div_scale_f32 v2, vcc, v0, s6, v0
6331; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
6332; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6333; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6334; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
6335; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
6336; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
6337; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
6338; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
6339; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6340; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6341; GFX8-NEXT:    v_div_fixup_f32 v0, v1, s6, v0
6342; GFX8-NEXT:    s_setpc_b64 s[30:31]
6343;
6344; GFX10-LABEL: v_fdiv_f32_constrhs0_daz:
6345; GFX10:       ; %bb.0:
6346; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6347; GFX10-NEXT:    v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0
6348; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
6349; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
6350; GFX10-NEXT:    s_denorm_mode 15
6351; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6352; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
6353; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
6354; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
6355; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
6356; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
6357; GFX10-NEXT:    s_denorm_mode 12
6358; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6359; GFX10-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
6360; GFX10-NEXT:    s_setpc_b64 s[30:31]
6361;
6362; GFX11-LABEL: v_fdiv_f32_constrhs0_daz:
6363; GFX11:       ; %bb.0:
6364; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6365; GFX11-NEXT:    v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0
6366; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0
6367; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
6368; GFX11-NEXT:    s_denorm_mode 15
6369; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6370; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6371; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
6372; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6373; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6374; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6375; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6376; GFX11-NEXT:    s_denorm_mode 12
6377; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6378; GFX11-NEXT:    v_div_fixup_f32 v0, v1, 0x4640e400, v0
6379; GFX11-NEXT:    s_setpc_b64 s[30:31]
6380;
6381; EG-LABEL: v_fdiv_f32_constrhs0_daz:
6382; EG:       ; %bb.0:
6383; EG-NEXT:    CF_END
6384; EG-NEXT:    PAD
6385  %div = fdiv float %x, 12345.0
6386  ret float %div
6387}
6388
; Divides %x by the constant 12345.0 with !fpmath !0 attached. The checks
; shared by all amdgcn runs expect the division to become a single v_mul_f32
; by 0x38a9e0f0 (approximately 1/12345 as an f32 bit pattern). The r600 run
; only expects CF_END and PAD.
; NOTE(review): attribute group #0 and metadata !0 are defined outside this
; chunk; presumably the "daz" denormal mode and a 2.5 ulp bound - confirm.
6389define float @v_fdiv_f32_constrhs0_daz_25ulp(float %x) #0 {
6390; GCN-LABEL: v_fdiv_f32_constrhs0_daz_25ulp:
6391; GCN:       ; %bb.0:
6392; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6393; GCN-NEXT:    v_mul_f32_e32 v0, 0x38a9e0f0, v0
6394; GCN-NEXT:    s_setpc_b64 s[30:31]
6395;
6396; EG-LABEL: v_fdiv_f32_constrhs0_daz_25ulp:
6397; EG:       ; %bb.0:
6398; EG-NEXT:    CF_END
6399; EG-NEXT:    PAD
6400  %div = fdiv float %x, 12345.0, !fpmath !0
6401  ret float %div
6402}
6403
; Divides the constant 12345.0 (0x4640e400 as f32 bits) by %x with no !fpmath
; metadata. Every amdgcn run expects the full expansion: two v_div_scale_f32,
; v_rcp_f32, an FMA refinement chain, v_div_fmas_f32 and v_div_fixup_f32. No
; mode-register writes appear around the FMA chain in these checks.
; NOTE(review): attribute group #1 is defined outside this chunk; presumably
; it selects IEEE denormal handling, as the function name suggests - confirm.
6404define float @v_fdiv_f32_constlhs0_ieee(float %x) #1 {
6405; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_ieee:
6406; GFX6-FASTFMA:       ; %bb.0:
6407; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6408; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6409; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6410; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
6411; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
6412; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v3, v2, v2
6413; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6414; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
6415; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
6416; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
6417; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
6418; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6419; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6420; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6421;
6422; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_ieee:
6423; GFX6-SLOWFMA:       ; %bb.0:
6424; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6425; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6426; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6427; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6428; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
6429; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6430; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6431; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
6432; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
6433; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
6434; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
6435; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6436; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6437; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6438;
6439; GFX7-LABEL: v_fdiv_f32_constlhs0_ieee:
6440; GFX7:       ; %bb.0:
6441; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6442; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
6443; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6444; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
6445; GFX7-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
6446; GFX7-NEXT:    v_fma_f32 v2, v3, v2, v2
6447; GFX7-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6448; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
6449; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
6450; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
6451; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
6452; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6453; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6454; GFX7-NEXT:    s_setpc_b64 s[30:31]
6455;
6456; GFX8-LABEL: v_fdiv_f32_constlhs0_ieee:
6457; GFX8:       ; %bb.0:
6458; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6459; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
6460; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6461; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6462; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
6463; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6464; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
6465; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
6466; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
6467; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
6468; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
6469; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6470; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6471; GFX8-NEXT:    s_setpc_b64 s[30:31]
6472;
6473; GFX10-LABEL: v_fdiv_f32_constlhs0_ieee:
6474; GFX10:       ; %bb.0:
6475; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6476; GFX10-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
6477; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
6478; GFX10-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
6479; GFX10-NEXT:    v_fmac_f32_e32 v2, v3, v2
6480; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6481; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
6482; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
6483; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
6484; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
6485; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6486; GFX10-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6487; GFX10-NEXT:    s_setpc_b64 s[30:31]
6488;
6489; GFX11-LABEL: v_fdiv_f32_constlhs0_ieee:
6490; GFX11:       ; %bb.0:
6491; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6492; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
6493; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
6494; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6495; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
6496; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
6497; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6498; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6499; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6500; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6501; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6502; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6503; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6504; GFX11-NEXT:    s_setpc_b64 s[30:31]
6505;
6506; EG-LABEL: v_fdiv_f32_constlhs0_ieee:
6507; EG:       ; %bb.0:
6508; EG-NEXT:    CF_END
6509; EG-NEXT:    PAD
6510  %div = fdiv float 12345.0, %x
6511  ret float %div
6512}
6513
; Divides the constant 12345.0 by %x with !fpmath !0 attached. The expected
; lowering takes v_frexp_mant_f32 of %x, feeds it to v_rcp_f32, multiplies by
; 0x3f40e400 (about 0.7535, i.e. 12345 / 2^14) and rescales with v_ldexp_f32
; by the exponent difference. The two tahiti/pitcairn runs additionally
; select between %x and its mantissa via a compare against 0x7f800000 (f32
; +infinity bits) and obtain the constant's exponent with an instruction;
; later targets use the immediate 14 instead.
; NOTE(review): attribute group #1 and metadata !0 are defined outside this
; chunk; presumably IEEE denormal handling and a 2.5 ulp bound - confirm.
6514define float @v_fdiv_f32_constlhs0_ieee_25ulp(float %x) #1 {
6515; GFX6-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6516; GFX6:       ; %bb.0:
6517; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6518; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
6519; GFX6-NEXT:    v_frexp_mant_f32_e32 v1, v0
6520; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
6521; GFX6-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
6522; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
6523; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6524; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v2, 0x4640e400
6525; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
6526; GFX6-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6527; GFX6-NEXT:    v_ldexp_f32_e32 v0, v1, v0
6528; GFX6-NEXT:    s_setpc_b64 s[30:31]
6529;
6530; GFX7-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6531; GFX7:       ; %bb.0:
6532; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6533; GFX7-NEXT:    v_frexp_mant_f32_e32 v1, v0
6534; GFX7-NEXT:    v_rcp_f32_e32 v1, v1
6535; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6536; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, 14, v0
6537; GFX7-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6538; GFX7-NEXT:    v_ldexp_f32_e32 v0, v1, v0
6539; GFX7-NEXT:    s_setpc_b64 s[30:31]
6540;
6541; GFX8-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6542; GFX8:       ; %bb.0:
6543; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6544; GFX8-NEXT:    v_frexp_mant_f32_e32 v1, v0
6545; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
6546; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6547; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 14, v0
6548; GFX8-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6549; GFX8-NEXT:    v_ldexp_f32 v0, v1, v0
6550; GFX8-NEXT:    s_setpc_b64 s[30:31]
6551;
6552; GFX10-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6553; GFX10:       ; %bb.0:
6554; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6555; GFX10-NEXT:    v_frexp_mant_f32_e32 v1, v0
6556; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6557; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
6558; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 14, v0
6559; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6560; GFX10-NEXT:    v_ldexp_f32 v0, v1, v0
6561; GFX10-NEXT:    s_setpc_b64 s[30:31]
6562;
6563; GFX11-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6564; GFX11:       ; %bb.0:
6565; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6566; GFX11-NEXT:    v_frexp_mant_f32_e32 v1, v0
6567; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6568; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
6569; GFX11-NEXT:    v_sub_nc_u32_e32 v0, 14, v0
6570; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6571; GFX11-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6572; GFX11-NEXT:    v_ldexp_f32 v0, v1, v0
6573; GFX11-NEXT:    s_setpc_b64 s[30:31]
6574;
6575; EG-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp:
6576; EG:       ; %bb.0:
6577; EG-NEXT:    CF_END
6578; EG-NEXT:    PAD
6579  %div = fdiv float 12345.0, %x, !fpmath !0
6580  ret float %div
6581}
6582
; Divides the constant 12345.0 by %x with no !fpmath metadata. The expected
; output is the full div_scale / rcp / FMA / div_fmas / div_fixup expansion,
; with the FMA chain bracketed by mode-register handling: s_getreg_b32 reads
; hwreg(HW_REG_MODE, 4, 2) into an SGPR, the field is then set (immediate 3
; via s_setreg_imm32_b32, or s_denorm_mode 15 on gfx10/gfx11), and finally
; s_setreg_b32 writes the saved value back.
; NOTE(review): attribute group #2 is defined outside this chunk; presumably
; it requests a dynamic denormal mode, as the function name suggests.
6583define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 {
6584; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic:
6585; GFX6-FASTFMA:       ; %bb.0:
6586; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6587; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6588; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6589; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
6590; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6591; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6592; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6593; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6594; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
6595; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
6596; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
6597; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
6598; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
6599; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6600; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6601; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6602; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6603;
6604; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_dynamic:
6605; GFX6-SLOWFMA:       ; %bb.0:
6606; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6607; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6608; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6609; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6610; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6611; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
6612; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6613; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6614; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6615; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
6616; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
6617; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
6618; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
6619; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6620; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6621; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6622; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6623;
6624; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic:
6625; GFX7:       ; %bb.0:
6626; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6627; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
6628; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6629; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
6630; GFX7-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6631; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6632; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6633; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6634; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
6635; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
6636; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
6637; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
6638; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
6639; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6640; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6641; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6642; GFX7-NEXT:    s_setpc_b64 s[30:31]
6643;
6644; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic:
6645; GFX8:       ; %bb.0:
6646; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6647; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
6648; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6649; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6650; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6651; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
6652; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6653; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6654; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
6655; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
6656; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
6657; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
6658; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
6659; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6660; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6661; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6662; GFX8-NEXT:    s_setpc_b64 s[30:31]
6663;
6664; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic:
6665; GFX10:       ; %bb.0:
6666; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6667; GFX10-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
6668; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6669; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
6670; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
6671; GFX10-NEXT:    s_denorm_mode 15
6672; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6673; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
6674; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
6675; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
6676; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
6677; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
6678; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
6679; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6680; GFX10-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6681; GFX10-NEXT:    s_setpc_b64 s[30:31]
6682;
6683; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic:
6684; GFX11:       ; %bb.0:
6685; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6686; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
6687; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6688; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
6689; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
6690; GFX11-NEXT:    s_denorm_mode 15
6691; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6692; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6693; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
6694; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6695; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6696; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6697; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6698; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
6699; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6700; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6701; GFX11-NEXT:    s_setpc_b64 s[30:31]
6702;
6703; EG-LABEL: v_fdiv_f32_constlhs0_dynamic:
6704; EG:       ; %bb.0:
6705; EG-NEXT:    CF_END
6706; EG-NEXT:    PAD
6707  %div = fdiv float 12345.0, %x
6708  ret float %div
6709}
6710
; Divides the constant 12345.0 by %x with !fpmath !0 attached. The expected
; lowering is a frexp-based sequence: v_frexp_mant_f32 of %x, v_rcp_f32, a
; multiply by 0x3f40e400 (about 0.7535, i.e. 12345 / 2^14), then v_ldexp_f32
; by the exponent difference. No mode-register reads or writes appear in
; these checks. The tahiti/pitcairn runs add a compare against 0x7f800000
; (f32 +infinity bits) that selects between %x and its mantissa.
; NOTE(review): attribute group #2 and metadata !0 are defined outside this
; chunk; presumably a dynamic denormal mode and a 2.5 ulp bound - confirm.
6711define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) #2 {
6712; GFX6-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6713; GFX6:       ; %bb.0:
6714; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6715; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
6716; GFX6-NEXT:    v_frexp_mant_f32_e32 v1, v0
6717; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
6718; GFX6-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
6719; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
6720; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6721; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v2, 0x4640e400
6722; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
6723; GFX6-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6724; GFX6-NEXT:    v_ldexp_f32_e32 v0, v1, v0
6725; GFX6-NEXT:    s_setpc_b64 s[30:31]
6726;
6727; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6728; GFX7:       ; %bb.0:
6729; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6730; GFX7-NEXT:    v_frexp_mant_f32_e32 v1, v0
6731; GFX7-NEXT:    v_rcp_f32_e32 v1, v1
6732; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6733; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, 14, v0
6734; GFX7-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6735; GFX7-NEXT:    v_ldexp_f32_e32 v0, v1, v0
6736; GFX7-NEXT:    s_setpc_b64 s[30:31]
6737;
6738; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6739; GFX8:       ; %bb.0:
6740; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6741; GFX8-NEXT:    v_frexp_mant_f32_e32 v1, v0
6742; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
6743; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6744; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, 14, v0
6745; GFX8-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6746; GFX8-NEXT:    v_ldexp_f32 v0, v1, v0
6747; GFX8-NEXT:    s_setpc_b64 s[30:31]
6748;
6749; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6750; GFX10:       ; %bb.0:
6751; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6752; GFX10-NEXT:    v_frexp_mant_f32_e32 v1, v0
6753; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6754; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
6755; GFX10-NEXT:    v_sub_nc_u32_e32 v0, 14, v0
6756; GFX10-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6757; GFX10-NEXT:    v_ldexp_f32 v0, v1, v0
6758; GFX10-NEXT:    s_setpc_b64 s[30:31]
6759;
6760; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6761; GFX11:       ; %bb.0:
6762; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6763; GFX11-NEXT:    v_frexp_mant_f32_e32 v1, v0
6764; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
6765; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
6766; GFX11-NEXT:    v_sub_nc_u32_e32 v0, 14, v0
6767; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6768; GFX11-NEXT:    v_mul_f32_e32 v1, 0x3f40e400, v1
6769; GFX11-NEXT:    v_ldexp_f32 v0, v1, v0
6770; GFX11-NEXT:    s_setpc_b64 s[30:31]
6771;
6772; EG-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp:
6773; EG:       ; %bb.0:
6774; EG-NEXT:    CF_END
6775; EG-NEXT:    PAD
6776  %div = fdiv float 12345.0, %x, !fpmath !0
6777  ret float %div
6778}
6779
; Divides the constant 12345.0 by %x with no !fpmath metadata. The expected
; output is the full div_scale / rcp / FMA / div_fmas / div_fixup expansion,
; with the FMA chain bracketed by immediate mode-register writes:
; s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2) is set to 3 before and 0 after
; on gfx6-gfx8, and s_denorm_mode 15 / 12 is used on gfx10 and gfx11. Unlike
; a save/restore, the value written afterwards is a fixed immediate.
; NOTE(review): attribute group #0 is defined outside this chunk; presumably
; it selects the "daz" denormal mode, as the function name suggests.
6780define float @v_fdiv_f32_constlhs0_daz(float %x) #0 {
6781; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_daz:
6782; GFX6-FASTFMA:       ; %bb.0:
6783; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6784; GFX6-FASTFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6785; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6786; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v2, v1
6787; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6788; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6789; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6790; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, v4, v2, v2
6791; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v4, v3, v2
6792; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v1, v4, v3
6793; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, v5, v2, v4
6794; GFX6-FASTFMA-NEXT:    v_fma_f32 v1, -v1, v4, v3
6795; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6796; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6797; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6798; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6799;
6800; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_daz:
6801; GFX6-SLOWFMA:       ; %bb.0:
6802; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6803; GFX6-SLOWFMA-NEXT:    s_mov_b32 s6, 0x4640e400
6804; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6805; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6806; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v3, v1
6807; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6808; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6809; GFX6-SLOWFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6810; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v4, v2, v3
6811; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v1, v4, v2
6812; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v3, v4
6813; GFX6-SLOWFMA-NEXT:    v_fma_f32 v1, -v1, v4, v2
6814; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6815; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6816; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6817; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6818;
6819; GFX7-LABEL: v_fdiv_f32_constlhs0_daz:
6820; GFX7:       ; %bb.0:
6821; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6822; GFX7-NEXT:    s_mov_b32 s6, 0x4640e400
6823; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6824; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
6825; GFX7-NEXT:    v_div_scale_f32 v3, vcc, s6, v0, s6
6826; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6827; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6828; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
6829; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
6830; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
6831; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
6832; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
6833; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6834; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6835; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6836; GFX7-NEXT:    s_setpc_b64 s[30:31]
6837;
6838; GFX8-LABEL: v_fdiv_f32_constlhs0_daz:
6839; GFX8:       ; %bb.0:
6840; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6841; GFX8-NEXT:    s_mov_b32 s6, 0x4640e400
6842; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, s6
6843; GFX8-NEXT:    v_div_scale_f32 v2, vcc, s6, v0, s6
6844; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
6845; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
6846; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
6847; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
6848; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
6849; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
6850; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
6851; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
6852; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
6853; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
6854; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, s6
6855; GFX8-NEXT:    s_setpc_b64 s[30:31]
6856;
6857; GFX10-LABEL: v_fdiv_f32_constlhs0_daz:
6858; GFX10:       ; %bb.0:
6859; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6860; GFX10-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 0x4640e400
6861; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6862; GFX10-NEXT:    v_rcp_f32_e32 v2, v1
6863; GFX10-NEXT:    s_denorm_mode 15
6864; GFX10-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6865; GFX10-NEXT:    v_fmac_f32_e32 v2, v4, v2
6866; GFX10-NEXT:    v_mul_f32_e32 v4, v3, v2
6867; GFX10-NEXT:    v_fma_f32 v5, -v1, v4, v3
6868; GFX10-NEXT:    v_fmac_f32_e32 v4, v5, v2
6869; GFX10-NEXT:    v_fma_f32 v1, -v1, v4, v3
6870; GFX10-NEXT:    s_denorm_mode 12
6871; GFX10-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6872; GFX10-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6873; GFX10-NEXT:    s_setpc_b64 s[30:31]
6874;
6875; GFX11-LABEL: v_fdiv_f32_constlhs0_daz:
6876; GFX11:       ; %bb.0:
6877; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6878; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 0x4640e400
6879; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400
6880; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
6881; GFX11-NEXT:    s_denorm_mode 15
6882; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6883; GFX11-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
6884; GFX11-NEXT:    v_fmac_f32_e32 v2, v4, v2
6885; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
6886; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
6887; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
6888; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
6889; GFX11-NEXT:    s_denorm_mode 12
6890; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
6891; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 0x4640e400
6892; GFX11-NEXT:    s_setpc_b64 s[30:31]
6893;
6894; EG-LABEL: v_fdiv_f32_constlhs0_daz:
6895; EG:       ; %bb.0:
6896; EG-NEXT:    CF_END
6897; EG-NEXT:    PAD
6898  %div = fdiv float 12345.0, %x
6899  ret float %div
6900}
6901
; Divides the constant 12345.0 by %x with !fpmath !0 attached. The expected
; lowering picks a scale factor first: 0x2f800000 (2^-32) when |x| exceeds
; 0x6f800000 (2^96), otherwise 1.0. %x is multiplied by that factor, passed
; through v_rcp_f32, multiplied by 0x4640e400 (12345.0), and multiplied by
; the same factor again. gfx6-gfx8 share one set of checks here; gfx10 and
; gfx11 fold the constants into the compare and select as literals.
; NOTE(review): attribute group #0 and metadata !0 are defined outside this
; chunk; presumably the "daz" denormal mode and a 2.5 ulp bound - confirm.
6902define float @v_fdiv_f32_constlhs0_daz_25ulp(float %x) #0 {
6903; GFX678-LABEL: v_fdiv_f32_constlhs0_daz_25ulp:
6904; GFX678:       ; %bb.0:
6905; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6906; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
6907; GFX678-NEXT:    v_mov_b32_e32 v1, 0x2f800000
6908; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v0|, s4
6909; GFX678-NEXT:    v_cndmask_b32_e32 v1, 1.0, v1, vcc
6910; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
6911; GFX678-NEXT:    v_rcp_f32_e32 v0, v0
6912; GFX678-NEXT:    v_mul_f32_e32 v0, 0x4640e400, v0
6913; GFX678-NEXT:    v_mul_f32_e32 v0, v1, v0
6914; GFX678-NEXT:    s_setpc_b64 s[30:31]
6915;
6916; GFX10-LABEL: v_fdiv_f32_constlhs0_daz_25ulp:
6917; GFX10:       ; %bb.0:
6918; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6919; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v0|
6920; GFX10-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s4
6921; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
6922; GFX10-NEXT:    v_rcp_f32_e32 v0, v0
6923; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4640e400, v0
6924; GFX10-NEXT:    v_mul_f32_e32 v0, v1, v0
6925; GFX10-NEXT:    s_setpc_b64 s[30:31]
6926;
6927; GFX11-LABEL: v_fdiv_f32_constlhs0_daz_25ulp:
6928; GFX11:       ; %bb.0:
6929; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6930; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v0|
6931; GFX11-NEXT:    v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s0
6932; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
6933; GFX11-NEXT:    v_rcp_f32_e32 v0, v0
6934; GFX11-NEXT:    s_waitcnt_depctr 0xfff
6935; GFX11-NEXT:    v_mul_f32_e32 v0, 0x4640e400, v0
6936; GFX11-NEXT:    v_mul_f32_e32 v0, v1, v0
6937; GFX11-NEXT:    s_setpc_b64 s[30:31]
6938;
6939; EG-LABEL: v_fdiv_f32_constlhs0_daz_25ulp:
6940; EG:       ; %bb.0:
6941; EG-NEXT:    CF_END
6942; EG-NEXT:    PAD
6943  %div = fdiv float 12345.0, %x, !fpmath !0
6944  ret float %div
6945}
6946
; Divides %x by %y with no !fpmath metadata, where the numerator %x is
; annotated nofpclass(sub) and %y carries no such annotation. The checks
; still expect the full expansion on every amdgcn run: two v_div_scale_f32,
; v_rcp_f32, the FMA refinement chain, v_div_fmas_f32 and v_div_fixup_f32,
; with no mode-register writes.
; NOTE(review): attribute group #1 is defined outside this chunk; presumably
; it selects IEEE denormal handling, as the function name suggests - confirm.
6947define float @v_fdiv_f32_ieee_nodenorm_x(float nofpclass(sub) %x, float %y) #1 {
6948; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_nodenorm_x:
6949; GFX6-FASTFMA:       ; %bb.0:
6950; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6951; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
6952; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
6953; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
6954; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
6955; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
6956; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
6957; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
6958; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
6959; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
6960; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
6961; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
6962; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
6963;
6964; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_nodenorm_x:
6965; GFX6-SLOWFMA:       ; %bb.0:
6966; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6967; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
6968; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
6969; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
6970; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
6971; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
6972; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
6973; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
6974; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
6975; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
6976; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
6977; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
6978; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
6979;
6980; GFX7-LABEL: v_fdiv_f32_ieee_nodenorm_x:
6981; GFX7:       ; %bb.0:
6982; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6983; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
6984; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
6985; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
6986; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
6987; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
6988; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
6989; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
6990; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
6991; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
6992; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
6993; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
6994; GFX7-NEXT:    s_setpc_b64 s[30:31]
6995;
6996; GFX8-LABEL: v_fdiv_f32_ieee_nodenorm_x:
6997; GFX8:       ; %bb.0:
6998; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6999; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7000; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7001; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7002; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7003; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7004; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7005; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7006; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7007; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7008; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7009; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7010; GFX8-NEXT:    s_setpc_b64 s[30:31]
7011;
7012; GFX10-LABEL: v_fdiv_f32_ieee_nodenorm_x:
7013; GFX10:       ; %bb.0:
7014; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7015; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7016; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7017; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7018; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
7019; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7020; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7021; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7022; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7023; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7024; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7025; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7026; GFX10-NEXT:    s_setpc_b64 s[30:31]
7027;
7028; GFX11-LABEL: v_fdiv_f32_ieee_nodenorm_x:
7029; GFX11:       ; %bb.0:
7030; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7031; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7032; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7033; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7034; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7035; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
7036; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7037; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7038; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7039; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7040; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7041; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7042; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7043; GFX11-NEXT:    s_setpc_b64 s[30:31]
7044;
7045; EG-LABEL: v_fdiv_f32_ieee_nodenorm_x:
7046; EG:       ; %bb.0:
7047; EG-NEXT:    CF_END
7048; EG-NEXT:    PAD
7049  %div = fdiv float %x, %y
7050  ret float %div
7051}
7052
; Tests an f32 fdiv carrying !fpmath metadata when the numerator %x is declared
; nofpclass(sub); the denominator %y carries no such restriction.
; All GCN check blocks expect both inputs to be split with v_frexp_mant_f32 /
; v_frexp_exp_i32_f32, followed by one v_rcp_f32, a multiply and a final
; v_ldexp_f32; no v_div_scale_f32 / v_div_fmas_f32 / v_div_fixup_f32 appears.
; The GFX6 checks additionally guard each mantissa with a compare of |input|
; against 0x7f800000. The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #1 and metadata !0 are defined outside this
; excerpt; judging by the function name they presumably select IEEE denormal
; handling and a 2.5 ulp accuracy bound -- confirm against their definitions.
7053define float @v_fdiv_f32_ieee_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #1 {
7054; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7055; GFX6:       ; %bb.0:
7056; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7057; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
7058; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
7059; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
7060; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
7061; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
7062; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
7063; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
7064; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7065; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
7066; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7067; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
7068; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
7069; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
7070; GFX6-NEXT:    s_setpc_b64 s[30:31]
7071;
7072; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7073; GFX7:       ; %bb.0:
7074; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7075; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
7076; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
7077; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7078; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7079; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
7080; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
7081; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
7082; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
7083; GFX7-NEXT:    s_setpc_b64 s[30:31]
7084;
7085; GFX8-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7086; GFX8:       ; %bb.0:
7087; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7088; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
7089; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
7090; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7091; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7092; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
7093; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
7094; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
7095; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
7096; GFX8-NEXT:    s_setpc_b64 s[30:31]
7097;
7098; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7099; GFX10:       ; %bb.0:
7100; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7101; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
7102; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7103; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
7104; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7105; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
7106; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7107; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
7108; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
7109; GFX10-NEXT:    s_setpc_b64 s[30:31]
7110;
7111; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7112; GFX11:       ; %bb.0:
7113; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7114; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
7115; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7116; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
7117; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7118; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
7119; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7120; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7121; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
7122; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
7123; GFX11-NEXT:    s_setpc_b64 s[30:31]
7124;
7125; EG-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x:
7126; EG:       ; %bb.0:
7127; EG-NEXT:    CF_END
7128; EG-NEXT:    PAD
7129  %div = fdiv float %x, %y, !fpmath !0
7130  ret float %div
7131}
7132
; Tests a plain f32 fdiv (no !fpmath metadata) when the numerator %x is
; declared nofpclass(sub); the denominator %y carries no such restriction.
; Every GCN check block expects the v_div_scale_f32 / v_rcp_f32 / FMA refinement
; / v_div_fmas_f32 / v_div_fixup_f32 sequence. hwreg(HW_REG_MODE, 4, 2) is read
; with s_getreg_b32 before the FMA chain and written back with s_setreg_b32
; after it; in between the field is set to 3 (GFX6/7/8 checks) or
; s_denorm_mode 15 is issued (GFX10/11 checks).
; The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #2 is defined outside this excerpt; the
; function name suggests a dynamic denormal mode, and the saved/restored field
; is presumably the FP32 denormal control -- confirm.
7133define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #2 {
7134; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7135; GFX6-FASTFMA:       ; %bb.0:
7136; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7137; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7138; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
7139; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7140; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7141; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7142; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7143; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
7144; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
7145; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
7146; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
7147; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
7148; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7149; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7150; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7151; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
7152;
7153; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7154; GFX6-SLOWFMA:       ; %bb.0:
7155; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7156; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7157; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7158; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7159; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
7160; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7161; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7162; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
7163; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
7164; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
7165; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
7166; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
7167; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7168; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7169; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7170; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
7171;
7172; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7173; GFX7:       ; %bb.0:
7174; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7175; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7176; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
7177; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7178; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7179; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7180; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7181; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
7182; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
7183; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
7184; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
7185; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
7186; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7187; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7188; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7189; GFX7-NEXT:    s_setpc_b64 s[30:31]
7190;
7191; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7192; GFX8:       ; %bb.0:
7193; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7194; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7195; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7196; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7197; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7198; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7199; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7200; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7201; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7202; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7203; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7204; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7205; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7206; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7207; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7208; GFX8-NEXT:    s_setpc_b64 s[30:31]
7209;
7210; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7211; GFX10:       ; %bb.0:
7212; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7213; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7214; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7215; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7216; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7217; GFX10-NEXT:    s_denorm_mode 15
7218; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7219; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
7220; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7221; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7222; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7223; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7224; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7225; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7226; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7227; GFX10-NEXT:    s_setpc_b64 s[30:31]
7228;
7229; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7230; GFX11:       ; %bb.0:
7231; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7232; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7233; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7234; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
7235; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7236; GFX11-NEXT:    s_denorm_mode 15
7237; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7238; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7239; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
7240; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7241; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7242; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7243; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7244; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
7245; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7246; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7247; GFX11-NEXT:    s_setpc_b64 s[30:31]
7248;
7249; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_x:
7250; EG:       ; %bb.0:
7251; EG-NEXT:    CF_END
7252; EG-NEXT:    PAD
7253  %div = fdiv float %x, %y
7254  ret float %div
7255}
7256
; Tests an f32 fdiv carrying !fpmath metadata, under attribute group #2, when
; the numerator %x is declared nofpclass(sub).
; All GCN check blocks expect the v_frexp_mant_f32 / v_frexp_exp_i32_f32 +
; v_rcp_f32 + v_ldexp_f32 expansion, with no access to hwreg(HW_REG_MODE, ...)
; and no s_denorm_mode. The GFX6 checks additionally guard each mantissa with a
; compare of |input| against 0x7f800000.
; The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #2 and metadata !0 are defined outside this
; excerpt; the function name suggests a dynamic denormal mode and a 2.5 ulp
; accuracy bound -- confirm against their definitions.
7257define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #2 {
7258; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7259; GFX6:       ; %bb.0:
7260; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7261; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
7262; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
7263; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
7264; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
7265; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
7266; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
7267; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
7268; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7269; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
7270; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7271; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
7272; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
7273; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
7274; GFX6-NEXT:    s_setpc_b64 s[30:31]
7275;
7276; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7277; GFX7:       ; %bb.0:
7278; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7279; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
7280; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
7281; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7282; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7283; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
7284; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
7285; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
7286; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
7287; GFX7-NEXT:    s_setpc_b64 s[30:31]
7288;
7289; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7290; GFX8:       ; %bb.0:
7291; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7292; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
7293; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
7294; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7295; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7296; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
7297; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
7298; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
7299; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
7300; GFX8-NEXT:    s_setpc_b64 s[30:31]
7301;
7302; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7303; GFX10:       ; %bb.0:
7304; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7305; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
7306; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7307; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
7308; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7309; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
7310; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7311; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
7312; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
7313; GFX10-NEXT:    s_setpc_b64 s[30:31]
7314;
7315; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7316; GFX11:       ; %bb.0:
7317; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7318; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
7319; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7320; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
7321; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7322; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
7323; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7324; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7325; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
7326; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
7327; GFX11-NEXT:    s_setpc_b64 s[30:31]
7328;
7329; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x:
7330; EG:       ; %bb.0:
7331; EG-NEXT:    CF_END
7332; EG-NEXT:    PAD
7333  %div = fdiv float %x, %y, !fpmath !0
7334  ret float %div
7335}
7336
; Tests a plain f32 fdiv (no !fpmath metadata), under attribute group #0, when
; the numerator %x is declared nofpclass(sub).
; Every GCN check block expects the v_div_scale_f32 / v_rcp_f32 / FMA refinement
; / v_div_fmas_f32 / v_div_fixup_f32 sequence. The mode field is toggled with
; immediates rather than saved and restored: hwreg(HW_REG_MODE, 4, 2) is set to
; 3 before the FMA chain and to 0 after it (GFX6/7/8 checks), or s_denorm_mode
; 15 is followed by s_denorm_mode 12 (GFX10/11 checks). No s_getreg_b32 appears.
; The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #0 is defined outside this excerpt; the
; function name suggests denormals-are-zero for f32 -- confirm.
7337define float @v_fdiv_f32_daz_nodenorm_x(float nofpclass(sub) %x, float %y) #0 {
7338; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_nodenorm_x:
7339; GFX6-FASTFMA:       ; %bb.0:
7340; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7341; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7342; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
7343; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7344; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7345; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7346; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
7347; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
7348; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
7349; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
7350; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
7351; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7352; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7353; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7354; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
7355;
7356; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_nodenorm_x:
7357; GFX6-SLOWFMA:       ; %bb.0:
7358; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7359; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7360; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7361; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
7362; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7363; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7364; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
7365; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
7366; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
7367; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
7368; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
7369; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7370; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7371; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7372; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
7373;
7374; GFX7-LABEL: v_fdiv_f32_daz_nodenorm_x:
7375; GFX7:       ; %bb.0:
7376; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7377; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7378; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
7379; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7380; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7381; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7382; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
7383; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
7384; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
7385; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
7386; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
7387; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7388; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7389; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7390; GFX7-NEXT:    s_setpc_b64 s[30:31]
7391;
7392; GFX8-LABEL: v_fdiv_f32_daz_nodenorm_x:
7393; GFX8:       ; %bb.0:
7394; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7395; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7396; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7397; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7398; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7399; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7400; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7401; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7402; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7403; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7404; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7405; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7406; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7407; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7408; GFX8-NEXT:    s_setpc_b64 s[30:31]
7409;
7410; GFX10-LABEL: v_fdiv_f32_daz_nodenorm_x:
7411; GFX10:       ; %bb.0:
7412; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7413; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7414; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7415; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7416; GFX10-NEXT:    s_denorm_mode 15
7417; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7418; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
7419; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7420; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7421; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7422; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7423; GFX10-NEXT:    s_denorm_mode 12
7424; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7425; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7426; GFX10-NEXT:    s_setpc_b64 s[30:31]
7427;
7428; GFX11-LABEL: v_fdiv_f32_daz_nodenorm_x:
7429; GFX11:       ; %bb.0:
7430; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7431; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7432; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7433; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7434; GFX11-NEXT:    s_denorm_mode 15
7435; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7436; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7437; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
7438; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7439; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7440; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7441; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7442; GFX11-NEXT:    s_denorm_mode 12
7443; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7444; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7445; GFX11-NEXT:    s_setpc_b64 s[30:31]
7446;
7447; EG-LABEL: v_fdiv_f32_daz_nodenorm_x:
7448; EG:       ; %bb.0:
7449; EG-NEXT:    CF_END
7450; EG-NEXT:    PAD
7451  %div = fdiv float %x, %y
7452  ret float %div
7453}
7454
; Tests an f32 fdiv carrying !fpmath metadata, under attribute group #0, when
; the numerator %x is declared nofpclass(sub).
; The GCN checks expect a scaled-reciprocal sequence: |v1| is compared against
; 0x6f800000 (the f32 bit pattern of 2^96) to select a scale factor of either
; 0x2f800000 (2^-32) or 1.0; v1 is multiplied by that factor and passed through
; v_rcp_f32, and the result is multiplied by v0 and then by the factor again.
; No frexp/ldexp, v_div_scale_f32 or mode-register access is expected.
; GFX6/7/8 share the GFX678 prefix here. The EG (r600) checks expect only
; CF_END and PAD.
; NOTE(review): attribute group #0 and metadata !0 are defined outside this
; excerpt; the function name suggests denormals-are-zero and a 2.5 ulp accuracy
; bound, and v0/v1 presumably hold %x/%y -- confirm.
7455define float @v_fdiv_f32_daz_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #0 {
7456; GFX678-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x:
7457; GFX678:       ; %bb.0:
7458; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7459; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
7460; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
7461; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
7462; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
7463; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v2
7464; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
7465; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
7466; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
7467; GFX678-NEXT:    s_setpc_b64 s[30:31]
7468;
7469; GFX10-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x:
7470; GFX10:       ; %bb.0:
7471; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7472; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
7473; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
7474; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v2
7475; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
7476; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
7477; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
7478; GFX10-NEXT:    s_setpc_b64 s[30:31]
7479;
7480; GFX11-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x:
7481; GFX11:       ; %bb.0:
7482; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7483; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
7484; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
7485; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
7486; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
7487; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7488; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
7489; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
7490; GFX11-NEXT:    s_setpc_b64 s[30:31]
7491;
7492; EG-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x:
7493; EG:       ; %bb.0:
7494; EG-NEXT:    CF_END
7495; EG-NEXT:    PAD
7496  %div = fdiv float %x, %y, !fpmath !0
7497  ret float %div
7498}
7499
; Tests a plain f32 fdiv (no !fpmath metadata), under attribute group #1, when
; the denominator %y is declared nofpclass(sub); the numerator %x carries no
; such restriction.
; Every GCN check block expects the v_div_scale_f32 / v_rcp_f32 / FMA refinement
; / v_div_fmas_f32 / v_div_fixup_f32 sequence with no write to
; hwreg(HW_REG_MODE, ...) and no s_denorm_mode around the FMA chain.
; The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #1 is defined outside this excerpt; the
; function name suggests IEEE denormal handling -- confirm.
7500define float @v_fdiv_f32_ieee_nodenorm_y(float %x, float nofpclass(sub) %y) #1 {
7501; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7502; GFX6-FASTFMA:       ; %bb.0:
7503; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7504; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7505; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
7506; GFX6-FASTFMA-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7507; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v4, v3, v3
7508; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7509; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
7510; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
7511; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
7512; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
7513; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7514; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7515; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
7516;
7517; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7518; GFX6-SLOWFMA:       ; %bb.0:
7519; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7520; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7521; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7522; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
7523; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7524; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
7525; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
7526; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
7527; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
7528; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
7529; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7530; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7531; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
7532;
7533; GFX7-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7534; GFX7:       ; %bb.0:
7535; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7536; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7537; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
7538; GFX7-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7539; GFX7-NEXT:    v_fma_f32 v3, v4, v3, v3
7540; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7541; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
7542; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
7543; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
7544; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
7545; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7546; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7547; GFX7-NEXT:    s_setpc_b64 s[30:31]
7548;
7549; GFX8-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7550; GFX8:       ; %bb.0:
7551; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7552; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7553; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7554; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7555; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7556; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7557; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7558; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7559; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7560; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7561; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7562; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7563; GFX8-NEXT:    s_setpc_b64 s[30:31]
7564;
7565; GFX10-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7566; GFX10:       ; %bb.0:
7567; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7568; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7569; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7570; GFX10-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7571; GFX10-NEXT:    v_fmac_f32_e32 v3, v4, v3
7572; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7573; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7574; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7575; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7576; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7577; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7578; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7579; GFX10-NEXT:    s_setpc_b64 s[30:31]
7580;
7581; GFX11-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7582; GFX11:       ; %bb.0:
7583; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7584; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7585; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7586; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7587; GFX11-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
7588; GFX11-NEXT:    v_fmac_f32_e32 v3, v4, v3
7589; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7590; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7591; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7592; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7593; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7594; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7595; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7596; GFX11-NEXT:    s_setpc_b64 s[30:31]
7597;
7598; EG-LABEL: v_fdiv_f32_ieee_nodenorm_y:
7599; EG:       ; %bb.0:
7600; EG-NEXT:    CF_END
7601; EG-NEXT:    PAD
7602  %div = fdiv float %x, %y
7603  ret float %div
7604}
7605
; Tests an f32 fdiv carrying !fpmath metadata, under attribute group #1, when
; the denominator %y is declared nofpclass(sub); the numerator %x carries no
; such restriction.
; All GCN check blocks expect both inputs to be split with v_frexp_mant_f32 /
; v_frexp_exp_i32_f32, followed by one v_rcp_f32, a multiply and a final
; v_ldexp_f32; no v_div_scale_f32 / v_div_fmas_f32 / v_div_fixup_f32 appears.
; The GFX6 checks additionally guard each mantissa with a compare of |input|
; against 0x7f800000. The EG (r600) checks expect only CF_END and PAD.
; NOTE(review): attribute group #1 and metadata !0 are defined outside this
; excerpt; judging by the function name they presumably select IEEE denormal
; handling and a 2.5 ulp accuracy bound -- confirm against their definitions.
7606define float @v_fdiv_f32_ieee_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #1 {
7607; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7608; GFX6:       ; %bb.0:
7609; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7610; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
7611; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
7612; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
7613; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
7614; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
7615; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
7616; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
7617; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7618; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
7619; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7620; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
7621; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
7622; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
7623; GFX6-NEXT:    s_setpc_b64 s[30:31]
7624;
7625; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7626; GFX7:       ; %bb.0:
7627; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7628; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
7629; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
7630; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7631; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7632; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
7633; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
7634; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
7635; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
7636; GFX7-NEXT:    s_setpc_b64 s[30:31]
7637;
7638; GFX8-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7639; GFX8:       ; %bb.0:
7640; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7641; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
7642; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
7643; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7644; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7645; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
7646; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
7647; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
7648; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
7649; GFX8-NEXT:    s_setpc_b64 s[30:31]
7650;
7651; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7652; GFX10:       ; %bb.0:
7653; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7654; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
7655; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7656; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
7657; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7658; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
7659; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7660; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
7661; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
7662; GFX10-NEXT:    s_setpc_b64 s[30:31]
7663;
7664; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7665; GFX11:       ; %bb.0:
7666; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7667; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
7668; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7669; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
7670; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7671; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
7672; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7673; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7674; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
7675; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
7676; GFX11-NEXT:    s_setpc_b64 s[30:31]
7677;
7678; EG-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y:
7679; EG:       ; %bb.0:
7680; EG-NEXT:    CF_END
7681; EG-NEXT:    PAD
7682  %div = fdiv float %x, %y, !fpmath !0
7683  ret float %div
7684}
7685
7686define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #2 {
7687; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7688; GFX6-FASTFMA:       ; %bb.0:
7689; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7690; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7691; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
7692; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7693; GFX6-FASTFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7694; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7695; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7696; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
7697; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
7698; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
7699; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
7700; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
7701; GFX6-FASTFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7702; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7703; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7704; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
7705;
7706; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7707; GFX6-SLOWFMA:       ; %bb.0:
7708; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7709; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7710; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7711; GFX6-SLOWFMA-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7712; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
7713; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7714; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7715; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
7716; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
7717; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
7718; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
7719; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
7720; GFX6-SLOWFMA-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7721; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7722; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7723; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
7724;
7725; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7726; GFX7:       ; %bb.0:
7727; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7728; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7729; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
7730; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7731; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7732; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7733; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7734; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
7735; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
7736; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
7737; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
7738; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
7739; GFX7-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7740; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7741; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7742; GFX7-NEXT:    s_setpc_b64 s[30:31]
7743;
7744; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7745; GFX8:       ; %bb.0:
7746; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7747; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7748; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7749; GFX8-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7750; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7751; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7752; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7753; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7754; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7755; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7756; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7757; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7758; GFX8-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7759; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7760; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7761; GFX8-NEXT:    s_setpc_b64 s[30:31]
7762;
7763; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7764; GFX10:       ; %bb.0:
7765; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7766; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7767; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7768; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2)
7769; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7770; GFX10-NEXT:    s_denorm_mode 15
7771; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7772; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
7773; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7774; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7775; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7776; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7777; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
7778; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7779; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7780; GFX10-NEXT:    s_setpc_b64 s[30:31]
7781;
7782; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7783; GFX11:       ; %bb.0:
7784; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7785; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7786; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7787; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
7788; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7789; GFX11-NEXT:    s_denorm_mode 15
7790; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7791; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7792; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
7793; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7794; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7795; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7796; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7797; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
7798; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7799; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7800; GFX11-NEXT:    s_setpc_b64 s[30:31]
7801;
7802; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
7803; EG:       ; %bb.0:
7804; EG-NEXT:    CF_END
7805; EG-NEXT:    PAD
7806  %div = fdiv float %x, %y
7807  ret float %div
7808}
7809
; f32 fdiv under attribute group #2 ("denormal-fp-math-f32"="dynamic,dynamic"),
; with the divisor %y marked nofpclass(sub) and !fpmath !0 (2.5) attached to
; the division.
; The GCN checks expect a v_frexp_mant / v_rcp / v_mul / v_frexp_exp / v_sub /
; v_ldexp sequence and no v_div_scale / v_div_fmas / v_div_fixup. GFX6
; additionally compares |v| against 0x7f800000 (the f32 +inf bit pattern) and
; uses v_cndmask to keep the original value instead of the v_frexp_mant result
; when that compare fails. The EG checks expect only CF_END / PAD.
7810define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #2 {
7811; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7812; GFX6:       ; %bb.0:
7813; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7814; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
7815; GFX6-NEXT:    v_frexp_mant_f32_e32 v2, v1
7816; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v1|, s4
7817; GFX6-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
7818; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
7819; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v0
7820; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
7821; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7822; GFX6-NEXT:    v_cndmask_b32_e32 v3, v0, v3, vcc
7823; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7824; GFX6-NEXT:    v_mul_f32_e32 v2, v3, v2
7825; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
7826; GFX6-NEXT:    v_ldexp_f32_e32 v0, v2, v0
7827; GFX6-NEXT:    s_setpc_b64 s[30:31]
7828;
7829; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7830; GFX7:       ; %bb.0:
7831; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7832; GFX7-NEXT:    v_frexp_mant_f32_e32 v2, v1
7833; GFX7-NEXT:    v_rcp_f32_e32 v2, v2
7834; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7835; GFX7-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7836; GFX7-NEXT:    v_frexp_mant_f32_e32 v0, v0
7837; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v2
7838; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
7839; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
7840; GFX7-NEXT:    s_setpc_b64 s[30:31]
7841;
7842; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7843; GFX8:       ; %bb.0:
7844; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7845; GFX8-NEXT:    v_frexp_mant_f32_e32 v2, v1
7846; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
7847; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7848; GFX8-NEXT:    v_frexp_exp_i32_f32_e32 v3, v0
7849; GFX8-NEXT:    v_frexp_mant_f32_e32 v0, v0
7850; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v2
7851; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v3, v1
7852; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
7853; GFX8-NEXT:    s_setpc_b64 s[30:31]
7854;
7855; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7856; GFX10:       ; %bb.0:
7857; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7858; GFX10-NEXT:    v_frexp_mant_f32_e32 v2, v1
7859; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7860; GFX10-NEXT:    v_frexp_mant_f32_e32 v3, v0
7861; GFX10-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7862; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
7863; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7864; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
7865; GFX10-NEXT:    v_ldexp_f32 v0, v2, v0
7866; GFX10-NEXT:    s_setpc_b64 s[30:31]
7867;
7868; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7869; GFX11:       ; %bb.0:
7870; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7871; GFX11-NEXT:    v_frexp_mant_f32_e32 v2, v1
7872; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
7873; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v0
7874; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
7875; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
7876; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1
7877; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7878; GFX11-NEXT:    v_mul_f32_e32 v2, v3, v2
7879; GFX11-NEXT:    v_ldexp_f32 v0, v2, v0
7880; GFX11-NEXT:    s_setpc_b64 s[30:31]
7881;
7882; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
7883; EG:       ; %bb.0:
7884; EG-NEXT:    CF_END
7885; EG-NEXT:    PAD
7886  %div = fdiv float %x, %y, !fpmath !0
7887  ret float %div
7888}
7889
; f32 fdiv under attribute group #0
; ("denormal-fp-math-f32"="preserve-sign,preserve-sign"), with the divisor %y
; marked nofpclass(sub) and no !fpmath metadata on the division.
; The GCN checks expect the v_div_scale / v_rcp / v_fma chain / v_div_fmas /
; v_div_fixup sequence. The FMA chain is bracketed by immediate writes to the
; hwreg(HW_REG_MODE, 4, 2) field (3 before, 0 after) on GFX6/7/8, and by
; s_denorm_mode 15 / s_denorm_mode 12 on GFX10/11; no s_getreg_b32 read of
; the mode register is expected. The EG checks expect only CF_END / PAD.
7890define float @v_fdiv_f32_daz_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
7891; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_nodenorm_y:
7892; GFX6-FASTFMA:       ; %bb.0:
7893; GFX6-FASTFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7894; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7895; GFX6-FASTFMA-NEXT:    v_rcp_f32_e32 v3, v2
7896; GFX6-FASTFMA-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7897; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7898; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7899; GFX6-FASTFMA-NEXT:    v_fma_f32 v3, v5, v3, v3
7900; GFX6-FASTFMA-NEXT:    v_mul_f32_e32 v5, v4, v3
7901; GFX6-FASTFMA-NEXT:    v_fma_f32 v6, -v2, v5, v4
7902; GFX6-FASTFMA-NEXT:    v_fma_f32 v5, v6, v3, v5
7903; GFX6-FASTFMA-NEXT:    v_fma_f32 v2, -v2, v5, v4
7904; GFX6-FASTFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7905; GFX6-FASTFMA-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7906; GFX6-FASTFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7907; GFX6-FASTFMA-NEXT:    s_setpc_b64 s[30:31]
7908;
7909; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_nodenorm_y:
7910; GFX6-SLOWFMA:       ; %bb.0:
7911; GFX6-SLOWFMA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7912; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7913; GFX6-SLOWFMA-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7914; GFX6-SLOWFMA-NEXT:    v_rcp_f32_e32 v4, v2
7915; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7916; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7917; GFX6-SLOWFMA-NEXT:    v_fma_f32 v4, v5, v4, v4
7918; GFX6-SLOWFMA-NEXT:    v_mul_f32_e32 v5, v3, v4
7919; GFX6-SLOWFMA-NEXT:    v_fma_f32 v6, -v2, v5, v3
7920; GFX6-SLOWFMA-NEXT:    v_fma_f32 v5, v6, v4, v5
7921; GFX6-SLOWFMA-NEXT:    v_fma_f32 v2, -v2, v5, v3
7922; GFX6-SLOWFMA-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7923; GFX6-SLOWFMA-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7924; GFX6-SLOWFMA-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7925; GFX6-SLOWFMA-NEXT:    s_setpc_b64 s[30:31]
7926;
7927; GFX7-LABEL: v_fdiv_f32_daz_nodenorm_y:
7928; GFX7:       ; %bb.0:
7929; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7930; GFX7-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7931; GFX7-NEXT:    v_rcp_f32_e32 v3, v2
7932; GFX7-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
7933; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7934; GFX7-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7935; GFX7-NEXT:    v_fma_f32 v3, v5, v3, v3
7936; GFX7-NEXT:    v_mul_f32_e32 v5, v4, v3
7937; GFX7-NEXT:    v_fma_f32 v6, -v2, v5, v4
7938; GFX7-NEXT:    v_fma_f32 v5, v6, v3, v5
7939; GFX7-NEXT:    v_fma_f32 v2, -v2, v5, v4
7940; GFX7-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7941; GFX7-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7942; GFX7-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7943; GFX7-NEXT:    s_setpc_b64 s[30:31]
7944;
7945; GFX8-LABEL: v_fdiv_f32_daz_nodenorm_y:
7946; GFX8:       ; %bb.0:
7947; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7948; GFX8-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
7949; GFX8-NEXT:    v_div_scale_f32 v3, vcc, v0, v1, v0
7950; GFX8-NEXT:    v_rcp_f32_e32 v4, v2
7951; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
7952; GFX8-NEXT:    v_fma_f32 v5, -v2, v4, 1.0
7953; GFX8-NEXT:    v_fma_f32 v4, v5, v4, v4
7954; GFX8-NEXT:    v_mul_f32_e32 v5, v3, v4
7955; GFX8-NEXT:    v_fma_f32 v6, -v2, v5, v3
7956; GFX8-NEXT:    v_fma_f32 v5, v6, v4, v5
7957; GFX8-NEXT:    v_fma_f32 v2, -v2, v5, v3
7958; GFX8-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
7959; GFX8-NEXT:    v_div_fmas_f32 v2, v2, v4, v5
7960; GFX8-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7961; GFX8-NEXT:    s_setpc_b64 s[30:31]
7962;
7963; GFX10-LABEL: v_fdiv_f32_daz_nodenorm_y:
7964; GFX10:       ; %bb.0:
7965; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7966; GFX10-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
7967; GFX10-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7968; GFX10-NEXT:    v_rcp_f32_e32 v3, v2
7969; GFX10-NEXT:    s_denorm_mode 15
7970; GFX10-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7971; GFX10-NEXT:    v_fmac_f32_e32 v3, v5, v3
7972; GFX10-NEXT:    v_mul_f32_e32 v5, v4, v3
7973; GFX10-NEXT:    v_fma_f32 v6, -v2, v5, v4
7974; GFX10-NEXT:    v_fmac_f32_e32 v5, v6, v3
7975; GFX10-NEXT:    v_fma_f32 v2, -v2, v5, v4
7976; GFX10-NEXT:    s_denorm_mode 12
7977; GFX10-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7978; GFX10-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7979; GFX10-NEXT:    s_setpc_b64 s[30:31]
7980;
7981; GFX11-LABEL: v_fdiv_f32_daz_nodenorm_y:
7982; GFX11:       ; %bb.0:
7983; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7984; GFX11-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
7985; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
7986; GFX11-NEXT:    v_rcp_f32_e32 v3, v2
7987; GFX11-NEXT:    s_denorm_mode 15
7988; GFX11-NEXT:    s_waitcnt_depctr 0xfff
7989; GFX11-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
7990; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v3
7991; GFX11-NEXT:    v_mul_f32_e32 v5, v4, v3
7992; GFX11-NEXT:    v_fma_f32 v6, -v2, v5, v4
7993; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v3
7994; GFX11-NEXT:    v_fma_f32 v2, -v2, v5, v4
7995; GFX11-NEXT:    s_denorm_mode 12
7996; GFX11-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
7997; GFX11-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
7998; GFX11-NEXT:    s_setpc_b64 s[30:31]
7999;
8000; EG-LABEL: v_fdiv_f32_daz_nodenorm_y:
8001; EG:       ; %bb.0:
8002; EG-NEXT:    CF_END
8003; EG-NEXT:    PAD
8004  %div = fdiv float %x, %y
8005  ret float %div
8006}
8007
; f32 fdiv under attribute group #0
; ("denormal-fp-math-f32"="preserve-sign,preserve-sign"), with the divisor %y
; marked nofpclass(sub) and !fpmath !0 (2.5) attached to the division.
; The GCN checks expect a scaled-reciprocal sequence: compare |y| against
; 0x6f800000 (2^96 as an f32 bit pattern), select a scale of 0x2f800000
; (2^-32) when |y| is larger and 1.0 otherwise, multiply y by the scale,
; take v_rcp_f32, multiply by x, then multiply by the scale again.
; GFX6/7/8 share one GFX678 check block. The EG checks expect only
; CF_END / PAD.
8008define float @v_fdiv_f32_daz_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
8009; GFX678-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
8010; GFX678:       ; %bb.0:
8011; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8012; GFX678-NEXT:    s_mov_b32 s4, 0x6f800000
8013; GFX678-NEXT:    v_mov_b32_e32 v2, 0x2f800000
8014; GFX678-NEXT:    v_cmp_gt_f32_e64 vcc, |v1|, s4
8015; GFX678-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
8016; GFX678-NEXT:    v_mul_f32_e32 v1, v1, v2
8017; GFX678-NEXT:    v_rcp_f32_e32 v1, v1
8018; GFX678-NEXT:    v_mul_f32_e32 v0, v0, v1
8019; GFX678-NEXT:    v_mul_f32_e32 v0, v2, v0
8020; GFX678-NEXT:    s_setpc_b64 s[30:31]
8021;
8022; GFX10-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
8023; GFX10:       ; %bb.0:
8024; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8025; GFX10-NEXT:    v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
8026; GFX10-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
8027; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v2
8028; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
8029; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
8030; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v0
8031; GFX10-NEXT:    s_setpc_b64 s[30:31]
8032;
8033; GFX11-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
8034; GFX11:       ; %bb.0:
8035; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8036; GFX11-NEXT:    v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
8037; GFX11-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
8038; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
8039; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
8040; GFX11-NEXT:    s_waitcnt_depctr 0xfff
8041; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1
8042; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v0
8043; GFX11-NEXT:    s_setpc_b64 s[30:31]
8044;
8045; EG-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
8046; EG:       ; %bb.0:
8047; EG-NEXT:    CF_END
8048; EG-NEXT:    PAD
8049  %div = fdiv float %x, %y, !fpmath !0
8050  ret float %div
8051}
8052
; Attribute groups selecting the f32 denormal mode each test function is
; compiled under (see "denormal-fp-math-f32" in the LLVM Language Reference):
;   #0 - preserve-sign for both components (used by the *_daz_* tests)
;   #1 - ieee for both components
;   #2 - dynamic for both components (used by the *_dynamic_* tests)
8053attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
8054attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" }
8055attributes #2 = { "denormal-fp-math-f32"="dynamic,dynamic" }
8056
; Operand for the !fpmath annotations on the *_25ulp fdiv instructions:
; a required accuracy of 2.5 ULP.
8057!0 = !{float 2.500000e+00}
8058