xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll (revision 758444ca3e7163a1504eeced3383af861d01d761)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10_W32 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
7; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
8; REQUIRES: do-not-run-me
9
; v_div_fmas_f32: calls llvm.amdgcn.div.fmas.f32 on three float arguments plus
; an i1 selector %d, all passed as ordinary function arguments, and returns the
; intrinsic's result.
; The autogenerated checks below expect v3 (presumably the register carrying %d;
; confirm against the calling convention) to be masked to bit 0 and compared
; against zero into vcc (vcc_lo for the wave32 prefixes) before v_div_fmas_f32.
; The GFX7 and GFX8 expectations additionally contain "s_nop 3" between the
; compare and v_div_fmas_f32; the GFX10 and GFX11 expectations do not.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
10define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
11; GFX7-LABEL: v_div_fmas_f32:
12; GFX7:       ; %bb.0:
13; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX7-NEXT:    v_and_b32_e32 v3, 1, v3
15; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
16; GFX7-NEXT:    s_nop 3
17; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
18; GFX7-NEXT:    s_setpc_b64 s[30:31]
19;
20; GFX8-LABEL: v_div_fmas_f32:
21; GFX8:       ; %bb.0:
22; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX8-NEXT:    v_and_b32_e32 v3, 1, v3
24; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
25; GFX8-NEXT:    s_nop 3
26; GFX8-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
27; GFX8-NEXT:    s_setpc_b64 s[30:31]
28;
29; GFX10_W32-LABEL: v_div_fmas_f32:
30; GFX10_W32:       ; %bb.0:
31; GFX10_W32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32; GFX10_W32-NEXT:    v_and_b32_e32 v3, 1, v3
33; GFX10_W32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
34; GFX10_W32-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
35; GFX10_W32-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX10_W64-LABEL: v_div_fmas_f32:
38; GFX10_W64:       ; %bb.0:
39; GFX10_W64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX10_W64-NEXT:    v_and_b32_e32 v3, 1, v3
41; GFX10_W64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
42; GFX10_W64-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
43; GFX10_W64-NEXT:    s_setpc_b64 s[30:31]
44;
45; GFX11_W32-LABEL: v_div_fmas_f32:
46; GFX11_W32:       ; %bb.0:
47; GFX11_W32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GFX11_W32-NEXT:    v_and_b32_e32 v3, 1, v3
49; GFX11_W32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v3
50; GFX11_W32-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
51; GFX11_W32-NEXT:    s_setpc_b64 s[30:31]
52;
53; GFX11_W64-LABEL: v_div_fmas_f32:
54; GFX11_W64:       ; %bb.0:
55; GFX11_W64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX11_W64-NEXT:    v_and_b32_e32 v3, 1, v3
57; GFX11_W64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v3
58; GFX11_W64-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
59; GFX11_W64-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one intrinsic call whose result is returned directly.
60  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
61  ret float %result
62}
63
; v_div_fmas_f64: double-precision counterpart of the f32 function-argument
; test. Calls llvm.amdgcn.div.fmas.f64 on three double arguments plus an i1
; selector %d and returns the intrinsic's result.
; The autogenerated checks expect the operands in the register pairs v[0:1],
; v[2:3] and v[4:5], and v6 (presumably the register carrying %d; confirm
; against the calling convention) to be masked to bit 0 and compared against
; zero into vcc (vcc_lo for the wave32 prefixes) before v_div_fmas_f64.
; As in the f32 case, only the GFX7 and GFX8 expectations contain "s_nop 3"
; ahead of the v_div_fmas instruction.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
64define double @v_div_fmas_f64(double %a, double %b, double %c, i1 %d) {
65; GFX7-LABEL: v_div_fmas_f64:
66; GFX7:       ; %bb.0:
67; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX7-NEXT:    v_and_b32_e32 v6, 1, v6
69; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
70; GFX7-NEXT:    s_nop 3
71; GFX7-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
72; GFX7-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX8-LABEL: v_div_fmas_f64:
75; GFX8:       ; %bb.0:
76; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX8-NEXT:    v_and_b32_e32 v6, 1, v6
78; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
79; GFX8-NEXT:    s_nop 3
80; GFX8-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
81; GFX8-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX10_W32-LABEL: v_div_fmas_f64:
84; GFX10_W32:       ; %bb.0:
85; GFX10_W32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX10_W32-NEXT:    v_and_b32_e32 v6, 1, v6
87; GFX10_W32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
88; GFX10_W32-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
89; GFX10_W32-NEXT:    s_setpc_b64 s[30:31]
90;
91; GFX10_W64-LABEL: v_div_fmas_f64:
92; GFX10_W64:       ; %bb.0:
93; GFX10_W64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; GFX10_W64-NEXT:    v_and_b32_e32 v6, 1, v6
95; GFX10_W64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
96; GFX10_W64-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
97; GFX10_W64-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX11_W32-LABEL: v_div_fmas_f64:
100; GFX11_W32:       ; %bb.0:
101; GFX11_W32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX11_W32-NEXT:    v_and_b32_e32 v6, 1, v6
103; GFX11_W32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
104; GFX11_W32-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
105; GFX11_W32-NEXT:    s_setpc_b64 s[30:31]
106;
107; GFX11_W64-LABEL: v_div_fmas_f64:
108; GFX11_W64:       ; %bb.0:
109; GFX11_W64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GFX11_W64-NEXT:    v_and_b32_e32 v6, 1, v6
111; GFX11_W64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
112; GFX11_W64-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
113; GFX11_W64-NEXT:    s_setpc_b64 s[30:31]
; IR under test: one intrinsic call whose result is returned directly.
114  %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
115  ret double %result
116}
117
; s_div_fmas_f32: amdgpu_ps variant in which every argument is marked inreg.
; The i1 selector is not passed directly: it is computed in the IR as
; (%d == 0) and then fed to llvm.amdgcn.div.fmas.f32.
; The autogenerated checks expect the compare to be done with scalar
; instructions (s_cmp_eq_u32 / s_cselect_b32 / s_and_b32) and the resulting
; bit to be turned into vcc (vcc_lo for the wave32 prefixes) by
; v_cmp_ne_u32_e64.
; Operand placement differs by target in the expectations: GFX7 and GFX8 copy
; all three float operands into VGPRs, whereas GFX10 and GFX11 pass s0 directly
; as the first source and copy only s1 and s2. The GFX11 wave32 expectation
; performs those two copies with a single v_dual_mov_b32.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
118define amdgpu_ps float @s_div_fmas_f32(float inreg %a, float inreg %b, float inreg %c, i32 inreg %d) {
119; GFX7-LABEL: s_div_fmas_f32:
120; GFX7:       ; %bb.0:
121; GFX7-NEXT:    s_cmp_eq_u32 s3, 0
122; GFX7-NEXT:    s_cselect_b32 s3, 1, 0
123; GFX7-NEXT:    v_mov_b32_e32 v0, s0
124; GFX7-NEXT:    s_and_b32 s0, 1, s3
125; GFX7-NEXT:    v_mov_b32_e32 v1, s1
126; GFX7-NEXT:    v_mov_b32_e32 v2, s2
127; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
128; GFX7-NEXT:    s_nop 3
129; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
130; GFX7-NEXT:    ; return to shader part epilog
131;
132; GFX8-LABEL: s_div_fmas_f32:
133; GFX8:       ; %bb.0:
134; GFX8-NEXT:    s_cmp_eq_u32 s3, 0
135; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
136; GFX8-NEXT:    v_mov_b32_e32 v0, s0
137; GFX8-NEXT:    s_and_b32 s0, 1, s3
138; GFX8-NEXT:    v_mov_b32_e32 v1, s1
139; GFX8-NEXT:    v_mov_b32_e32 v2, s2
140; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
141; GFX8-NEXT:    s_nop 3
142; GFX8-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
143; GFX8-NEXT:    ; return to shader part epilog
144;
145; GFX10_W32-LABEL: s_div_fmas_f32:
146; GFX10_W32:       ; %bb.0:
147; GFX10_W32-NEXT:    s_cmp_eq_u32 s3, 0
148; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s1
149; GFX10_W32-NEXT:    s_cselect_b32 s3, 1, 0
150; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s2
151; GFX10_W32-NEXT:    s_and_b32 s3, 1, s3
152; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s3
153; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s0, v0, v1
154; GFX10_W32-NEXT:    ; return to shader part epilog
155;
156; GFX10_W64-LABEL: s_div_fmas_f32:
157; GFX10_W64:       ; %bb.0:
158; GFX10_W64-NEXT:    s_cmp_eq_u32 s3, 0
159; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s1
160; GFX10_W64-NEXT:    s_cselect_b32 s3, 1, 0
161; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s2
162; GFX10_W64-NEXT:    s_and_b32 s3, 1, s3
163; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s3
164; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s0, v0, v1
165; GFX10_W64-NEXT:    ; return to shader part epilog
166;
167; GFX11_W32-LABEL: s_div_fmas_f32:
168; GFX11_W32:       ; %bb.0:
169; GFX11_W32-NEXT:    s_cmp_eq_u32 s3, 0
170; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
171; GFX11_W32-NEXT:    s_cselect_b32 s3, 1, 0
172; GFX11_W32-NEXT:    s_and_b32 s3, 1, s3
173; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s3
174; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s0, v0, v1
175; GFX11_W32-NEXT:    ; return to shader part epilog
176;
177; GFX11_W64-LABEL: s_div_fmas_f32:
178; GFX11_W64:       ; %bb.0:
179; GFX11_W64-NEXT:    s_cmp_eq_u32 s3, 0
180; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s1
181; GFX11_W64-NEXT:    s_cselect_b32 s3, 1, 0
182; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s2
183; GFX11_W64-NEXT:    s_and_b32 s3, 1, s3
184; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s3
185; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s0, v0, v1
186; GFX11_W64-NEXT:    ; return to shader part epilog
; IR under test: derive the i1 selector from the i32 argument, then call the
; intrinsic and return its result.
187  %vcc = icmp eq i32 %d, 0
188  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %vcc)
189  ret float %result
190}
191
; s_div_fmas_f64: double-precision amdgpu_ps variant with every argument
; marked inreg. The i1 selector is computed in the IR as (%d == 0) and then
; fed to llvm.amdgcn.div.fmas.f64.
; The autogenerated checks expect the compare on s6 to be done with scalar
; instructions (s_cmp_eq_u32 / s_cselect_b32 / s_and_b32) and converted to vcc
; (vcc_lo for the wave32 prefixes) by v_cmp_ne_u32_e64.
; In the expectations GFX7 and GFX8 copy all three operands into VGPR pairs,
; whereas GFX10 and GFX11 pass s[0:1] directly as the first source. After
; v_div_fmas_f64 every prefix expects the two result halves to be moved to
; s0 and s1 with v_readfirstlane_b32 before the shader epilog.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
192define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double inreg %c, i32 inreg %d) {
193; GFX7-LABEL: s_div_fmas_f64:
194; GFX7:       ; %bb.0:
195; GFX7-NEXT:    s_cmp_eq_u32 s6, 0
196; GFX7-NEXT:    s_cselect_b32 s6, 1, 0
197; GFX7-NEXT:    v_mov_b32_e32 v0, s0
198; GFX7-NEXT:    v_mov_b32_e32 v1, s1
199; GFX7-NEXT:    v_mov_b32_e32 v2, s2
200; GFX7-NEXT:    v_mov_b32_e32 v4, s4
201; GFX7-NEXT:    s_and_b32 s0, 1, s6
202; GFX7-NEXT:    v_mov_b32_e32 v3, s3
203; GFX7-NEXT:    v_mov_b32_e32 v5, s5
204; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
205; GFX7-NEXT:    s_nop 3
206; GFX7-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
207; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
208; GFX7-NEXT:    v_readfirstlane_b32 s1, v1
209; GFX7-NEXT:    ; return to shader part epilog
210;
211; GFX8-LABEL: s_div_fmas_f64:
212; GFX8:       ; %bb.0:
213; GFX8-NEXT:    s_cmp_eq_u32 s6, 0
214; GFX8-NEXT:    s_cselect_b32 s6, 1, 0
215; GFX8-NEXT:    v_mov_b32_e32 v0, s0
216; GFX8-NEXT:    v_mov_b32_e32 v1, s1
217; GFX8-NEXT:    v_mov_b32_e32 v2, s2
218; GFX8-NEXT:    v_mov_b32_e32 v4, s4
219; GFX8-NEXT:    s_and_b32 s0, 1, s6
220; GFX8-NEXT:    v_mov_b32_e32 v3, s3
221; GFX8-NEXT:    v_mov_b32_e32 v5, s5
222; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
223; GFX8-NEXT:    s_nop 3
224; GFX8-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
225; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
226; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
227; GFX8-NEXT:    ; return to shader part epilog
228;
229; GFX10_W32-LABEL: s_div_fmas_f64:
230; GFX10_W32:       ; %bb.0:
231; GFX10_W32-NEXT:    s_cmp_eq_u32 s6, 0
232; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s2
233; GFX10_W32-NEXT:    s_cselect_b32 s6, 1, 0
234; GFX10_W32-NEXT:    v_mov_b32_e32 v2, s4
235; GFX10_W32-NEXT:    s_and_b32 s6, 1, s6
236; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s3
237; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s6
238; GFX10_W32-NEXT:    v_mov_b32_e32 v3, s5
239; GFX10_W32-NEXT:    v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
240; GFX10_W32-NEXT:    v_readfirstlane_b32 s0, v0
241; GFX10_W32-NEXT:    v_readfirstlane_b32 s1, v1
242; GFX10_W32-NEXT:    ; return to shader part epilog
243;
244; GFX10_W64-LABEL: s_div_fmas_f64:
245; GFX10_W64:       ; %bb.0:
246; GFX10_W64-NEXT:    s_cmp_eq_u32 s6, 0
247; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s2
248; GFX10_W64-NEXT:    s_cselect_b32 s6, 1, 0
249; GFX10_W64-NEXT:    v_mov_b32_e32 v2, s4
250; GFX10_W64-NEXT:    s_and_b32 s6, 1, s6
251; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s3
252; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
253; GFX10_W64-NEXT:    v_mov_b32_e32 v3, s5
254; GFX10_W64-NEXT:    v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
255; GFX10_W64-NEXT:    v_readfirstlane_b32 s0, v0
256; GFX10_W64-NEXT:    v_readfirstlane_b32 s1, v1
257; GFX10_W64-NEXT:    ; return to shader part epilog
258;
259; GFX11_W32-LABEL: s_div_fmas_f64:
260; GFX11_W32:       ; %bb.0:
261; GFX11_W32-NEXT:    s_cmp_eq_u32 s6, 0
262; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
263; GFX11_W32-NEXT:    s_cselect_b32 s6, 1, 0
264; GFX11_W32-NEXT:    v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
265; GFX11_W32-NEXT:    s_and_b32 s6, 1, s6
266; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s6
267; GFX11_W32-NEXT:    v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
268; GFX11_W32-NEXT:    v_readfirstlane_b32 s0, v0
269; GFX11_W32-NEXT:    v_readfirstlane_b32 s1, v1
270; GFX11_W32-NEXT:    ; return to shader part epilog
271;
272; GFX11_W64-LABEL: s_div_fmas_f64:
273; GFX11_W64:       ; %bb.0:
274; GFX11_W64-NEXT:    s_cmp_eq_u32 s6, 0
275; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s2
276; GFX11_W64-NEXT:    s_cselect_b32 s6, 1, 0
277; GFX11_W64-NEXT:    v_mov_b32_e32 v2, s4
278; GFX11_W64-NEXT:    s_and_b32 s6, 1, s6
279; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s3
280; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s6
281; GFX11_W64-NEXT:    v_mov_b32_e32 v3, s5
282; GFX11_W64-NEXT:    v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3]
283; GFX11_W64-NEXT:    v_readfirstlane_b32 s0, v0
284; GFX11_W64-NEXT:    v_readfirstlane_b32 s1, v1
285; GFX11_W64-NEXT:    ; return to shader part epilog
; IR under test: derive the i1 selector from the i32 argument, then call the
; intrinsic and return its result.
286  %vcc = icmp eq i32 %d, 0
287  %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %vcc)
288  ret double %result
289}
290
; test_div_fmas_f32: amdgpu_kernel variant. The three float operands and the
; i1 selector are kernel arguments separated from each other by [8 x i32]
; padding arguments; the intrinsic result is stored to %out with align 4.
; The autogenerated checks expect each argument to be fetched with s_load
; relative to s[2:3]. GFX7 uses offsets 0xa/0x13/0x1c/0x25 where the other
; prefixes use 0x28/0x4c/0x70/0x94 (presumably dword versus byte offsets of
; the same locations; confirm against the target documentation).
; The expected store instruction differs per target: buffer_store_dword
; (GFX7), flat_store_dword (GFX8), global_store_dword (GFX10) and
; global_store_b32 (GFX11). The GFX11 expectations also contain "s_nop 0" and
; s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) before s_endpgm.
; The s_nop expected ahead of v_div_fmas_f32 is "s_nop 1" for GFX7 and
; "s_nop 2" for GFX8; the GFX10 and GFX11 expectations have none there.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
291define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
292; GFX7-LABEL: test_div_fmas_f32:
293; GFX7:       ; %bb.0:
294; GFX7-NEXT:    s_load_dword s4, s[2:3], 0xa
295; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x13
296; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x1c
297; GFX7-NEXT:    s_load_dword s7, s[2:3], 0x25
298; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
299; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
300; GFX7-NEXT:    v_mov_b32_e32 v0, s4
301; GFX7-NEXT:    v_mov_b32_e32 v1, s5
302; GFX7-NEXT:    v_mov_b32_e32 v2, s6
303; GFX7-NEXT:    s_and_b32 s2, 1, s7
304; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
305; GFX7-NEXT:    s_mov_b32 s2, -1
306; GFX7-NEXT:    s_mov_b32 s3, 0xf000
307; GFX7-NEXT:    s_nop 1
308; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
309; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
310; GFX7-NEXT:    s_endpgm
311;
312; GFX8-LABEL: test_div_fmas_f32:
313; GFX8:       ; %bb.0:
314; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x28
315; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x4c
316; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x70
317; GFX8-NEXT:    s_load_dword s5, s[2:3], 0x94
318; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
319; GFX8-NEXT:    v_mov_b32_e32 v0, s0
320; GFX8-NEXT:    v_mov_b32_e32 v1, s1
321; GFX8-NEXT:    v_mov_b32_e32 v2, s4
322; GFX8-NEXT:    s_and_b32 s0, 1, s5
323; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
324; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
325; GFX8-NEXT:    s_nop 2
326; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, v2
327; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
328; GFX8-NEXT:    v_mov_b32_e32 v0, s0
329; GFX8-NEXT:    v_mov_b32_e32 v1, s1
330; GFX8-NEXT:    flat_store_dword v[0:1], v2
331; GFX8-NEXT:    s_endpgm
332;
333; GFX10_W32-LABEL: test_div_fmas_f32:
334; GFX10_W32:       ; %bb.0:
335; GFX10_W32-NEXT:    s_clause 0x4
336; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x94
337; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x4c
338; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x70
339; GFX10_W32-NEXT:    s_load_dword s7, s[2:3], 0x28
340; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
341; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
342; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
343; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
344; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
345; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s6
346; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
347; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
348; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
349; GFX10_W32-NEXT:    s_endpgm
350;
351; GFX10_W64-LABEL: test_div_fmas_f32:
352; GFX10_W64:       ; %bb.0:
353; GFX10_W64-NEXT:    s_clause 0x4
354; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x94
355; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x4c
356; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x70
357; GFX10_W64-NEXT:    s_load_dword s7, s[2:3], 0x28
358; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
359; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
360; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
361; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
362; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
363; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s6
364; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
365; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
366; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
367; GFX10_W64-NEXT:    s_endpgm
368;
369; GFX11_W32-LABEL: test_div_fmas_f32:
370; GFX11_W32:       ; %bb.0:
371; GFX11_W32-NEXT:    s_clause 0x4
372; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x94
373; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x4c
374; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x70
375; GFX11_W32-NEXT:    s_load_b32 s7, s[2:3], 0x28
376; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
377; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
378; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
379; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
380; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
381; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
382; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
383; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
384; GFX11_W32-NEXT:    s_nop 0
385; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
386; GFX11_W32-NEXT:    s_endpgm
387;
388; GFX11_W64-LABEL: test_div_fmas_f32:
389; GFX11_W64:       ; %bb.0:
390; GFX11_W64-NEXT:    s_clause 0x4
391; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x94
392; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x4c
393; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x70
394; GFX11_W64-NEXT:    s_load_b32 s7, s[2:3], 0x28
395; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
396; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
397; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
398; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
399; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
400; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s6
401; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s7, v0, v1
402; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
403; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
404; GFX11_W64-NEXT:    s_nop 0
405; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
406; GFX11_W64-NEXT:    s_endpgm
; IR under test: call the intrinsic on the kernel arguments and store the
; result through the %out pointer.
407  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d)
408  store float %result, ptr addrspace(1) %out, align 4
409  ret void
410}
411
; test_div_fmas_f32_inline_imm_0: kernel variant in which the FIRST intrinsic
; operand is the constant 1.0 instead of a kernel argument. The kernel still
; declares %a, but the body does not use it.
; The autogenerated checks expect the constant to appear as the literal 1.0 in
; the first source position of v_div_fmas_f32 for every prefix, and expect
; only three scalar argument loads plus the load of %out (one fewer than the
; all-variable kernel test).
; In the GFX10 and GFX11 expectations the second operand is read directly from
; s6 and only the third is copied into a VGPR; GFX7 and GFX8 copy both.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
412define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
413; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
414; GFX7:       ; %bb.0:
415; GFX7-NEXT:    s_load_dword s4, s[2:3], 0x13
416; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x1c
417; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x25
418; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
419; GFX7-NEXT:    s_mov_b32 s3, 0xf000
420; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
421; GFX7-NEXT:    v_mov_b32_e32 v0, s4
422; GFX7-NEXT:    v_mov_b32_e32 v1, s5
423; GFX7-NEXT:    s_and_b32 s2, 1, s6
424; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
425; GFX7-NEXT:    s_mov_b32 s2, -1
426; GFX7-NEXT:    s_nop 2
427; GFX7-NEXT:    v_div_fmas_f32 v0, 1.0, v0, v1
428; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
429; GFX7-NEXT:    s_endpgm
430;
431; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
432; GFX8:       ; %bb.0:
433; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x4c
434; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x70
435; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x94
436; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
437; GFX8-NEXT:    v_mov_b32_e32 v0, s0
438; GFX8-NEXT:    v_mov_b32_e32 v1, s1
439; GFX8-NEXT:    s_and_b32 s0, 1, s4
440; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
441; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
442; GFX8-NEXT:    s_nop 2
443; GFX8-NEXT:    v_div_fmas_f32 v2, 1.0, v0, v1
444; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
445; GFX8-NEXT:    v_mov_b32_e32 v0, s0
446; GFX8-NEXT:    v_mov_b32_e32 v1, s1
447; GFX8-NEXT:    flat_store_dword v[0:1], v2
448; GFX8-NEXT:    s_endpgm
449;
450; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
451; GFX10_W32:       ; %bb.0:
452; GFX10_W32-NEXT:    s_clause 0x3
453; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x94
454; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x70
455; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x4c
456; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
457; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
458; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
459; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
460; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
461; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
462; GFX10_W32-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
463; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
464; GFX10_W32-NEXT:    s_endpgm
465;
466; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
467; GFX10_W64:       ; %bb.0:
468; GFX10_W64-NEXT:    s_clause 0x3
469; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x94
470; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x70
471; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x4c
472; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
473; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
474; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
475; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
476; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
477; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
478; GFX10_W64-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
479; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
480; GFX10_W64-NEXT:    s_endpgm
481;
482; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0:
483; GFX11_W32:       ; %bb.0:
484; GFX11_W32-NEXT:    s_clause 0x3
485; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x94
486; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x70
487; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x4c
488; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
489; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
490; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
491; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
492; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s5
493; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
494; GFX11_W32-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
495; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
496; GFX11_W32-NEXT:    s_nop 0
497; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
498; GFX11_W32-NEXT:    s_endpgm
499;
500; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0:
501; GFX11_W64:       ; %bb.0:
502; GFX11_W64-NEXT:    s_clause 0x3
503; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x94
504; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x70
505; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x4c
506; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
507; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
508; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
509; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
510; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
511; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
512; GFX11_W64-NEXT:    v_div_fmas_f32 v0, 1.0, s6, v0
513; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
514; GFX11_W64-NEXT:    s_nop 0
515; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
516; GFX11_W64-NEXT:    s_endpgm
; IR under test: operand 0 of the intrinsic is the constant 1.0; the result is
; stored through the %out pointer.
517  %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d)
518  store float %result, ptr addrspace(1) %out, align 4
519  ret void
520}
521
; test_div_fmas_f32_inline_imm_1: kernel variant in which the SECOND intrinsic
; operand is the constant 1.0. The kernel still declares %b, but the body does
; not use it.
; Unlike the sibling inline-immediate kernels, this signature places %a, %b
; and %c back to back and has a single [8 x i32] padding argument before %d,
; so the expected s_load offsets differ (0x2/0x4/0xd for GFX7 and
; 0x8/0x10/0x34 for the other prefixes).
; The autogenerated checks expect the literal 1.0 in the second source
; position of v_div_fmas_f32 for every prefix.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
522define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
523; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
524; GFX7:       ; %bb.0:
525; GFX7-NEXT:    s_load_dword s4, s[2:3], 0x2
526; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x4
527; GFX7-NEXT:    s_load_dword s6, s[2:3], 0xd
528; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
529; GFX7-NEXT:    s_mov_b32 s3, 0xf000
530; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
531; GFX7-NEXT:    v_mov_b32_e32 v0, s4
532; GFX7-NEXT:    v_mov_b32_e32 v1, s5
533; GFX7-NEXT:    s_and_b32 s2, 1, s6
534; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
535; GFX7-NEXT:    s_mov_b32 s2, -1
536; GFX7-NEXT:    s_nop 2
537; GFX7-NEXT:    v_div_fmas_f32 v0, v0, 1.0, v1
538; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
539; GFX7-NEXT:    s_endpgm
540;
541; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
542; GFX8:       ; %bb.0:
543; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x8
544; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x10
545; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x34
546; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
547; GFX8-NEXT:    v_mov_b32_e32 v0, s0
548; GFX8-NEXT:    v_mov_b32_e32 v1, s1
549; GFX8-NEXT:    s_and_b32 s0, 1, s4
550; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
551; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
552; GFX8-NEXT:    s_nop 2
553; GFX8-NEXT:    v_div_fmas_f32 v2, v0, 1.0, v1
554; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
555; GFX8-NEXT:    v_mov_b32_e32 v0, s0
556; GFX8-NEXT:    v_mov_b32_e32 v1, s1
557; GFX8-NEXT:    flat_store_dword v[0:1], v2
558; GFX8-NEXT:    s_endpgm
559;
560; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
561; GFX10_W32:       ; %bb.0:
562; GFX10_W32-NEXT:    s_clause 0x3
563; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x34
564; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x10
565; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x8
566; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
567; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
568; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
569; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
570; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
571; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
572; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
573; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
574; GFX10_W32-NEXT:    s_endpgm
575;
576; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
577; GFX10_W64:       ; %bb.0:
578; GFX10_W64-NEXT:    s_clause 0x3
579; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x34
580; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x10
581; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x8
582; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
583; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
584; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
585; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
586; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
587; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
588; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
589; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
590; GFX10_W64-NEXT:    s_endpgm
591;
592; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1:
593; GFX11_W32:       ; %bb.0:
594; GFX11_W32-NEXT:    s_clause 0x3
595; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x34
596; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x10
597; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x8
598; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
599; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
600; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
601; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
602; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s5
603; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
604; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
605; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
606; GFX11_W32-NEXT:    s_nop 0
607; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
608; GFX11_W32-NEXT:    s_endpgm
609;
610; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1:
611; GFX11_W64:       ; %bb.0:
612; GFX11_W64-NEXT:    s_clause 0x3
613; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x34
614; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x10
615; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x8
616; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
617; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
618; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
619; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
620; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
621; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
622; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s6, 1.0, v0
623; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
624; GFX11_W64-NEXT:    s_nop 0
625; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
626; GFX11_W64-NEXT:    s_endpgm
; IR under test: operand 1 of the intrinsic is the constant 1.0; the result is
; stored through the %out pointer.
627  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d)
628  store float %result, ptr addrspace(1) %out, align 4
629  ret void
630}
631
; test_div_fmas_f32_inline_imm_2: kernel variant in which the THIRD intrinsic
; operand is the constant 1.0. The kernel still declares %c, but the body does
; not use it.
; The kernel arguments are separated by [8 x i32] padding, as in the
; all-variable kernel test; the expectations load %a, %b and %d only
; (offsets 0xa/0x13/0x25 for GFX7 and 0x28/0x4c/0x94 for the other prefixes).
; The autogenerated checks expect the literal 1.0 in the third source position
; of v_div_fmas_f32 for every prefix.
; The check lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
632define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
633; GFX7-LABEL: test_div_fmas_f32_inline_imm_2:
634; GFX7:       ; %bb.0:
635; GFX7-NEXT:    s_load_dword s4, s[2:3], 0xa
636; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x13
637; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x25
638; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
639; GFX7-NEXT:    s_mov_b32 s3, 0xf000
640; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
641; GFX7-NEXT:    v_mov_b32_e32 v0, s4
642; GFX7-NEXT:    v_mov_b32_e32 v1, s5
643; GFX7-NEXT:    s_and_b32 s2, 1, s6
644; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
645; GFX7-NEXT:    s_mov_b32 s2, -1
646; GFX7-NEXT:    s_nop 2
647; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, 1.0
648; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
649; GFX7-NEXT:    s_endpgm
650;
651; GFX8-LABEL: test_div_fmas_f32_inline_imm_2:
652; GFX8:       ; %bb.0:
653; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x28
654; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x4c
655; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x94
656; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
657; GFX8-NEXT:    v_mov_b32_e32 v0, s0
658; GFX8-NEXT:    v_mov_b32_e32 v1, s1
659; GFX8-NEXT:    s_and_b32 s0, 1, s4
660; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
661; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
662; GFX8-NEXT:    s_nop 2
663; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, 1.0
664; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
665; GFX8-NEXT:    v_mov_b32_e32 v0, s0
666; GFX8-NEXT:    v_mov_b32_e32 v1, s1
667; GFX8-NEXT:    flat_store_dword v[0:1], v2
668; GFX8-NEXT:    s_endpgm
669;
670; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2:
671; GFX10_W32:       ; %bb.0:
672; GFX10_W32-NEXT:    s_clause 0x3
673; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x94
674; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x4c
675; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x28
676; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
677; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
678; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
679; GFX10_W32-NEXT:    s_and_b32 s2, 1, s4
680; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
681; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
682; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, 1.0
683; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
684; GFX10_W32-NEXT:    s_endpgm
685;
686; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2:
687; GFX10_W64:       ; %bb.0:
688; GFX10_W64-NEXT:    s_clause 0x3
689; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x94
690; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x4c
691; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x28
692; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
693; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
694; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
695; GFX10_W64-NEXT:    s_and_b32 s2, 1, s4
696; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
697; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
698; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, 1.0
699; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
700; GFX10_W64-NEXT:    s_endpgm
701;
702; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2:
703; GFX11_W32:       ; %bb.0:
704; GFX11_W32-NEXT:    s_clause 0x3
705; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x94
706; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x4c
707; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x28
708; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
709; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
710; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
711; GFX11_W32-NEXT:    s_and_b32 s2, 1, s4
712; GFX11_W32-NEXT:    v_mov_b32_e32 v0, s5
713; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
714; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, 1.0
715; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
716; GFX11_W32-NEXT:    s_nop 0
717; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
718; GFX11_W32-NEXT:    s_endpgm
719;
720; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2:
721; GFX11_W64:       ; %bb.0:
722; GFX11_W64-NEXT:    s_clause 0x3
723; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x94
724; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x4c
725; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x28
726; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
727; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
728; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
729; GFX11_W64-NEXT:    s_and_b32 s2, 1, s4
730; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
731; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
732; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, 1.0
733; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
734; GFX11_W64-NEXT:    s_nop 0
735; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
736; GFX11_W64-NEXT:    s_endpgm
; IR under test: operand 2 of the intrinsic is the constant 1.0; the result is
; stored through the %out pointer.
737  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d)
738  store float %result, ptr addrspace(1) %out, align 4
739  ret void
740}
741
; f64 variant with every operand taken from kernel arguments. %a, %b and %c
; feed llvm.amdgcn.div.fmas.f64 directly and the i1 argument %d is the
; condition; the result is stored to %out. The checks expect %d to be masked
; with 1 and compared against 0 into vcc (vcc_lo in the W32 runs) ahead of
; v_div_fmas_f64. The GFX7 and GFX8 checks additionally expect an s_nop between
; that compare and v_div_fmas_f64 (presumably padding for a vcc write hazard;
; confirm against the hazard recognizer).
742define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) {
743; GFX7-LABEL: test_div_fmas_f64:
744; GFX7:       ; %bb.0:
745; GFX7-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
746; GFX7-NEXT:    s_load_dword s0, s[2:3], 0x8
747; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
748; GFX7-NEXT:    v_mov_b32_e32 v0, s6
749; GFX7-NEXT:    v_mov_b32_e32 v2, s8
750; GFX7-NEXT:    v_mov_b32_e32 v4, s10
751; GFX7-NEXT:    s_and_b32 s0, 1, s0
752; GFX7-NEXT:    v_mov_b32_e32 v1, s7
753; GFX7-NEXT:    v_mov_b32_e32 v3, s9
754; GFX7-NEXT:    v_mov_b32_e32 v5, s11
755; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
756; GFX7-NEXT:    s_mov_b32 s6, -1
757; GFX7-NEXT:    s_mov_b32 s7, 0xf000
758; GFX7-NEXT:    s_nop 1
759; GFX7-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
760; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
761; GFX7-NEXT:    s_endpgm
762;
763; GFX8-LABEL: test_div_fmas_f64:
764; GFX8:       ; %bb.0:
765; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
766; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x20
767; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
768; GFX8-NEXT:    v_mov_b32_e32 v0, s6
769; GFX8-NEXT:    v_mov_b32_e32 v2, s8
770; GFX8-NEXT:    v_mov_b32_e32 v4, s10
771; GFX8-NEXT:    s_and_b32 s0, 1, s0
772; GFX8-NEXT:    v_mov_b32_e32 v1, s7
773; GFX8-NEXT:    v_mov_b32_e32 v3, s9
774; GFX8-NEXT:    v_mov_b32_e32 v5, s11
775; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
776; GFX8-NEXT:    s_nop 3
777; GFX8-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
778; GFX8-NEXT:    v_mov_b32_e32 v2, s4
779; GFX8-NEXT:    v_mov_b32_e32 v3, s5
780; GFX8-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
781; GFX8-NEXT:    s_endpgm
782;
783; GFX10_W32-LABEL: test_div_fmas_f64:
784; GFX10_W32:       ; %bb.0:
785; GFX10_W32-NEXT:    s_clause 0x1
786; GFX10_W32-NEXT:    s_load_dword s0, s[2:3], 0x20
787; GFX10_W32-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
788; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
789; GFX10_W32-NEXT:    s_and_b32 s0, 1, s0
790; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s8
791; GFX10_W32-NEXT:    v_mov_b32_e32 v2, s10
792; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
793; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s9
794; GFX10_W32-NEXT:    v_mov_b32_e32 v3, s11
795; GFX10_W32-NEXT:    v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
796; GFX10_W32-NEXT:    v_mov_b32_e32 v2, 0
797; GFX10_W32-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
798; GFX10_W32-NEXT:    s_endpgm
799;
800; GFX10_W64-LABEL: test_div_fmas_f64:
801; GFX10_W64:       ; %bb.0:
802; GFX10_W64-NEXT:    s_clause 0x1
803; GFX10_W64-NEXT:    s_load_dword s0, s[2:3], 0x20
804; GFX10_W64-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x0
805; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
806; GFX10_W64-NEXT:    s_and_b32 s0, 1, s0
807; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s8
808; GFX10_W64-NEXT:    v_mov_b32_e32 v2, s10
809; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
810; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s9
811; GFX10_W64-NEXT:    v_mov_b32_e32 v3, s11
812; GFX10_W64-NEXT:    v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3]
813; GFX10_W64-NEXT:    v_mov_b32_e32 v2, 0
814; GFX10_W64-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
815; GFX10_W64-NEXT:    s_endpgm
816;
817; GFX11_W32-LABEL: test_div_fmas_f64:
818; GFX11_W32:       ; %bb.0:
819; GFX11_W32-NEXT:    s_clause 0x1
820; GFX11_W32-NEXT:    s_load_b32 s8, s[2:3], 0x20
821; GFX11_W32-NEXT:    s_load_b256 s[0:7], s[2:3], 0x0
822; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
823; GFX11_W32-NEXT:    s_and_b32 s8, 1, s8
824; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
825; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s8
826; GFX11_W32-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
827; GFX11_W32-NEXT:    v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
828; GFX11_W32-NEXT:    v_mov_b32_e32 v2, 0
829; GFX11_W32-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
830; GFX11_W32-NEXT:    s_nop 0
831; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
832; GFX11_W32-NEXT:    s_endpgm
833;
834; GFX11_W64-LABEL: test_div_fmas_f64:
835; GFX11_W64:       ; %bb.0:
836; GFX11_W64-NEXT:    s_clause 0x1
837; GFX11_W64-NEXT:    s_load_b32 s8, s[2:3], 0x20
838; GFX11_W64-NEXT:    s_load_b256 s[0:7], s[2:3], 0x0
839; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
840; GFX11_W64-NEXT:    s_and_b32 s8, 1, s8
841; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s4
842; GFX11_W64-NEXT:    v_mov_b32_e32 v2, s6
843; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s8
844; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s5
845; GFX11_W64-NEXT:    v_mov_b32_e32 v3, s7
846; GFX11_W64-NEXT:    v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3]
847; GFX11_W64-NEXT:    v_mov_b32_e32 v2, 0
848; GFX11_W64-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
849; GFX11_W64-NEXT:    s_nop 0
850; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
851; GFX11_W64-NEXT:    s_endpgm
852  %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d)
853  store double %result, ptr addrspace(1) %out, align 8
854  ret void
855}
856
; The condition is produced inside the kernel by `icmp eq i32 %i, 0` instead of
; arriving as an i1 argument. The checks expect a scalar compare
; (s_cmp_eq_u32 followed by s_cselect_b32) whose 0/1 result is masked with 1
; and then turned into vcc (vcc_lo in the W32 runs) by v_cmp_ne_u32 before
; v_div_fmas_f32 consumes it. The GFX7 and GFX8 checks also expect an s_nop
; ahead of v_div_fmas_f32.
857define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) {
858; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc:
859; GFX7:       ; %bb.0:
860; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x2
861; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
862; GFX7-NEXT:    s_mov_b32 s3, 0xf000
863; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
864; GFX7-NEXT:    s_cmp_eq_u32 s7, 0
865; GFX7-NEXT:    s_cselect_b32 s2, 1, 0
866; GFX7-NEXT:    s_and_b32 s2, 1, s2
867; GFX7-NEXT:    v_mov_b32_e32 v0, s4
868; GFX7-NEXT:    v_mov_b32_e32 v1, s5
869; GFX7-NEXT:    v_mov_b32_e32 v2, s6
870; GFX7-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
871; GFX7-NEXT:    s_mov_b32 s2, -1
872; GFX7-NEXT:    s_nop 2
873; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
874; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
875; GFX7-NEXT:    s_endpgm
876;
877; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc:
878; GFX8:       ; %bb.0:
879; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x8
880; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
881; GFX8-NEXT:    s_cmp_eq_u32 s7, 0
882; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
883; GFX8-NEXT:    s_and_b32 s0, 1, s0
884; GFX8-NEXT:    v_mov_b32_e32 v0, s4
885; GFX8-NEXT:    v_mov_b32_e32 v1, s5
886; GFX8-NEXT:    v_mov_b32_e32 v2, s6
887; GFX8-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
888; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
889; GFX8-NEXT:    s_nop 2
890; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, v2
891; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
892; GFX8-NEXT:    v_mov_b32_e32 v0, s0
893; GFX8-NEXT:    v_mov_b32_e32 v1, s1
894; GFX8-NEXT:    flat_store_dword v[0:1], v2
895; GFX8-NEXT:    s_endpgm
896;
897; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
898; GFX10_W32:       ; %bb.0:
899; GFX10_W32-NEXT:    s_clause 0x1
900; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x8
901; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
902; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
903; GFX10_W32-NEXT:    s_cmp_eq_u32 s7, 0
904; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
905; GFX10_W32-NEXT:    s_cselect_b32 s2, 1, 0
906; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s6
907; GFX10_W32-NEXT:    s_and_b32 s2, 1, s2
908; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
909; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s4, v0, v1
910; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
911; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
912; GFX10_W32-NEXT:    s_endpgm
913;
914; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
915; GFX10_W64:       ; %bb.0:
916; GFX10_W64-NEXT:    s_clause 0x1
917; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x8
918; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
919; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
920; GFX10_W64-NEXT:    s_cmp_eq_u32 s7, 0
921; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
922; GFX10_W64-NEXT:    s_cselect_b32 s2, 1, 0
923; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s6
924; GFX10_W64-NEXT:    s_and_b32 s2, 1, s2
925; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
926; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s4, v0, v1
927; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
928; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
929; GFX10_W64-NEXT:    s_endpgm
930;
931; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
932; GFX11_W32:       ; %bb.0:
933; GFX11_W32-NEXT:    s_clause 0x1
934; GFX11_W32-NEXT:    s_load_b128 s[4:7], s[2:3], 0x8
935; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
936; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
937; GFX11_W32-NEXT:    s_cmp_eq_u32 s7, 0
938; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6
939; GFX11_W32-NEXT:    s_cselect_b32 s2, 1, 0
940; GFX11_W32-NEXT:    s_and_b32 s2, 1, s2
941; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s2
942; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s4, v0, v1
943; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
944; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
945; GFX11_W32-NEXT:    s_nop 0
946; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
947; GFX11_W32-NEXT:    s_endpgm
948;
949; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
950; GFX11_W64:       ; %bb.0:
951; GFX11_W64-NEXT:    s_clause 0x1
952; GFX11_W64-NEXT:    s_load_b128 s[4:7], s[2:3], 0x8
953; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
954; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
955; GFX11_W64-NEXT:    s_cmp_eq_u32 s7, 0
956; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s5
957; GFX11_W64-NEXT:    s_cselect_b32 s2, 1, 0
958; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s6
959; GFX11_W64-NEXT:    s_and_b32 s2, 1, s2
960; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s2
961; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s4, v0, v1
962; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
963; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
964; GFX11_W64-NEXT:    s_nop 0
965; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
966; GFX11_W64-NEXT:    s_endpgm
967  %cmp = icmp eq i32 %i, 0
968  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp)
969  store float %result, ptr addrspace(1) %out, align 4
970  ret void
971}
972
; The condition operand is the constant `i1 false`. The checks expect vcc to be
; written directly by a scalar move of 0 (s_mov_b64 vcc, 0, or
; s_mov_b32 vcc_lo, 0 in the W32 runs) with no compare instruction in front of
; v_div_fmas_f32. The unnamed [8 x i32] arguments sit between %a, %b and %c,
; and the checks load each float with its own scalar load at a distinct
; kernarg offset.
973define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
974; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
975; GFX7:       ; %bb.0:
976; GFX7-NEXT:    s_load_dword s4, s[2:3], 0xa
977; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x13
978; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x1c
979; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
980; GFX7-NEXT:    s_mov_b64 vcc, 0
981; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
982; GFX7-NEXT:    v_mov_b32_e32 v0, s4
983; GFX7-NEXT:    v_mov_b32_e32 v1, s5
984; GFX7-NEXT:    v_mov_b32_e32 v2, s6
985; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
986; GFX7-NEXT:    s_mov_b32 s2, -1
987; GFX7-NEXT:    s_mov_b32 s3, 0xf000
988; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
989; GFX7-NEXT:    s_endpgm
990;
991; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
992; GFX8:       ; %bb.0:
993; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x28
994; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x4c
995; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x70
996; GFX8-NEXT:    s_mov_b64 vcc, 0
997; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
998; GFX8-NEXT:    v_mov_b32_e32 v0, s0
999; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1000; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1001; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, v2
1002; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1003; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1004; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1005; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1006; GFX8-NEXT:    flat_store_dword v[0:1], v2
1007; GFX8-NEXT:    s_endpgm
1008;
1009; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1010; GFX10_W32:       ; %bb.0:
1011; GFX10_W32-NEXT:    s_clause 0x3
1012; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x4c
1013; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x70
1014; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x28
1015; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1016; GFX10_W32-NEXT:    s_mov_b32 vcc_lo, 0
1017; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1018; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s4
1019; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s5
1020; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1021; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
1022; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
1023; GFX10_W32-NEXT:    s_endpgm
1024;
1025; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1026; GFX10_W64:       ; %bb.0:
1027; GFX10_W64-NEXT:    s_clause 0x3
1028; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x4c
1029; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x70
1030; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x28
1031; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1032; GFX10_W64-NEXT:    s_mov_b64 vcc, 0
1033; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1034; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s4
1035; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s5
1036; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1037; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
1038; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
1039; GFX10_W64-NEXT:    s_endpgm
1040;
1041; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1042; GFX11_W32:       ; %bb.0:
1043; GFX11_W32-NEXT:    s_clause 0x3
1044; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x4c
1045; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x70
1046; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x28
1047; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1048; GFX11_W32-NEXT:    s_mov_b32 vcc_lo, 0
1049; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1050; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
1051; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1052; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
1053; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
1054; GFX11_W32-NEXT:    s_nop 0
1055; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1056; GFX11_W32-NEXT:    s_endpgm
1057;
1058; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
1059; GFX11_W64:       ; %bb.0:
1060; GFX11_W64-NEXT:    s_clause 0x3
1061; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x4c
1062; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x70
1063; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x28
1064; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1065; GFX11_W64-NEXT:    s_mov_b64 vcc, 0
1066; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1067; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s4
1068; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s5
1069; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1070; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
1071; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
1072; GFX11_W64-NEXT:    s_nop 0
1073; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1074; GFX11_W64-NEXT:    s_endpgm
1075  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false)
1076  store float %result, ptr addrspace(1) %out, align 4
1077  ret void
1078}
1079
; Counterpart of the `i1 false` test with the condition operand fixed to
; `i1 true`. The checks expect vcc to be written directly by a scalar move of
; -1 (s_mov_b64 vcc, -1, or s_mov_b32 vcc_lo, -1 in the W32 runs) with no
; compare instruction in front of v_div_fmas_f32. The unnamed [8 x i32]
; arguments sit between %a, %b and %c, and the checks load each float with its
; own scalar load at a distinct kernarg offset.
1080define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
1081; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1082; GFX7:       ; %bb.0:
1083; GFX7-NEXT:    s_load_dword s4, s[2:3], 0xa
1084; GFX7-NEXT:    s_load_dword s5, s[2:3], 0x13
1085; GFX7-NEXT:    s_load_dword s6, s[2:3], 0x1c
1086; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1087; GFX7-NEXT:    s_mov_b64 vcc, -1
1088; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1089; GFX7-NEXT:    v_mov_b32_e32 v0, s4
1090; GFX7-NEXT:    v_mov_b32_e32 v1, s5
1091; GFX7-NEXT:    v_mov_b32_e32 v2, s6
1092; GFX7-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
1093; GFX7-NEXT:    s_mov_b32 s2, -1
1094; GFX7-NEXT:    s_mov_b32 s3, 0xf000
1095; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1096; GFX7-NEXT:    s_endpgm
1097;
1098; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1099; GFX8:       ; %bb.0:
1100; GFX8-NEXT:    s_load_dword s0, s[2:3], 0x28
1101; GFX8-NEXT:    s_load_dword s1, s[2:3], 0x4c
1102; GFX8-NEXT:    s_load_dword s4, s[2:3], 0x70
1103; GFX8-NEXT:    s_mov_b64 vcc, -1
1104; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1105; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1106; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1107; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1108; GFX8-NEXT:    v_div_fmas_f32 v2, v0, v1, v2
1109; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1110; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1111; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1112; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1113; GFX8-NEXT:    flat_store_dword v[0:1], v2
1114; GFX8-NEXT:    s_endpgm
1115;
1116; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1117; GFX10_W32:       ; %bb.0:
1118; GFX10_W32-NEXT:    s_clause 0x3
1119; GFX10_W32-NEXT:    s_load_dword s4, s[2:3], 0x4c
1120; GFX10_W32-NEXT:    s_load_dword s5, s[2:3], 0x70
1121; GFX10_W32-NEXT:    s_load_dword s6, s[2:3], 0x28
1122; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1123; GFX10_W32-NEXT:    s_mov_b32 vcc_lo, -1
1124; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1125; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s4
1126; GFX10_W32-NEXT:    v_mov_b32_e32 v1, s5
1127; GFX10_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1128; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
1129; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1]
1130; GFX10_W32-NEXT:    s_endpgm
1131;
1132; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1133; GFX10_W64:       ; %bb.0:
1134; GFX10_W64-NEXT:    s_clause 0x3
1135; GFX10_W64-NEXT:    s_load_dword s4, s[2:3], 0x4c
1136; GFX10_W64-NEXT:    s_load_dword s5, s[2:3], 0x70
1137; GFX10_W64-NEXT:    s_load_dword s6, s[2:3], 0x28
1138; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1139; GFX10_W64-NEXT:    s_mov_b64 vcc, -1
1140; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1141; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s4
1142; GFX10_W64-NEXT:    v_mov_b32_e32 v1, s5
1143; GFX10_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1144; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
1145; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1]
1146; GFX10_W64-NEXT:    s_endpgm
1147;
1148; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1149; GFX11_W32:       ; %bb.0:
1150; GFX11_W32-NEXT:    s_clause 0x3
1151; GFX11_W32-NEXT:    s_load_b32 s4, s[2:3], 0x4c
1152; GFX11_W32-NEXT:    s_load_b32 s5, s[2:3], 0x70
1153; GFX11_W32-NEXT:    s_load_b32 s6, s[2:3], 0x28
1154; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1155; GFX11_W32-NEXT:    s_mov_b32 vcc_lo, -1
1156; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1157; GFX11_W32-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
1158; GFX11_W32-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1159; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
1160; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1]
1161; GFX11_W32-NEXT:    s_nop 0
1162; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1163; GFX11_W32-NEXT:    s_endpgm
1164;
1165; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
1166; GFX11_W64:       ; %bb.0:
1167; GFX11_W64-NEXT:    s_clause 0x3
1168; GFX11_W64-NEXT:    s_load_b32 s4, s[2:3], 0x4c
1169; GFX11_W64-NEXT:    s_load_b32 s5, s[2:3], 0x70
1170; GFX11_W64-NEXT:    s_load_b32 s6, s[2:3], 0x28
1171; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1172; GFX11_W64-NEXT:    s_mov_b64 vcc, -1
1173; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1174; GFX11_W64-NEXT:    v_mov_b32_e32 v0, s4
1175; GFX11_W64-NEXT:    v_mov_b32_e32 v1, s5
1176; GFX11_W64-NEXT:    v_div_fmas_f32 v0, s6, v0, v1
1177; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
1178; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1]
1179; GFX11_W64-NEXT:    s_nop 0
1180; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1181; GFX11_W64-NEXT:    s_endpgm
1182  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true)
1183  store float %result, ptr addrspace(1) %out, align 4
1184  ret void
1185}
1186
; The condition is the logical AND of two compares: `%tid == 0` on the workitem
; id and `%d != 0` on a kernel argument. The checks expect the workitem-id
; compare to be a v_cmp_eq_u32 writing vcc (vcc_lo in the W32 runs), the %d
; compare to go through s_cmp_lg_u32 / s_cselect_b32 and then v_cmp_ne_u32 into
; an SGPR mask, and the two masks to be combined by a scalar AND into vcc right
; before v_div_fmas_f32.
; The three float operands are volatile loads from %in at element offsets
; tid, tid + 1 and tid + 2; the checks expect three separate loads, each
; followed by its own s_waitcnt. The result is stored two floats (8 bytes)
; past %out.
1187define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) {
1188; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1189; GFX7:       ; %bb.0:
1190; GFX7-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x0
1191; GFX7-NEXT:    s_load_dword s0, s[2:3], 0xc
1192; GFX7-NEXT:    s_mov_b32 s6, 0
1193; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1194; GFX7-NEXT:    v_mov_b32_e32 v2, 0
1195; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1196; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1197; GFX7-NEXT:    s_mov_b64 s[4:5], s[10:11]
1198; GFX7-NEXT:    buffer_load_dword v3, v[1:2], s[4:7], 0 addr64 glc
1199; GFX7-NEXT:    s_waitcnt vmcnt(0)
1200; GFX7-NEXT:    buffer_load_dword v4, v[1:2], s[4:7], 0 addr64 offset:4 glc
1201; GFX7-NEXT:    s_waitcnt vmcnt(0)
1202; GFX7-NEXT:    buffer_load_dword v1, v[1:2], s[4:7], 0 addr64 offset:8 glc
1203; GFX7-NEXT:    s_waitcnt vmcnt(0)
1204; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
1205; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
1206; GFX7-NEXT:    s_and_b32 s0, 1, s0
1207; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1208; GFX7-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1209; GFX7-NEXT:    s_mov_b32 s6, -1
1210; GFX7-NEXT:    s_and_b64 vcc, vcc, s[0:1]
1211; GFX7-NEXT:    s_mov_b64 s[10:11], s[6:7]
1212; GFX7-NEXT:    v_div_fmas_f32 v0, v3, v4, v1
1213; GFX7-NEXT:    buffer_store_dword v0, off, s[8:11], 0 offset:8
1214; GFX7-NEXT:    s_endpgm
1215;
1216; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1217; GFX8:       ; %bb.0:
1218; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1219; GFX8-NEXT:    s_load_dword s2, s[2:3], 0x30
1220; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
1221; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1222; GFX8-NEXT:    v_mov_b32_e32 v1, s6
1223; GFX8-NEXT:    v_mov_b32_e32 v2, s7
1224; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
1225; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
1226; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 4, v1
1227; GFX8-NEXT:    v_addc_u32_e32 v4, vcc, 0, v2, vcc
1228; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 8, v1
1229; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v2, vcc
1230; GFX8-NEXT:    flat_load_dword v1, v[1:2] glc
1231; GFX8-NEXT:    s_waitcnt vmcnt(0)
1232; GFX8-NEXT:    flat_load_dword v2, v[3:4] glc
1233; GFX8-NEXT:    s_waitcnt vmcnt(0)
1234; GFX8-NEXT:    flat_load_dword v3, v[5:6] glc
1235; GFX8-NEXT:    s_waitcnt vmcnt(0)
1236; GFX8-NEXT:    s_add_u32 s0, s4, 8
1237; GFX8-NEXT:    s_addc_u32 s1, s5, 0
1238; GFX8-NEXT:    s_cmp_lg_u32 s2, 0
1239; GFX8-NEXT:    s_cselect_b32 s2, 1, 0
1240; GFX8-NEXT:    s_and_b32 s2, 1, s2
1241; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1242; GFX8-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, s2
1243; GFX8-NEXT:    s_and_b64 vcc, vcc, s[2:3]
1244; GFX8-NEXT:    s_nop 1
1245; GFX8-NEXT:    v_div_fmas_f32 v2, v1, v2, v3
1246; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1247; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1248; GFX8-NEXT:    flat_store_dword v[0:1], v2
1249; GFX8-NEXT:    s_endpgm
1250;
1251; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1252; GFX10_W32:       ; %bb.0:
1253; GFX10_W32-NEXT:    s_clause 0x1
1254; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1255; GFX10_W32-NEXT:    s_load_dword s0, s[2:3], 0x30
1256; GFX10_W32-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1257; GFX10_W32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1258; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1259; GFX10_W32-NEXT:    global_load_dword v2, v1, s[6:7] glc dlc
1260; GFX10_W32-NEXT:    s_waitcnt vmcnt(0)
1261; GFX10_W32-NEXT:    global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1262; GFX10_W32-NEXT:    s_waitcnt vmcnt(0)
1263; GFX10_W32-NEXT:    global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1264; GFX10_W32-NEXT:    s_waitcnt vmcnt(0)
1265; GFX10_W32-NEXT:    s_cmp_lg_u32 s0, 0
1266; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
1267; GFX10_W32-NEXT:    s_cselect_b32 s0, 1, 0
1268; GFX10_W32-NEXT:    s_and_b32 s0, 1, s0
1269; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
1270; GFX10_W32-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
1271; GFX10_W32-NEXT:    v_div_fmas_f32 v0, v2, v3, v4
1272; GFX10_W32-NEXT:    global_store_dword v1, v0, s[4:5] offset:8
1273; GFX10_W32-NEXT:    s_endpgm
1274;
1275; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1276; GFX10_W64:       ; %bb.0:
1277; GFX10_W64-NEXT:    s_clause 0x1
1278; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
1279; GFX10_W64-NEXT:    s_load_dword s0, s[2:3], 0x30
1280; GFX10_W64-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1281; GFX10_W64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1282; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1283; GFX10_W64-NEXT:    global_load_dword v2, v1, s[6:7] glc dlc
1284; GFX10_W64-NEXT:    s_waitcnt vmcnt(0)
1285; GFX10_W64-NEXT:    global_load_dword v3, v1, s[6:7] offset:4 glc dlc
1286; GFX10_W64-NEXT:    s_waitcnt vmcnt(0)
1287; GFX10_W64-NEXT:    global_load_dword v4, v1, s[6:7] offset:8 glc dlc
1288; GFX10_W64-NEXT:    s_waitcnt vmcnt(0)
1289; GFX10_W64-NEXT:    s_cmp_lg_u32 s0, 0
1290; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
1291; GFX10_W64-NEXT:    s_cselect_b32 s0, 1, 0
1292; GFX10_W64-NEXT:    s_and_b32 s0, 1, s0
1293; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1294; GFX10_W64-NEXT:    s_and_b64 vcc, vcc, s[0:1]
1295; GFX10_W64-NEXT:    v_div_fmas_f32 v0, v2, v3, v4
1296; GFX10_W64-NEXT:    global_store_dword v1, v0, s[4:5] offset:8
1297; GFX10_W64-NEXT:    s_endpgm
1298;
1299; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1300; GFX11_W32:       ; %bb.0:
1301; GFX11_W32-NEXT:    s_clause 0x1
1302; GFX11_W32-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
1303; GFX11_W32-NEXT:    s_load_b32 s0, s[2:3], 0x30
1304; GFX11_W32-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1305; GFX11_W32-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1306; GFX11_W32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1307; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1308; GFX11_W32-NEXT:    global_load_b32 v2, v1, s[6:7] glc dlc
1309; GFX11_W32-NEXT:    s_waitcnt vmcnt(0)
1310; GFX11_W32-NEXT:    global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1311; GFX11_W32-NEXT:    s_waitcnt vmcnt(0)
1312; GFX11_W32-NEXT:    global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1313; GFX11_W32-NEXT:    s_waitcnt vmcnt(0)
1314; GFX11_W32-NEXT:    s_cmp_lg_u32 s0, 0
1315; GFX11_W32-NEXT:    s_cselect_b32 s0, 1, 0
1316; GFX11_W32-NEXT:    s_and_b32 s0, 1, s0
1317; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
1318; GFX11_W32-NEXT:    s_and_b32 vcc_lo, vcc_lo, s0
1319; GFX11_W32-NEXT:    v_div_fmas_f32 v0, v2, v3, v1
1320; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
1321; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[4:5] offset:8
1322; GFX11_W32-NEXT:    s_nop 0
1323; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1324; GFX11_W32-NEXT:    s_endpgm
1325;
1326; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
1327; GFX11_W64:       ; %bb.0:
1328; GFX11_W64-NEXT:    s_clause 0x1
1329; GFX11_W64-NEXT:    s_load_b128 s[4:7], s[2:3], 0x0
1330; GFX11_W64-NEXT:    s_load_b32 s0, s[2:3], 0x30
1331; GFX11_W64-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1332; GFX11_W64-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1333; GFX11_W64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1334; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1335; GFX11_W64-NEXT:    global_load_b32 v2, v1, s[6:7] glc dlc
1336; GFX11_W64-NEXT:    s_waitcnt vmcnt(0)
1337; GFX11_W64-NEXT:    global_load_b32 v3, v1, s[6:7] offset:4 glc dlc
1338; GFX11_W64-NEXT:    s_waitcnt vmcnt(0)
1339; GFX11_W64-NEXT:    global_load_b32 v1, v1, s[6:7] offset:8 glc dlc
1340; GFX11_W64-NEXT:    s_waitcnt vmcnt(0)
1341; GFX11_W64-NEXT:    s_cmp_lg_u32 s0, 0
1342; GFX11_W64-NEXT:    s_cselect_b32 s0, 1, 0
1343; GFX11_W64-NEXT:    s_and_b32 s0, 1, s0
1344; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1345; GFX11_W64-NEXT:    s_and_b64 vcc, vcc, s[0:1]
1346; GFX11_W64-NEXT:    v_div_fmas_f32 v0, v2, v3, v1
1347; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
1348; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[4:5] offset:8
1349; GFX11_W64-NEXT:    s_nop 0
1350; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1351; GFX11_W64-NEXT:    s_endpgm
1352  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1353  %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1354  %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1355  %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
1356  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
1357
1358  %a = load volatile float, ptr addrspace(1) %gep.a
1359  %b = load volatile float, ptr addrspace(1) %gep.b
1360  %c = load volatile float, ptr addrspace(1) %gep.c
1361
1362  %cmp0 = icmp eq i32 %tid, 0
1363  %cmp1 = icmp ne i32 %d, 0
1364  %and = and i1 %cmp0, %cmp1
1365
1366  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and)
1367  store float %result, ptr addrspace(1) %gep.out, align 4
1368  ret void
1369}
1370
1371define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) {
1372; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
1373; GFX7:       ; %bb.0: ; %entry
1374; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xa
1375; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1376; GFX7-NEXT:    v_mov_b32_e32 v2, 0
1377; GFX7-NEXT:    s_mov_b32 s6, 0
1378; GFX7-NEXT:    s_mov_b32 s7, 0xf000
1379; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1380; GFX7-NEXT:    buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
1381; GFX7-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v0
1382; GFX7-NEXT:    s_mov_b64 vcc, 0
1383; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
1384; GFX7-NEXT:    s_cbranch_execz .LBB13_2
1385; GFX7-NEXT:  ; %bb.1: ; %bb
1386; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x14
1387; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1388; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
1389; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1390; GFX7-NEXT:    s_cmp_lg_u32 s0, 0
1391; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
1392; GFX7-NEXT:    s_and_b32 s0, 1, s0
1393; GFX7-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1394; GFX7-NEXT:    s_andn2_b64 s[8:9], 0, exec
1395; GFX7-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
1396; GFX7-NEXT:    s_or_b64 vcc, s[8:9], s[0:1]
1397; GFX7-NEXT:  .LBB13_2: ; %exit
1398; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
1399; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
1400; GFX7-NEXT:    s_waitcnt vmcnt(0)
1401; GFX7-NEXT:    v_div_fmas_f32 v0, v1, v2, v3
1402; GFX7-NEXT:    s_mov_b32 s6, -1
1403; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1404; GFX7-NEXT:    buffer_store_dword v0, off, s[4:7], 0 offset:8
1405; GFX7-NEXT:    s_endpgm
1406;
1407; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
1408; GFX8:       ; %bb.0: ; %entry
1409; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x28
1410; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
1411; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1412; GFX8-NEXT:    v_mov_b32_e32 v2, s1
1413; GFX8-NEXT:    v_mov_b32_e32 v1, s0
1414; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
1415; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
1416; GFX8-NEXT:    flat_load_dwordx3 v[1:3], v[1:2]
1417; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v0
1418; GFX8-NEXT:    s_mov_b64 vcc, 0
1419; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
1420; GFX8-NEXT:    s_cbranch_execz .LBB13_2
1421; GFX8-NEXT:  ; %bb.1: ; %bb
1422; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x50
1423; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1424; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x0
1425; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1426; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
1427; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
1428; GFX8-NEXT:    s_and_b32 s0, 1, s0
1429; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1430; GFX8-NEXT:    s_andn2_b64 s[6:7], 0, exec
1431; GFX8-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
1432; GFX8-NEXT:    s_or_b64 vcc, s[6:7], s[0:1]
1433; GFX8-NEXT:  .LBB13_2: ; %exit
1434; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1435; GFX8-NEXT:    s_waitcnt vmcnt(0)
1436; GFX8-NEXT:    v_div_fmas_f32 v2, v1, v2, v3
1437; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1438; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1439; GFX8-NEXT:    s_add_u32 s0, s0, 8
1440; GFX8-NEXT:    s_addc_u32 s1, s1, 0
1441; GFX8-NEXT:    v_mov_b32_e32 v0, s0
1442; GFX8-NEXT:    v_mov_b32_e32 v1, s1
1443; GFX8-NEXT:    flat_store_dword v[0:1], v2
1444; GFX8-NEXT:    s_endpgm
1445;
1446; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1447; GFX10_W32:       ; %bb.0: ; %entry
1448; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x28
1449; GFX10_W32-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1450; GFX10_W32-NEXT:    s_mov_b32 vcc_lo, 0
1451; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1452; GFX10_W32-NEXT:    global_load_dwordx3 v[1:3], v1, s[0:1]
1453; GFX10_W32-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
1454; GFX10_W32-NEXT:    s_and_saveexec_b32 s1, s0
1455; GFX10_W32-NEXT:    s_cbranch_execz .LBB13_2
1456; GFX10_W32-NEXT:  ; %bb.1: ; %bb
1457; GFX10_W32-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x50
1458; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1459; GFX10_W32-NEXT:    s_load_dword s0, s[4:5], 0x0
1460; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1461; GFX10_W32-NEXT:    s_cmp_lg_u32 s0, 0
1462; GFX10_W32-NEXT:    s_cselect_b32 s0, 1, 0
1463; GFX10_W32-NEXT:    s_andn2_b32 s4, 0, exec_lo
1464; GFX10_W32-NEXT:    s_and_b32 s0, 1, s0
1465; GFX10_W32-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
1466; GFX10_W32-NEXT:    s_and_b32 s0, exec_lo, s0
1467; GFX10_W32-NEXT:    s_or_b32 vcc_lo, s4, s0
1468; GFX10_W32-NEXT:  .LBB13_2: ; %exit
1469; GFX10_W32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1470; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1471; GFX10_W32-NEXT:    s_waitcnt vmcnt(0)
1472; GFX10_W32-NEXT:    v_div_fmas_f32 v0, v1, v2, v3
1473; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
1474; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
1475; GFX10_W32-NEXT:    global_store_dword v1, v0, s[0:1] offset:8
1476; GFX10_W32-NEXT:    s_endpgm
1477;
1478; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1479; GFX10_W64:       ; %bb.0: ; %entry
1480; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x28
1481; GFX10_W64-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
1482; GFX10_W64-NEXT:    s_mov_b64 vcc, 0
1483; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1484; GFX10_W64-NEXT:    global_load_dwordx3 v[1:3], v1, s[0:1]
1485; GFX10_W64-NEXT:    v_cmp_eq_u32_e64 s[0:1], 0, v0
1486; GFX10_W64-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
1487; GFX10_W64-NEXT:    s_cbranch_execz .LBB13_2
1488; GFX10_W64-NEXT:  ; %bb.1: ; %bb
1489; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x50
1490; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1491; GFX10_W64-NEXT:    s_load_dword s0, s[0:1], 0x0
1492; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1493; GFX10_W64-NEXT:    s_cmp_lg_u32 s0, 0
1494; GFX10_W64-NEXT:    s_cselect_b32 s0, 1, 0
1495; GFX10_W64-NEXT:    s_andn2_b64 s[6:7], 0, exec
1496; GFX10_W64-NEXT:    s_and_b32 s0, 1, s0
1497; GFX10_W64-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1498; GFX10_W64-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
1499; GFX10_W64-NEXT:    s_or_b64 vcc, s[6:7], s[0:1]
1500; GFX10_W64-NEXT:  .LBB13_2: ; %exit
1501; GFX10_W64-NEXT:    s_or_b64 exec, exec, s[4:5]
1502; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
1503; GFX10_W64-NEXT:    s_waitcnt vmcnt(0)
1504; GFX10_W64-NEXT:    v_div_fmas_f32 v0, v1, v2, v3
1505; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
1506; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
1507; GFX10_W64-NEXT:    global_store_dword v1, v0, s[0:1] offset:8
1508; GFX10_W64-NEXT:    s_endpgm
1509;
1510; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
1511; GFX11_W32:       ; %bb.0: ; %entry
1512; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x28
1513; GFX11_W32-NEXT:    v_and_b32_e32 v3, 0x3ff, v0
1514; GFX11_W32-NEXT:    s_mov_b32 vcc_lo, 0
1515; GFX11_W32-NEXT:    v_lshlrev_b32_e32 v0, 2, v3
1516; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1517; GFX11_W32-NEXT:    global_load_b96 v[0:2], v0, s[0:1]
1518; GFX11_W32-NEXT:    s_mov_b32 s1, exec_lo
1519; GFX11_W32-NEXT:    v_cmpx_eq_u32_e32 0, v3
1520; GFX11_W32-NEXT:    s_cbranch_execz .LBB13_2
1521; GFX11_W32-NEXT:  ; %bb.1: ; %bb
1522; GFX11_W32-NEXT:    s_load_b64 s[4:5], s[2:3], 0x50
1523; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1524; GFX11_W32-NEXT:    s_load_b32 s0, s[4:5], 0x0
1525; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1526; GFX11_W32-NEXT:    s_cmp_lg_u32 s0, 0
1527; GFX11_W32-NEXT:    s_cselect_b32 s0, 1, 0
1528; GFX11_W32-NEXT:    s_and_not1_b32 s4, 0, exec_lo
1529; GFX11_W32-NEXT:    s_and_b32 s0, 1, s0
1530; GFX11_W32-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0
1531; GFX11_W32-NEXT:    s_and_b32 s0, exec_lo, s0
1532; GFX11_W32-NEXT:    s_or_b32 vcc_lo, s4, s0
1533; GFX11_W32-NEXT:  .LBB13_2: ; %exit
1534; GFX11_W32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1535; GFX11_W32-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1536; GFX11_W32-NEXT:    s_waitcnt vmcnt(0)
1537; GFX11_W32-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
1538; GFX11_W32-NEXT:    v_mov_b32_e32 v1, 0
1539; GFX11_W32-NEXT:    s_waitcnt lgkmcnt(0)
1540; GFX11_W32-NEXT:    global_store_b32 v1, v0, s[0:1] offset:8
1541; GFX11_W32-NEXT:    s_nop 0
1542; GFX11_W32-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1543; GFX11_W32-NEXT:    s_endpgm
1544;
1545; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
1546; GFX11_W64:       ; %bb.0: ; %entry
1547; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x28
1548; GFX11_W64-NEXT:    v_and_b32_e32 v3, 0x3ff, v0
1549; GFX11_W64-NEXT:    s_mov_b64 vcc, 0
1550; GFX11_W64-NEXT:    s_mov_b64 s[4:5], exec
1551; GFX11_W64-NEXT:    v_lshlrev_b32_e32 v0, 2, v3
1552; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1553; GFX11_W64-NEXT:    global_load_b96 v[0:2], v0, s[0:1]
1554; GFX11_W64-NEXT:    v_cmpx_eq_u32_e32 0, v3
1555; GFX11_W64-NEXT:    s_cbranch_execz .LBB13_2
1556; GFX11_W64-NEXT:  ; %bb.1: ; %bb
1557; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x50
1558; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1559; GFX11_W64-NEXT:    s_load_b32 s0, s[0:1], 0x0
1560; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1561; GFX11_W64-NEXT:    s_cmp_lg_u32 s0, 0
1562; GFX11_W64-NEXT:    s_cselect_b32 s0, 1, 0
1563; GFX11_W64-NEXT:    s_and_not1_b64 s[6:7], 0, exec
1564; GFX11_W64-NEXT:    s_and_b32 s0, 1, s0
1565; GFX11_W64-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0
1566; GFX11_W64-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
1567; GFX11_W64-NEXT:    s_or_b64 vcc, s[6:7], s[0:1]
1568; GFX11_W64-NEXT:  .LBB13_2: ; %exit
1569; GFX11_W64-NEXT:    s_or_b64 exec, exec, s[4:5]
1570; GFX11_W64-NEXT:    s_load_b64 s[0:1], s[2:3], 0x0
1571; GFX11_W64-NEXT:    s_waitcnt vmcnt(0)
1572; GFX11_W64-NEXT:    v_div_fmas_f32 v0, v0, v1, v2
1573; GFX11_W64-NEXT:    v_mov_b32_e32 v1, 0
1574; GFX11_W64-NEXT:    s_waitcnt lgkmcnt(0)
1575; GFX11_W64-NEXT:    global_store_b32 v1, v0, s[0:1] offset:8
1576; GFX11_W64-NEXT:    s_nop 0
1577; GFX11_W64-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1578; GFX11_W64-NEXT:    s_endpgm
1579entry:
1580  %tid = call i32 @llvm.amdgcn.workitem.id.x()
1581  %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid
1582  %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1
1583  %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2
1584
1585  %a = load float, ptr addrspace(1) %gep.a
1586  %b = load float, ptr addrspace(1) %gep.b
1587  %c = load float, ptr addrspace(1) %gep.c
1588
1589  %cmp0 = icmp eq i32 %tid, 0
1590  br i1 %cmp0, label %bb, label %exit
1591
1592bb:
1593  %val = load i32, ptr addrspace(1) %dummy
1594  %cmp1 = icmp ne i32 %val, 0
1595  br label %exit
1596
1597exit:
1598  %cond = phi i1 [false, %entry], [%cmp1, %bb]
1599  %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2
1600  %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond)
1601  store float %result, ptr addrspace(1) %gep.out, align 4
1602  ret void
1603}
1604
; Declarations of the AMDGPU intrinsics called by the test functions. All three
; reference attribute group #0, which is defined right after them.
;
; Work-item id query - takes no operands and returns an i32. The test directly
; above uses the result as the element index into %in and compares it with 0
; to pick the branch.
1605declare i32 @llvm.amdgcn.workitem.id.x() #0
; f32 form - three float operands followed by an i1. In the generated code
; above, vcc / vcc_lo is written on both paths into %exit before the
; v_div_fmas_f32 instruction, which appears to be how the i1 reaches it.
1606declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0
; f64 form - same operand layout with double in place of float. No call to it
; is visible in this part of the file; presumably exercised by earlier tests
; (TODO confirm).
1607declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0
1608
; Attribute group shared by the three intrinsic declarations above.
; TODO(review) - `readnone` looks like the legacy spelling of what newer LLVM
; releases print as memory(none); confirm before hand-editing or regenerating.
1609attributes #0 = { nounwind readnone speculatable }
1610