xref: /llvm-project/llvm/test/CodeGen/AMDGPU/rsq.f32.ll (revision 0b0d9a3bee47e9de05e869be306284fa66533f63)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,SI-DAZ-UNSAFE %s
3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s          | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,SI-IEEE-UNSAFE %s
4
5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-SAFE,SI-DAZ-SAFE %s
6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s          | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-SAFE,SI-IEEE-SAFE %s
7
8
9; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,CI-DAZ-UNSAFE %s
10; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s          | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,CI-IEEE-UNSAFE %s
11
12; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-SAFE,CI-DAZ-SAFE %s
13; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee < %s          | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-SAFE,CI-IEEE-SAFE %s
14
15
16declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
17declare float @llvm.sqrt.f32(float) nounwind readnone
18declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) nounwind readnone
19
; rsq_f32: loads x from %in, computes 1.0 / sqrt(x) (sqrt and fdiv are both
; 'contract'; the fdiv carries !fpmath !0) and stores the result to %out.
; Expected lowering, as pinned by the checks below:
;   GCN-{DAZ,IEEE}-UNSAFE -> a single v_rsq_f32.
;   GCN-DAZ-SAFE          -> v_rsq_f32-based sqrt expansion, then v_rcp_f32.
;   SI/CI-IEEE-SAFE       -> v_sqrt_f32-based expansion, then
;                            v_frexp_mant / v_rcp / v_frexp_exp / v_ldexp; SI also
;                            selects on |value| < 0x7f800000 before the v_rcp_f32.
; Only prefixes enabled by a RUN line are kept; regenerate with
; utils/update_llc_test_checks.py instead of editing the checks by hand.
20define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) {
21; GCN-DAZ-UNSAFE-LABEL: rsq_f32:
22; GCN-DAZ-UNSAFE:       ; %bb.0:
23; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
24; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
25; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s6, -1
26; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s10, s6
27; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s11, s7
28; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
29; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s8, s2
30; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s9, s3
31; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
32; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s4, s0
33; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s5, s1
34; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
35; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
36; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
37; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
38;
39; GCN-IEEE-UNSAFE-LABEL: rsq_f32:
40; GCN-IEEE-UNSAFE:       ; %bb.0:
41; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
42; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
43; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s6, -1
44; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s10, s6
45; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s11, s7
46; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
47; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s8, s2
48; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s9, s3
49; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
50; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s4, s0
51; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s5, s1
52; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
53; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
54; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
55; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
56;
57; GCN-DAZ-SAFE-LABEL: rsq_f32:
58; GCN-DAZ-SAFE:       ; %bb.0:
59; GCN-DAZ-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
60; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s7, 0xf000
61; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s6, -1
62; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s10, s6
63; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s11, s7
64; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
65; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s8, s2
66; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s9, s3
67; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
68; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, 0xf800000
69; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
70; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, s0
71; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, s1
72; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
73; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
74; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v0
75; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
76; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
77; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v1
78; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
79; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v1, v3, 0.5
80; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
81; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v4, v1
82; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v3, v3, v0
83; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v3
84; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
85; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
86; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
87; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
88; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v0, v0
89; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
90; GCN-DAZ-SAFE-NEXT:    s_endpgm
91;
92; SI-IEEE-SAFE-LABEL: rsq_f32:
93; SI-IEEE-SAFE:       ; %bb.0:
94; SI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
95; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
96; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
97; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
98; SI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
99; SI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
100; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
101; SI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
102; SI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
103; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
104; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
105; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, 0x7f800000
106; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
107; SI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
108; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
109; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
110; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e64 s[0:1], s0, v0
111; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
112; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
113; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
114; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
115; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
116; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
117; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
118; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
119; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
120; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
121; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
122; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
123; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
124; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
125; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
126; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s2
127; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
128; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
129; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
130; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
131; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
132; SI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
133; SI-IEEE-SAFE-NEXT:    s_endpgm
134;
135; CI-IEEE-SAFE-LABEL: rsq_f32:
136; CI-IEEE-SAFE:       ; %bb.0:
137; CI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
138; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
139; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
140; CI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
141; CI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
142; CI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
143; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
144; CI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
145; CI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
146; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
147; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
148; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
149; CI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
150; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
151; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
152; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e64 s[0:1], s0, v0
153; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
154; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
155; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
156; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
157; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
158; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
159; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
160; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
161; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
162; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
163; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
164; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
165; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
166; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
167; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
168; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
169; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
170; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
171; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
172; CI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
173; CI-IEEE-SAFE-NEXT:    s_endpgm
191  %val = load float, ptr addrspace(1) %in, align 4
192  %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
193  %div = fdiv contract float 1.0, %sqrt, !fpmath !0
194  store float %div, ptr addrspace(1) %out, align 4
195  ret void
196}
197
; rsq_f32_sgpr: same 1.0 / sqrt(x) pattern as a contract sqrt + contract fdiv
; (fdiv tagged !fpmath !0), but x is the kernel argument %val rather than a
; value loaded from memory; the checks show it arriving through s_load_dword.
; Expected lowering, as pinned by the checks below:
;   GCN-{DAZ,IEEE}-UNSAFE -> v_rsq_f32 taking the SGPR operand directly.
;   GCN-DAZ-SAFE          -> v_rsq_f32-based sqrt expansion, then v_rcp_f32.
;   SI/CI-IEEE-SAFE       -> v_sqrt_f32-based expansion, then
;                            v_frexp_mant / v_rcp / v_frexp_exp / v_ldexp; SI also
;                            selects on |value| < 0x7f800000 before the v_rcp_f32.
; Only prefixes enabled by a RUN line are kept; regenerate with
; utils/update_llc_test_checks.py instead of editing the checks by hand.
198define amdgpu_kernel void @rsq_f32_sgpr(ptr addrspace(1) noalias %out, float %val) {
199; GCN-DAZ-UNSAFE-LABEL: rsq_f32_sgpr:
200; GCN-DAZ-UNSAFE:       ; %bb.0:
201; GCN-DAZ-UNSAFE-NEXT:    s_load_dword s2, s[4:5], 0xb
202; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
203; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s3, 0xf000
204; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
205; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, s2
206; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s2, -1
207; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
208; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
209;
210; GCN-IEEE-UNSAFE-LABEL: rsq_f32_sgpr:
211; GCN-IEEE-UNSAFE:       ; %bb.0:
212; GCN-IEEE-UNSAFE-NEXT:    s_load_dword s2, s[4:5], 0xb
213; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
214; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s3, 0xf000
215; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
216; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, s2
217; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s2, -1
218; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
219; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
220;
221; GCN-DAZ-SAFE-LABEL: rsq_f32_sgpr:
222; GCN-DAZ-SAFE:       ; %bb.0:
223; GCN-DAZ-SAFE-NEXT:    s_load_dword s0, s[4:5], 0xb
224; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v0, 0xf800000
225; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
226; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s3, 0xf000
227; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, -1
228; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
229; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, s0, v1
230; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, s0
231; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s0, v0
232; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
233; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
234; GCN-DAZ-SAFE-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
235; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v1
236; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
237; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
238; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v3, v2
239; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v3, v1
240; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v2, v2, v0
241; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v3, v1, v2
242; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
243; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
244; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
245; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
246; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
247; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v0, v0
248; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
249; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v0, off, s[0:3], 0
250; GCN-DAZ-SAFE-NEXT:    s_endpgm
251;
252; SI-IEEE-SAFE-LABEL: rsq_f32_sgpr:
253; SI-IEEE-SAFE:       ; %bb.0:
254; SI-IEEE-SAFE-NEXT:    s_load_dword s0, s[4:5], 0xb
255; SI-IEEE-SAFE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
256; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v0, 0xf800000
257; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
258; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
259; SI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
260; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, s0, v1
261; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, s0
262; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
263; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
264; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
265; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
266; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
267; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
268; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
269; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
270; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
271; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
272; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
273; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
274; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
275; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
276; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
277; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
278; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
279; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0x7f800000
280; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
281; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s0
282; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
283; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
284; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
285; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
286; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
287; SI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
288; SI-IEEE-SAFE-NEXT:    s_endpgm
289;
290; CI-IEEE-SAFE-LABEL: rsq_f32_sgpr:
291; CI-IEEE-SAFE:       ; %bb.0:
292; CI-IEEE-SAFE-NEXT:    s_load_dword s0, s[4:5], 0xb
293; CI-IEEE-SAFE-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
294; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v0, 0xf800000
295; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x4f800000
296; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
297; CI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
298; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, s0, v1
299; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, s0
300; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
301; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v2, v1, s[0:1]
302; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
303; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
304; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
305; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
306; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v3
307; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v1, v2, vcc
308; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
309; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
310; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
311; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc
312; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
313; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[0:1]
314; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
315; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
316; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
317; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
318; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
319; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
320; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
321; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
322; CI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
323; CI-IEEE-SAFE-NEXT:    s_endpgm
334  %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone
335  %div = fdiv contract float 1.0, %sqrt, !fpmath !0
336  store float %div, ptr addrspace(1) %out, align 4
337  ret void
338}
339
340; Recognize that this is rsqrt(a) * rcp(b) * c,
341; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
342
343; NOTE: c * rcp( sqrt(a) * b ) is generated when we move rcp generation to AMDGPUCodeGenPrepare.
; rsqrt_fmul: per work-item, volatile-loads a, b, c from three consecutive
; floats starting at %in[tid], computes c / (sqrt(a) * b) with 'contract' on
; the sqrt, fmul and fdiv, and stores the result to %out[tid].
; Expected lowering, as pinned by the checks below:
;   GCN-{DAZ,IEEE}-UNSAFE -> v_rsq_f32(a), v_rcp_f32(b), then two v_mul_f32.
;   GCN-DAZ-SAFE          -> v_rsq_f32-based sqrt expansion, then a full
;                            v_div_scale / v_div_fmas / v_div_fixup division
;                            whose FMA chain is bracketed by s_setreg_imm32_b32
;                            writes to HW_REG_MODE.
;   GCN-IEEE-SAFE         -> v_sqrt_f32-based expansion, then the same
;                            v_div_scale / v_div_fmas / v_div_fixup division
;                            without the mode-register writes.
; Only prefixes enabled by a RUN line are kept; regenerate with
; utils/update_llc_test_checks.py instead of editing the checks by hand.
344define amdgpu_kernel void @rsqrt_fmul(ptr addrspace(1) %out, ptr addrspace(1) %in) {
368; GCN-DAZ-UNSAFE-LABEL: rsqrt_fmul:
369; GCN-DAZ-UNSAFE:       ; %bb.0:
370; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
371; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
372; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s6, 0
373; GCN-DAZ-UNSAFE-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
374; GCN-DAZ-UNSAFE-NEXT:    v_mov_b32_e32 v1, 0
375; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
376; GCN-DAZ-UNSAFE-NEXT:    s_mov_b64 s[8:9], s[2:3]
377; GCN-DAZ-UNSAFE-NEXT:    s_mov_b64 s[10:11], s[6:7]
378; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
379; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
380; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc
381; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
382; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc
383; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
384; GCN-DAZ-UNSAFE-NEXT:    s_mov_b64 s[4:5], s[0:1]
385; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v2, v2
386; GCN-DAZ-UNSAFE-NEXT:    v_rcp_f32_e32 v3, v3
387; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e32 v2, v2, v3
388; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e32 v2, v4, v2
389; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
390; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
391;
392; GCN-IEEE-UNSAFE-LABEL: rsqrt_fmul:
393; GCN-IEEE-UNSAFE:       ; %bb.0:
394; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
395; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
396; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s6, 0
397; GCN-IEEE-UNSAFE-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
398; GCN-IEEE-UNSAFE-NEXT:    v_mov_b32_e32 v1, 0
399; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
400; GCN-IEEE-UNSAFE-NEXT:    s_mov_b64 s[8:9], s[2:3]
401; GCN-IEEE-UNSAFE-NEXT:    s_mov_b64 s[10:11], s[6:7]
402; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
403; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
404; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc
405; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
406; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc
407; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
408; GCN-IEEE-UNSAFE-NEXT:    s_mov_b64 s[4:5], s[0:1]
409; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v2, v2
410; GCN-IEEE-UNSAFE-NEXT:    v_rcp_f32_e32 v3, v3
411; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e32 v2, v2, v3
412; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e32 v2, v4, v2
413; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
414; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
415;
416; GCN-DAZ-SAFE-LABEL: rsqrt_fmul:
417; GCN-DAZ-SAFE:       ; %bb.0:
418; GCN-DAZ-SAFE-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
419; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s3, 0xf000
420; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, 0
421; GCN-DAZ-SAFE-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
422; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v1, 0
423; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
424; GCN-DAZ-SAFE-NEXT:    s_mov_b64 s[8:9], s[6:7]
425; GCN-DAZ-SAFE-NEXT:    s_mov_b64 s[10:11], s[2:3]
426; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
427; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
428; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc
429; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
430; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc
431; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
432; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
433; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v6, 0x260
434; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v2
435; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v2
436; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
437; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v5, v2
438; GCN-DAZ-SAFE-NEXT:    s_mov_b64 s[0:1], s[4:5]
439; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v7, v2, v5
440; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0.5, v5
441; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v8, -v5, v7, 0.5
442; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, v7, v8, v7
443; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v5, v8, v5
444; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v8, -v7, v7, v2
445; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v8, v5, v7
446; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v7, 0x37800000, v5
447; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
448; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v6
449; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
450; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v2, v3
451; GCN-DAZ-SAFE-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v4
452; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v5, v3
453; GCN-DAZ-SAFE-NEXT:    v_div_scale_f32 v6, vcc, v4, v2, v4
454; GCN-DAZ-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
455; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, -v3, v5, 1.0
456; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v7, v5, v5
457; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v7, v6, v5
458; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v8, -v3, v7, v6
459; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, v8, v5, v7
460; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v3, v7, v6
461; GCN-DAZ-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
462; GCN-DAZ-SAFE-NEXT:    v_div_fmas_f32 v3, v3, v5, v7
463; GCN-DAZ-SAFE-NEXT:    v_div_fixup_f32 v2, v3, v2, v4
464; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
465; GCN-DAZ-SAFE-NEXT:    s_endpgm
466;
467; GCN-IEEE-SAFE-LABEL: rsqrt_fmul:
468; GCN-IEEE-SAFE:       ; %bb.0:
469; GCN-IEEE-SAFE-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
470; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s3, 0xf000
471; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s2, 0
472; GCN-IEEE-SAFE-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
473; GCN-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0
474; GCN-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
475; GCN-IEEE-SAFE-NEXT:    s_mov_b64 s[8:9], s[6:7]
476; GCN-IEEE-SAFE-NEXT:    s_mov_b64 s[10:11], s[2:3]
477; GCN-IEEE-SAFE-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
478; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
479; GCN-IEEE-SAFE-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc
480; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
481; GCN-IEEE-SAFE-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc
482; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
483; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
484; GCN-IEEE-SAFE-NEXT:    v_mov_b32_e32 v6, 0x260
485; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v2
486; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s0, v2
487; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
488; GCN-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v5, v2
489; GCN-IEEE-SAFE-NEXT:    v_add_i32_e64 v7, s[0:1], -1, v5
490; GCN-IEEE-SAFE-NEXT:    v_add_i32_e64 v8, s[0:1], 1, v5
491; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v9, -v7, v5, v2
492; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v10, -v8, v5, v2
493; GCN-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[0:1], 0, v9
494; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[0:1]
495; GCN-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], 0, v10
496; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[0:1]
497; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v7, 0x37800000, v5
498; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
499; GCN-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v2, v6
500; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
501; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, v2, v3
502; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v3, s[0:1], v2, v2, v4
503; GCN-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v5, v3
504; GCN-IEEE-SAFE-NEXT:    v_div_scale_f32 v6, vcc, v4, v2, v4
505; GCN-IEEE-SAFE-NEXT:    s_mov_b64 s[0:1], s[4:5]
506; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v7, -v3, v5, 1.0
507; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v5, v7, v5, v5
508; GCN-IEEE-SAFE-NEXT:    v_mul_f32_e32 v7, v6, v5
509; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v8, -v3, v7, v6
510; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v7, v8, v5, v7
511; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v3, v7, v6
512; GCN-IEEE-SAFE-NEXT:    v_div_fmas_f32 v3, v3, v5, v7
513; GCN-IEEE-SAFE-NEXT:    v_div_fixup_f32 v2, v3, v2, v4
514; GCN-IEEE-SAFE-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
515; GCN-IEEE-SAFE-NEXT:    s_endpgm
516  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
517  %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid
518  %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
519  %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1
520  %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2
521
522  %a = load volatile float, ptr addrspace(1) %gep.0
523  %b = load volatile float, ptr addrspace(1) %gep.1
524  %c = load volatile float, ptr addrspace(1) %gep.2
525
526  %x = call contract float @llvm.sqrt.f32(float %a)
527  %y = fmul contract float %x, %b
528  %z = fdiv contract float %c, %y
529  store float %z, ptr addrspace(1) %out.gep
530  ret void
531}
532
533define amdgpu_kernel void @neg_rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) {
534; GCN-DAZ-UNSAFE-LABEL: neg_rsq_f32:
535; GCN-DAZ-UNSAFE:       ; %bb.0:
536; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
537; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
538; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s6, -1
539; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s10, s6
540; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s11, s7
541; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
542; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s8, s2
543; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s9, s3
544; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
545; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s4, s0
546; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s5, s1
547; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
548; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
549; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
550; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
551; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
552;
553; GCN-IEEE-UNSAFE-LABEL: neg_rsq_f32:
554; GCN-IEEE-UNSAFE:       ; %bb.0:
555; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
556; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
557; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s6, -1
558; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s10, s6
559; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s11, s7
560; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
561; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s8, s2
562; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s9, s3
563; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
564; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s4, s0
565; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s5, s1
566; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
567; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
568; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
569; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
570; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
571;
572; GCN-DAZ-SAFE-LABEL: neg_rsq_f32:
573; GCN-DAZ-SAFE:       ; %bb.0:
574; GCN-DAZ-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
575; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s7, 0xf000
576; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s6, -1
577; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s10, s6
578; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s11, s7
579; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
580; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s8, s2
581; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s9, s3
582; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
583; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, 0xf800000
584; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
585; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, s0
586; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, s1
587; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
588; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
589; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s2, v0
590; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
591; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
592; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v1
593; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
594; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v1, v3, 0.5
595; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
596; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v4, v1
597; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v3, v3, v0
598; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v3
599; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
600; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
601; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
602; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
603; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
604; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
605; GCN-DAZ-SAFE-NEXT:    s_endpgm
606;
607; SI-IEEE-SAFE-LABEL: neg_rsq_f32:
608; SI-IEEE-SAFE:       ; %bb.0:
609; SI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
610; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
611; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
612; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
613; SI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
614; SI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
615; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
616; SI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
617; SI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
618; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
619; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
620; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, 0x7f800000
621; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
622; SI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
623; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
624; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
625; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e64 s[0:1], s0, v0
626; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
627; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
628; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
629; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
630; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
631; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
632; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
633; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
634; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
635; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
636; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
637; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
638; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
639; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
640; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
641; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s2
642; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[0:1]
643; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
644; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
645; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
646; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
647; SI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
648; SI-IEEE-SAFE-NEXT:    s_endpgm
649;
650; CI-IEEE-SAFE-LABEL: neg_rsq_f32:
651; CI-IEEE-SAFE:       ; %bb.0:
652; CI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
653; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
654; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
655; CI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
656; CI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
657; CI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
658; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
659; CI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
660; CI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
661; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0xf800000
662; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
663; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
664; CI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
665; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
666; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
667; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e64 s[0:1], s0, v0
668; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
669; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
670; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
671; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
672; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
673; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
674; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
675; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
676; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
677; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
678; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
679; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
680; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
681; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
682; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
683; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
684; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
685; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
686; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
687; CI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
688; CI-IEEE-SAFE-NEXT:    s_endpgm
689; GCN-UNSAFE-LABEL: neg_rsq_f32:
690; GCN-UNSAFE:       ; %bb.0:
691; GCN-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
692; GCN-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
693; GCN-UNSAFE-NEXT:    s_mov_b32 s6, -1
694; GCN-UNSAFE-NEXT:    s_mov_b32 s10, s6
695; GCN-UNSAFE-NEXT:    s_mov_b32 s11, s7
696; GCN-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
697; GCN-UNSAFE-NEXT:    s_mov_b32 s8, s2
698; GCN-UNSAFE-NEXT:    s_mov_b32 s9, s3
699; GCN-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
700; GCN-UNSAFE-NEXT:    s_mov_b32 s4, s0
701; GCN-UNSAFE-NEXT:    s_mov_b32 s5, s1
702; GCN-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
703; GCN-UNSAFE-NEXT:    v_sqrt_f32_e32 v0, v0
704; GCN-UNSAFE-NEXT:    v_rcp_f32_e64 v0, -v0
705; GCN-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
706; GCN-UNSAFE-NEXT:    s_endpgm
707  %val = load float, ptr addrspace(1) %in, align 4
708  %sqrt = call contract float @llvm.sqrt.f32(float %val)
709  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
710  store float %div, ptr addrspace(1) %out, align 4
711  ret void
712}
713
; -1.0 / sqrt(-x) in a kernel: load x from %in, fneg it, take llvm.sqrt.f32,
; divide -1.0 by the root (both ops are `contract`; the fdiv carries
; !fpmath !0) and store the quotient to %out.
;
; Expected selection, as pinned by the assertions below:
;   * Runs with -enable-unsafe-fp-math (the *-UNSAFE prefixes) expect a single
;     v_rsq_f32 with a negated source, followed by an xor with 0x80000000 that
;     flips the sign bit of the result.
;   * DAZ-SAFE runs expand the sqrt around v_rsq_f32 plus a chain of v_fma_f32
;     steps and finish with one v_rcp_f32 whose source is negated.
;   * IEEE-SAFE runs expand the sqrt around v_sqrt_f32 plus a fix-up sequence
;     and build the reciprocal from frexp_mant / rcp / frexp_exp / ldexp. Only
;     the SI variant adds a |v0| < 0x7f800000 compare and select ahead of the
;     v_rcp_f32; the CI variant feeds the frexp mantissa to it directly.
714define amdgpu_kernel void @neg_rsq_neg_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) {
715; GCN-DAZ-UNSAFE-LABEL: neg_rsq_neg_f32:
716; GCN-DAZ-UNSAFE:       ; %bb.0:
717; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
718; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
719; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s6, -1
720; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s10, s6
721; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s11, s7
722; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
723; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s8, s2
724; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s9, s3
725; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
726; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s4, s0
727; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s5, s1
728; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
729; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
730; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
731; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
732; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
733;
734; GCN-IEEE-UNSAFE-LABEL: neg_rsq_neg_f32:
735; GCN-IEEE-UNSAFE:       ; %bb.0:
736; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
737; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
738; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s6, -1
739; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s10, s6
740; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s11, s7
741; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
742; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s8, s2
743; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s9, s3
744; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
745; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s4, s0
746; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s5, s1
747; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
748; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
749; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
750; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
751; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
752;
753; GCN-DAZ-SAFE-LABEL: neg_rsq_neg_f32:
754; GCN-DAZ-SAFE:       ; %bb.0:
755; GCN-DAZ-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
756; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s7, 0xf000
757; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s6, -1
758; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s10, s6
759; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s11, s7
760; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
761; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s8, s2
762; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s9, s3
763; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
764; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, 0x8f800000
765; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
766; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, s0
767; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, s1
768; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
769; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
770; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
771; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
772; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
773; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v1
774; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
775; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v1, v3, 0.5
776; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
777; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v4, v1
778; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v3, v3, v0
779; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v3
780; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
781; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
782; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
783; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
784; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
785; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
786; GCN-DAZ-SAFE-NEXT:    s_endpgm
787;
788; SI-IEEE-SAFE-LABEL: neg_rsq_neg_f32:
789; SI-IEEE-SAFE:       ; %bb.0:
790; SI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
791; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
792; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
793; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
794; SI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
795; SI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
796; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
797; SI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
798; SI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
799; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0x8f800000
800; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
801; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, 0x7f800000
802; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
803; SI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
804; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
805; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
806; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
807; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, s[0:1]
808; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
809; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
810; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
811; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
812; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
813; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
814; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
815; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
816; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
817; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
818; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
819; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
820; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
821; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
822; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s2
823; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[0:1]
824; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
825; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
826; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
827; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
828; SI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
829; SI-IEEE-SAFE-NEXT:    s_endpgm
830;
831; CI-IEEE-SAFE-LABEL: neg_rsq_neg_f32:
832; CI-IEEE-SAFE:       ; %bb.0:
833; CI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
834; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
835; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
836; CI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
837; CI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
838; CI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
839; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
840; CI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
841; CI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
842; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0x8f800000
843; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
844; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
845; CI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
846; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
847; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
848; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
849; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, s[0:1]
850; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
851; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
852; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
853; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
854; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
855; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
856; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
857; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
858; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
859; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
860; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
861; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
862; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
863; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
864; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
865; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
866; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
867; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
868; CI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
869; CI-IEEE-SAFE-NEXT:    s_endpgm
888  %val = load float, ptr addrspace(1) %in, align 4
889  %val.fneg = fneg float %val
890  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
891  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
892  store float %div, ptr addrspace(1) %out, align 4
893  ret void
894}
895
; Callable-function form of -1.0 / sqrt(-x): %val is negated, passed to
; llvm.sqrt.f32 (contract), and used as the divisor of -1.0 (contract,
; !fpmath !0); the quotient is the return value. There is no memory traffic,
; so the assertions cover only the arithmetic between the entry s_waitcnt and
; the s_setpc_b64 return.
;
; Expected selection, as pinned by the assertions below:
;   * Runs with -enable-unsafe-fp-math (the *-UNSAFE prefixes) expect one
;     v_rsq_f32 with a negated source, followed by an xor with 0x80000000 that
;     flips the sign bit of the result.
;   * DAZ-SAFE runs expand the sqrt around v_rsq_f32 plus a chain of v_fma_f32
;     steps and finish with one v_rcp_f32 whose source is negated.
;   * IEEE-SAFE runs expand the sqrt around v_sqrt_f32 plus a fix-up sequence
;     and build the reciprocal from frexp_mant / rcp / frexp_exp / ldexp. Only
;     the SI variant adds a |v0| < 0x7f800000 compare and select ahead of the
;     v_rcp_f32.
896define float @v_neg_rsq_neg_f32(float %val) {
897; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_f32:
898; GCN-DAZ-UNSAFE:       ; %bb.0:
899; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
901; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
902; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
903;
904; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_f32:
905; GCN-IEEE-UNSAFE:       ; %bb.0:
906; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
907; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
908; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
909; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
910;
911; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_f32:
912; GCN-DAZ-SAFE:       ; %bb.0:
913; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
915; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
916; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
917; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
918; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
919; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v1
920; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
921; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
922; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v3, v2
923; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v2, v0
924; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v3, v1
925; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v2
926; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
927; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
928; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
929; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
930; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
931; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
932; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
933;
934; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32:
935; SI-IEEE-SAFE:       ; %bb.0:
936; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
938; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
939; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
940; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
941; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
942; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
943; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
944; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
945; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
946; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
947; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
948; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
949; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
950; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
951; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
952; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
953; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
954; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
955; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
956; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
957; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
958; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[4:5]
959; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
960; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
961; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
962; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
963; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
964;
965; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32:
966; CI-IEEE-SAFE:       ; %bb.0:
967; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
968; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
969; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
970; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
971; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
972; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
973; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
974; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
975; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
976; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
977; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
978; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
979; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
980; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
981; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
982; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
983; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
984; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
985; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
986; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
987; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
988; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
989; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
990; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
991; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
992  %val.fneg = fneg float %val
993  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
994  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
995  ret float %div
996}
997
; Two-element vector form of -1.0 / sqrt(-x): %val is negated, passed to
; llvm.sqrt.v2f32 (contract), and used as the divisor of a <-1.0, -1.0> splat
; (contract, !fpmath !0); the quotient vector is the return value.
;
; Expected selection, as pinned by the assertions below (the two lanes appear
; in v0 and v1 and each gets its own copy of the scalar sequence):
;   * Runs with -enable-unsafe-fp-math (the *-UNSAFE prefixes) expect two
;     v_rsq_f32 with negated sources, followed by two xors with 0x80000000.
;   * DAZ-SAFE runs expand each sqrt around v_rsq_f32 plus a chain of
;     v_fma_f32 steps and finish with two v_rcp_f32 whose sources are negated.
;   * IEEE-SAFE runs expand each sqrt around v_sqrt_f32 plus a fix-up sequence
;     and build each reciprocal from frexp_mant / rcp / frexp_exp / ldexp. Only
;     the SI variant adds a |vN| < 0x7f800000 compare and select ahead of each
;     v_rcp_f32.
; In the safe variants the input pre-scale is a v_mul_f32 of the negated lane
; by an SGPR holding 0x4f800000, with the 0x8f800000 threshold and the
; 0x4f800000 scale materialized once and shared by both lanes.
998define <2 x float> @v_neg_rsq_neg_v2f32(<2 x float> %val) {
999; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_v2f32:
1000; GCN-DAZ-UNSAFE:       ; %bb.0:
1001; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1003; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
1004; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1005; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1006; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1007;
1008; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_v2f32:
1009; GCN-IEEE-UNSAFE:       ; %bb.0:
1010; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1012; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
1013; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1014; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1015; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1016;
1017; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_v2f32:
1018; GCN-DAZ-SAFE:       ; %bb.0:
1019; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
1021; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, 0x4f800000
1022; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s5
1023; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
1024; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
1025; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v1
1026; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v1, v2
1027; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1028; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
1029; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
1030; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v1
1031; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
1032; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
1033; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1034; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1035; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v3, -v0, s5
1036; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
1037; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v3, vcc
1038; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v3, v0
1039; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v4, 0x260
1040; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v4
1041; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1042; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v3
1043; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1044; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, 0.5
1045; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v5, v2
1046; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v6, -v2, v2, v0
1047; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v5, v3
1048; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v6, v3, v2
1049; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1050; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1051; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
1052; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1053; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1054; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v1, -v1
1055; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1056;
1057; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32:
1058; SI-IEEE-SAFE:       ; %bb.0:
1059; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1060; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
1061; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
1062; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s7
1063; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
1064; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
1065; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
1066; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1067; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
1068; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1069; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1070; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1071; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
1072; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1073; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1074; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1075; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1076; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v0, s7
1077; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
1078; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v4, vcc
1079; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
1080; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1081; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
1082; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1083; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
1084; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
1085; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1086; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
1087; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
1088; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
1089; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1090; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
1091; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
1092; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1093; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1094; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1095; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x7f800000
1096; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1097; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
1098; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
1099; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1100; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1101; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1102; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1103; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
1104; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
1105; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v1, v2, s[4:5]
1106; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1107; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1108; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1109; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
1110; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1111;
1112; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32:
1113; CI-IEEE-SAFE:       ; %bb.0:
1114; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1115; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
1116; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
1117; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s7
1118; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
1119; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
1120; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
1121; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1122; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
1123; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1124; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1125; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1126; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
1127; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1128; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1129; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1130; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1131; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v0, s7
1132; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
1133; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v4, vcc
1134; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
1135; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1136; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
1137; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1138; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
1139; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
1140; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1141; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
1142; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
1143; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
1144; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1145; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
1146; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
1147; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1148; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1149; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1150; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1151; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1152; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1153; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1154; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1155; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
1156; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1157; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1158; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1159; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
1160; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1161  %val.fneg = fneg <2 x float> %val
1162  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
1163  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
1164  ret <2 x float> %div
1165}
1166
1167define float @v_neg_rsq_neg_f32_foldable_user(float %val0, float %val1) {
1168; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
1169; GCN-DAZ-UNSAFE:       ; %bb.0:
1170; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1171; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1172; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
1173; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1174;
1175; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
1176; GCN-IEEE-UNSAFE:       ; %bb.0:
1177; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1179; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
1180; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1181;
1182; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
1183; GCN-DAZ-SAFE:       ; %bb.0:
1184; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
1186; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
1187; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
1188; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
1189; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v0
1190; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v2
1191; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1192; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
1193; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
1194; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v0
1195; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
1196; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
1197; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1198; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1199; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1200; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1201; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1202; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1203; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1204; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1205;
1206; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
1207; SI-IEEE-SAFE:       ; %bb.0:
1208; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
1210; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
1211; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
1212; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
1213; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
1214; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1215; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1216; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1217; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1218; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1219; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1220; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1221; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1222; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1223; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1224; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1225; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1226; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1227; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
1228; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1229; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
1230; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
1231; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1232; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1233; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1234; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1235; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1236; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1237;
1238; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
1239; CI-IEEE-SAFE:       ; %bb.0:
1240; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
1242; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
1243; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
1244; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
1245; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
1246; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1247; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1248; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1249; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1250; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1251; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1252; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1253; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1254; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1255; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1256; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1257; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1258; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1259; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1260; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1261; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1262; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1263; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1264; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1265; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1266  %val0.neg = fneg float %val0
1267  %sqrt = call contract float @llvm.sqrt.f32(float %val0.neg)
1268  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
1269  %user = fmul contract float %div, %val1
1270  ret float %user
1271}
1272
; Test -1.0 / sqrt(-x) on each lane of a <2 x float>, where the quotient is
; then multiplied by %val1 (the "foldable user" of the negated result).
;
; Expected output, as recorded by the autogenerated assertions below
; - runs with -enable-unsafe-fp-math (the GCN-*-UNSAFE prefixes) expect one
;   v_rsq_f32 per lane with a negated source operand, followed by one
;   v_mul_f32 per lane that negates the rsq result through a source modifier.
; - the GCN-DAZ-SAFE prefix expects a v_rsq_f32-seeded fma sequence per lane
;   (presumably the sqrt expansion), then v_rcp_f32 with a negated source and
;   a plain v_mul_f32.
; - the SI/CI IEEE SAFE prefixes expect v_sqrt_f32 with +/-1 integer
;   adjustments, then a frexp_mant / rcp / frexp_exp / ldexp sequence. Only
;   the SI variant has the extra |x| compare against 0x7f800000 and the
;   v_cndmask that follows it.
;
; NOTE(review): !fpmath !0 on the fdiv refers to a metadata node defined
; outside this part of the file; confirm its accuracy value there.
1273define <2 x float> @v_neg_rsq_neg_v2f32_foldable_user(<2 x float> %val0, <2 x float> %val1) {
1274; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user:
1275; GCN-DAZ-UNSAFE:       ; %bb.0:
1276; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1277; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1278; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
1279; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v2
1280; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v1, -v1, v3
1281; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1282;
1283; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user:
1284; GCN-IEEE-UNSAFE:       ; %bb.0:
1285; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1286; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
1287; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
1288; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v2
1289; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v1, -v1, v3
1290; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1291;
1292; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user:
1293; GCN-DAZ-SAFE:       ; %bb.0:
1294; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
1296; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, 0x4f800000
1297; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v4, -v1, s5
1298; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
1299; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v4, vcc
1300; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v4, v1
1301; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, v1, v4
1302; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v4, 0.5, v4
1303; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v6, -v4, v5, 0.5
1304; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v5, v6, v5
1305; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, -v5, v5, v1
1306; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v4, v6, v4
1307; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v7, v4, v5
1308; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1309; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1310; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v5, -v0, s5
1311; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
1312; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v5, vcc
1313; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v5, v0
1314; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v6, 0x260
1315; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v6
1316; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1317; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v4, v0, v5
1318; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0.5, v5
1319; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, -v5, v4, 0.5
1320; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v4, v7, v4
1321; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v8, -v4, v4, v0
1322; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v5, v7, v5
1323; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v8, v5, v4
1324; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1325; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1326; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v6
1327; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1328; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1329; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v1, -v1
1330; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1331; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1332; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1333;
1334; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user:
1335; SI-IEEE-SAFE:       ; %bb.0:
1336; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1337; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
1338; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
1339; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v1, s7
1340; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
1341; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v4, vcc
1342; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v1
1343; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
1344; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v5, v4, v1
1345; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1346; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[4:5]
1347; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v4
1348; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v6, v4, v1
1349; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1350; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v5, v6, s[4:5]
1351; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1352; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1353; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v6, -v0, s7
1354; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
1355; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v6, vcc
1356; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v6, v0
1357; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v5, 0x260
1358; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
1359; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1360; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v6
1361; SI-IEEE-SAFE-NEXT:    v_fma_f32 v7, -v4, v6, v0
1362; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v7
1363; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
1364; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v6
1365; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v7, v6, v0
1366; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v6
1367; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
1368; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
1369; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1370; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
1371; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1372; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x7f800000
1373; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v0
1374; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
1375; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, -v0, v4, s[4:5]
1376; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1377; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1378; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1379; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
1380; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v1
1381; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
1382; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, -v1, v4, s[4:5]
1383; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1384; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1385; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1386; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v4, v1
1387; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1388; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1389; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1390;
1391; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user:
1392; CI-IEEE-SAFE:       ; %bb.0:
1393; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
1395; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
1396; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v1, s7
1397; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
1398; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v4, vcc
1399; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v1
1400; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
1401; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v5, v4, v1
1402; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1403; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[4:5]
1404; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v4
1405; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v6, v4, v1
1406; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1407; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v5, v6, s[4:5]
1408; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1409; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1410; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v6, -v0, s7
1411; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
1412; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v6, vcc
1413; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v6, v0
1414; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v5, 0x260
1415; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
1416; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1417; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v6
1418; CI-IEEE-SAFE-NEXT:    v_fma_f32 v7, -v4, v6, v0
1419; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v7
1420; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
1421; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v6
1422; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v7, v6, v0
1423; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v6
1424; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
1425; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
1426; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1427; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
1428; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1429; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v0
1430; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1431; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1432; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1433; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
1434; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v1
1435; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1436; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1437; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1438; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v4, v1
1439; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1440; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1441; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them rather than editing by hand.
1442  %val0.fneg = fneg <2 x float> %val0
1443  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg)
1444  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
1445  %user = fmul contract <2 x float> %div, %val1
1446  ret <2 x float> %user
1447}
1448
; Test the scalar pattern -1.0 / sqrt(x) with no further user of the result.
;
; Expected output, as recorded by the autogenerated assertions below
; - runs with -enable-unsafe-fp-math (the GCN-*-UNSAFE prefixes) expect a
;   single v_rsq_f32 followed by v_xor_b32 with 0x80000000, i.e. the sign bit
;   of the rsq result is flipped with an integer xor.
; - the GCN-DAZ-SAFE prefix expects a v_rsq_f32-seeded fma sequence
;   (presumably the sqrt expansion) ending in v_rcp_f32 with a negated source.
; - the SI/CI IEEE SAFE prefixes expect v_sqrt_f32 with +/-1 integer
;   adjustments, then a frexp_mant / rcp / frexp_exp / ldexp sequence. Only
;   the SI variant has the extra |x| compare against 0x7f800000 and the
;   v_cndmask that follows it.
;
; NOTE(review): !fpmath !0 on the fdiv refers to a metadata node defined
; outside this part of the file; confirm its accuracy value there.
1449define float @v_neg_rsq_f32(float %val) {
1450; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_f32:
1451; GCN-DAZ-UNSAFE:       ; %bb.0:
1452; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1454; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1455; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1456;
1457; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_f32:
1458; GCN-IEEE-UNSAFE:       ; %bb.0:
1459; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1461; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1462; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1463;
1464; GCN-DAZ-SAFE-LABEL: v_neg_rsq_f32:
1465; GCN-DAZ-SAFE:       ; %bb.0:
1466; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1468; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1469; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1470; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1471; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
1472; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v1
1473; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
1474; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
1475; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v3, v2
1476; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v2, v0
1477; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v3, v1
1478; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v2
1479; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1480; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1481; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
1482; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1483; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1484; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1485; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1486;
1487; SI-IEEE-SAFE-LABEL: v_neg_rsq_f32:
1488; SI-IEEE-SAFE:       ; %bb.0:
1489; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1491; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1492; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1493; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1494; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
1495; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1496; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1497; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1498; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1499; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1500; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1501; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1502; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1503; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1504; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1505; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
1506; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1507; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1508; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
1509; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
1510; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
1511; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[4:5]
1512; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
1513; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1514; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1515; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
1516; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1517;
1518; CI-IEEE-SAFE-LABEL: v_neg_rsq_f32:
1519; CI-IEEE-SAFE:       ; %bb.0:
1520; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1521; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1522; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
1523; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1524; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
1525; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
1526; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
1527; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
1528; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
1529; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
1530; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
1531; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
1532; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
1533; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
1534; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
1535; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1536; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
1537; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
1538; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
1539; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
1540; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
1541; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1542; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1543; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
1544; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them rather than editing by hand.
1545  %sqrt = call contract float @llvm.sqrt.f32(float %val)
1546  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
1547  ret float %div
1548}
1549
; Test -1.0 / sqrt(x) on each lane of a <2 x float>, with no further user of
; the result.
;
; Expected output, as recorded by the autogenerated assertions below
; - runs with -enable-unsafe-fp-math (the GCN-*-UNSAFE prefixes) expect one
;   v_rsq_f32 per lane followed by one v_xor_b32 with 0x80000000 per lane,
;   i.e. the sign bit of each rsq result is flipped with an integer xor.
; - the GCN-DAZ-SAFE prefix expects a v_rsq_f32-seeded fma sequence per lane
;   (presumably the sqrt expansion) ending in v_rcp_f32 with a negated source.
; - the SI/CI IEEE SAFE prefixes expect v_sqrt_f32 with +/-1 integer
;   adjustments, then a frexp_mant / rcp / frexp_exp / ldexp sequence per
;   lane. Only the SI variant has the extra |x| compare against 0x7f800000
;   and the v_cndmask that follows it.
;
; NOTE(review): !fpmath !0 on the fdiv refers to a metadata node defined
; outside this part of the file; confirm its accuracy value there.
1550define <2 x float> @v_neg_rsq_v2f32(<2 x float> %val) {
1551; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_v2f32:
1552; GCN-DAZ-UNSAFE:       ; %bb.0:
1553; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1555; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v1
1556; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1557; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1558; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1559;
1560; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_v2f32:
1561; GCN-IEEE-UNSAFE:       ; %bb.0:
1562; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1564; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v1
1565; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1566; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1567; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1568;
1569; GCN-DAZ-SAFE-LABEL: v_neg_rsq_v2f32:
1570; GCN-DAZ-SAFE:       ; %bb.0:
1571; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1572; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1573; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v1
1574; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
1575; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1576; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v1
1577; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v1, v2
1578; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1579; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
1580; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
1581; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v1
1582; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
1583; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
1584; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1585; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1586; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v0
1587; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1588; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
1589; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v3, v0
1590; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v4, 0x260
1591; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v4
1592; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1593; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v3
1594; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0.5, v3
1595; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, 0.5
1596; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v5, v2
1597; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v6, -v2, v2, v0
1598; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v5, v3
1599; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v6, v3, v2
1600; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1601; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1602; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
1603; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1604; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1605; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v1, -v1
1606; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1607;
1608; SI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32:
1609; SI-IEEE-SAFE:       ; %bb.0:
1610; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1611; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0xf800000
1612; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v1
1613; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1614; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1615; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
1616; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1617; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
1618; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1619; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1620; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1621; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
1622; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1623; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1624; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1625; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1626; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
1627; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1628; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1629; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
1630; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1631; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
1632; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1633; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
1634; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
1635; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1636; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
1637; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
1638; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
1639; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1640; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
1641; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
1642; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1643; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1644; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1645; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x7f800000
1646; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1647; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
1648; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
1649; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1650; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1651; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1652; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1653; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
1654; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
1655; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v1, v2, s[4:5]
1656; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1657; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1658; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1659; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
1660; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1661;
1662; CI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32:
1663; CI-IEEE-SAFE:       ; %bb.0:
1664; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1665; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0xf800000
1666; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v1
1667; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1668; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
1669; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
1670; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1671; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
1672; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1673; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1674; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1675; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
1676; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1677; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1678; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1679; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1680; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v0
1681; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1682; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
1683; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
1684; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1685; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
1686; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
1687; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
1688; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
1689; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
1690; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
1691; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
1692; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
1693; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1694; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
1695; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
1696; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
1697; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1698; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1699; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1700; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1701; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1702; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1703; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1704; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
1705; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1706; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1707; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1708; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
1709; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them rather than editing by hand.
1710  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val)
1711  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
1712  ret <2 x float> %div
1713}
1714
; Test the scalar pattern -1.0 / sqrt(x) where the quotient is then
; multiplied by %val1 (the "foldable user" of the negated result).
;
; Expected output, as recorded by the autogenerated assertions below
; - runs with -enable-unsafe-fp-math (the GCN-*-UNSAFE prefixes) expect a
;   single v_rsq_f32 followed by v_mul_f32 that negates the rsq result
;   through a source modifier, so no separate negate instruction appears.
; - the GCN-DAZ-SAFE prefix expects a v_rsq_f32-seeded fma sequence
;   (presumably the sqrt expansion), then v_rcp_f32 with a negated source and
;   a plain v_mul_f32.
; - the SI/CI IEEE SAFE prefixes expect v_sqrt_f32 with +/-1 integer
;   adjustments, then a frexp_mant / rcp / frexp_exp / ldexp sequence and a
;   plain v_mul_f32. Only the SI variant has the extra |x| compare against
;   0x7f800000 and the v_cndmask that follows it.
;
; NOTE(review): !fpmath !0 on the fdiv refers to a metadata node defined
; outside this part of the file; confirm its accuracy value there.
1715define float @v_neg_rsq_f32_foldable_user(float %val0, float %val1) {
1716; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_f32_foldable_user:
1717; GCN-DAZ-UNSAFE:       ; %bb.0:
1718; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1720; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
1721; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1722;
1723; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_f32_foldable_user:
1724; GCN-IEEE-UNSAFE:       ; %bb.0:
1725; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1727; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
1728; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1729;
1730; GCN-DAZ-SAFE-LABEL: v_neg_rsq_f32_foldable_user:
1731; GCN-DAZ-SAFE:       ; %bb.0:
1732; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1733; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1734; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1735; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1736; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1737; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v0
1738; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v2
1739; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
1740; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
1741; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
1742; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v0
1743; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
1744; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
1745; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1746; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1747; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1748; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1749; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1750; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1751; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1752; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1753;
1754; SI-IEEE-SAFE-LABEL: v_neg_rsq_f32_foldable_user:
1755; SI-IEEE-SAFE:       ; %bb.0:
1756; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1757; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1758; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1759; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1760; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1761; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
1762; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1763; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1764; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1765; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1766; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1767; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1768; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1769; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1770; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1771; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1772; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1773; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1774; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1775; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
1776; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1777; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
1778; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
1779; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1780; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1781; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1782; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1783; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1784; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1785;
1786; CI-IEEE-SAFE-LABEL: v_neg_rsq_f32_foldable_user:
1787; CI-IEEE-SAFE:       ; %bb.0:
1788; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1790; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
1791; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1792; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
1793; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
1794; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
1795; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
1796; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
1797; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
1798; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
1799; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
1800; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
1801; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
1802; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
1803; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
1804; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
1805; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
1806; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
1807; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
1808; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
1809; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1810; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1811; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
1812; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
1813; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated (see the NOTE on the
; first line of the file); regenerate them rather than editing by hand.
1814  %sqrt = call contract float @llvm.sqrt.f32(float %val0)
1815  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
1816  %user = fmul contract float %div, %val1
1817  ret float %user
1818}
1819
; Vector (<2 x float>) form of the negated reciprocal-square-root test:
; computes (-1.0 / sqrt(%val0)) * %val1 per lane. The sqrt call carries no
; !fpmath, the fdiv is tagged !fpmath !0, and every operation has 'contract'.
; The runs with -enable-unsafe-fp-math expect one v_rsq_f32 per lane with the
; negation folded into a v_mul_f32_e64 source modifier. The remaining runs
; expect the square root and the reciprocal to be expanded as separate
; sequences, with the negation applied on the reciprocal's input.
1820define <2 x float> @v_neg_rsq_v2f32_foldable_user(<2 x float> %val0, <2 x float> %val1) {
1821; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_v2f32_foldable_user:
1822; GCN-DAZ-UNSAFE:       ; %bb.0:
1823; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1825; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v1
1826; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v2
1827; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v1, -v1, v3
1828; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1829;
1830; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_v2f32_foldable_user:
1831; GCN-IEEE-UNSAFE:       ; %bb.0:
1832; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1833; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
1834; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v1
1835; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v2
1836; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v1, -v1, v3
1837; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
1838;
1839; GCN-DAZ-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user:
1840; GCN-DAZ-SAFE:       ; %bb.0:
1841; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1842; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
1843; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1844; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v1
1845; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1846; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v4, v1
1847; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, v1, v4
1848; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v4, 0.5, v4
1849; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v6, -v4, v5, 0.5
1850; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v5, v6, v5
1851; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, -v5, v5, v1
1852; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v4, v6, v4
1853; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v7, v4, v5
1854; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1855; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1856; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v0
1857; GCN-DAZ-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
1858; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
1859; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v5, v0
1860; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v6, 0x260
1861; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v6
1862; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1863; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v4, v0, v5
1864; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0.5, v5
1865; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v7, -v5, v4, 0.5
1866; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v4, v7, v4
1867; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v8, -v4, v4, v0
1868; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, v5, v7, v5
1869; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, v8, v5, v4
1870; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1871; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1872; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v6
1873; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1874; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
1875; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v1, -v1
1876; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1877; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1878; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
1879;
1880; SI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user:
1881; SI-IEEE-SAFE:       ; %bb.0:
1882; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0xf800000
1884; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1885; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1886; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1887; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v1
1888; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
1889; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v5, v4, v1
1890; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1891; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[4:5]
1892; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v4
1893; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v6, v4, v1
1894; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1895; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v5, v6, s[4:5]
1896; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1897; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1898; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v0
1899; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1900; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1901; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v6, v0
1902; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v5, 0x260
1903; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
1904; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1905; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v6
1906; SI-IEEE-SAFE-NEXT:    v_fma_f32 v7, -v4, v6, v0
1907; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v7
1908; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
1909; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v6
1910; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v7, v6, v0
1911; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v6
1912; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
1913; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
1914; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1915; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
1916; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1917; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x7f800000
1918; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v0
1919; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
1920; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, -v0, v4, s[4:5]
1921; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1922; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1923; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1924; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
1925; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v1
1926; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
1927; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, -v1, v4, s[4:5]
1928; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1929; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1930; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1931; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v4, v1
1932; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1933; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1934; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1935;
1936; CI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user:
1937; CI-IEEE-SAFE:       ; %bb.0:
1938; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0xf800000
1940; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v1
1941; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v1
1942; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
1943; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v1
1944; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], -1, v4
1945; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v5, v4, v1
1946; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v6
1947; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v5, v4, v5, s[4:5]
1948; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v6, s[4:5], 1, v4
1949; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v6, v4, v1
1950; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
1951; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v5, v6, s[4:5]
1952; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v5, 0x37800000, v4
1953; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
1954; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v0
1955; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s6, v0
1956; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
1957; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v6, v0
1958; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v5, 0x260
1959; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v5
1960; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
1961; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], -1, v6
1962; CI-IEEE-SAFE-NEXT:    v_fma_f32 v7, -v4, v6, v0
1963; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v7
1964; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v6, v4, s[4:5]
1965; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v7, s[4:5], 1, v6
1966; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v7, v6, v0
1967; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v6
1968; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[4:5]
1969; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v6, 0x37800000, v4
1970; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
1971; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v5
1972; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
1973; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v0
1974; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1975; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
1976; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
1977; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v4, v0
1978; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v4, -v1
1979; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v4, v4
1980; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
1981; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
1982; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v4, v1
1983; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v2
1984; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, v1, v3
1985; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
1986  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0)
1987  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
1988  %user = fmul contract <2 x float> %div, %val1
1989  ret <2 x float> %user
1990}
1991
; Basic scalar 1.0 / sqrt(%val): both the sqrt call and the fdiv carry
; 'contract' and !fpmath !1. The preserve-sign denormal runs and the IEEE runs
; with -enable-unsafe-fp-math expect a bare v_rsq_f32. The IEEE runs without
; unsafe-fp-math additionally expect the input to be scaled by v_ldexp_f32
; with 24 when it compares below 0x800000, and the result rescaled with 12.
1992define float @v_rsq_f32(float %val) {
1993; GCN-DAZ-LABEL: v_rsq_f32:
1994; GCN-DAZ:       ; %bb.0:
1995; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1996; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
1997; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
1998;
1999; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32:
2000; GCN-IEEE-UNSAFE:       ; %bb.0:
2001; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2002; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2003; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2004;
2005; GCN-IEEE-SAFE-LABEL: v_rsq_f32:
2006; GCN-IEEE-SAFE:       ; %bb.0:
2007; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2008; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x800000
2009; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2010; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, 0, 24, vcc
2011; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2012; GCN-IEEE-SAFE-NEXT:    v_rsq_f32_e32 v0, v0
2013; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, 0, 12, vcc
2014; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2015; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2016  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
2017  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2018  ret float %div
2019}
2020
; The sqrt result has a second use: it is returned in field 0 of the struct,
; next to the reciprocal in field 1. The runs with -enable-unsafe-fp-math
; expect both a v_sqrt_f32 and a v_rsq_f32 of the original input. The other
; runs expect the reciprocal to be computed from the square-root result.
2021define { float, float } @v_rsq_f32_multi_use(float %val) {
2022; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_multi_use:
2023; GCN-DAZ-UNSAFE:       ; %bb.0:
2024; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2025; GCN-DAZ-UNSAFE-NEXT:    v_sqrt_f32_e32 v2, v0
2026; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v0
2027; GCN-DAZ-UNSAFE-NEXT:    v_mov_b32_e32 v0, v2
2028; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2029;
2030; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_multi_use:
2031; GCN-IEEE-UNSAFE:       ; %bb.0:
2032; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033; GCN-IEEE-UNSAFE-NEXT:    v_sqrt_f32_e32 v2, v0
2034; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v1, v0
2035; GCN-IEEE-UNSAFE-NEXT:    v_mov_b32_e32 v0, v2
2036; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2037;
2038; GCN-DAZ-SAFE-LABEL: v_rsq_f32_multi_use:
2039; GCN-DAZ-SAFE:       ; %bb.0:
2040; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2041; GCN-DAZ-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
2042; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v1, v0
2043; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
2044;
2045; SI-IEEE-SAFE-LABEL: v_rsq_f32_multi_use:
2046; SI-IEEE-SAFE:       ; %bb.0:
2047; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2048; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2049; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2050; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2051; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2052; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2053; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2054; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2055; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2056; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2057; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2058; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2059; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2060; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2061; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2062; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2063; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2064; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2065; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2066; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
2067; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2068; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2069; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
2070; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2071; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v2, v0
2072; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v2, vcc, 0, v2
2073; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2074; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2075;
2076; CI-IEEE-SAFE-LABEL: v_rsq_f32_multi_use:
2077; CI-IEEE-SAFE:       ; %bb.0:
2078; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2079; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2080; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2081; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2082; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2083; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2084; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2085; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2086; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2087; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2088; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2089; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2090; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2091; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2092; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2093; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2094; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2095; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2096; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2097; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2098; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2099; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v2, v0
2100; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v2, vcc, 0, v2
2101; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v1, v2
2102; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2103  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
2104  %insert.0 = insertvalue { float, float } poison, float %sqrt, 0
2105  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2106  %insert.1 = insertvalue { float, float } %insert.0, float %div, 1
2107  ret { float, float } %insert.1
2108}
2109
; Same shape as v_rsq_f32, except the sqrt call has no 'contract' flag (the
; fdiv still has it). The runs with -enable-unsafe-fp-math still expect a
; single v_rsq_f32; the other runs expect a square root followed by a
; reciprocal instead of v_rsq_f32.
2110define float @v_rsq_f32_missing_contract0(float %val) {
2111; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_missing_contract0:
2112; GCN-DAZ-UNSAFE:       ; %bb.0:
2113; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2114; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2115; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2116;
2117; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract0:
2118; GCN-IEEE-UNSAFE:       ; %bb.0:
2119; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2120; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2121; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2122;
2123; GCN-DAZ-SAFE-LABEL: v_rsq_f32_missing_contract0:
2124; GCN-DAZ-SAFE:       ; %bb.0:
2125; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2126; GCN-DAZ-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
2127; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v0, v0
2128; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
2129;
2130; SI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract0:
2131; SI-IEEE-SAFE:       ; %bb.0:
2132; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2133; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2134; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2135; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2136; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2137; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2138; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2139; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2140; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2141; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2142; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2143; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2144; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2145; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2146; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2147; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2148; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2149; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2150; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2151; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
2152; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2153; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2154; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
2155; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2156; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2157; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2158; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2159; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2160;
2161; CI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract0:
2162; CI-IEEE-SAFE:       ; %bb.0:
2163; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2164; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2165; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2166; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2167; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2168; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2169; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2170; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2171; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2172; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2173; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2174; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2175; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2176; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2177; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2178; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2179; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2180; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2181; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2182; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2183; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2184; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2185; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2186; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2187; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2188  %sqrt = call float @llvm.sqrt.f32(float %val), !fpmath !1
2189  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2190  ret float %div
2191}
2192
; Same shape as v_rsq_f32, except the fdiv has no 'contract' flag (the sqrt
; call still has it). The runs with -enable-unsafe-fp-math still expect a
; single v_rsq_f32; the other runs expect a square root followed by a
; reciprocal instead of v_rsq_f32.
2193define float @v_rsq_f32_missing_contract1(float %val) {
2194; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_missing_contract1:
2195; GCN-DAZ-UNSAFE:       ; %bb.0:
2196; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2197; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2198; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2199;
2200; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract1:
2201; GCN-IEEE-UNSAFE:       ; %bb.0:
2202; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2203; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2204; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2205;
2206; GCN-DAZ-SAFE-LABEL: v_rsq_f32_missing_contract1:
2207; GCN-DAZ-SAFE:       ; %bb.0:
2208; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2209; GCN-DAZ-SAFE-NEXT:    v_sqrt_f32_e32 v0, v0
2210; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e32 v0, v0
2211; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
2212;
2213; SI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract1:
2214; SI-IEEE-SAFE:       ; %bb.0:
2215; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2216; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2217; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2218; SI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2219; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2220; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2221; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2222; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2223; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2224; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2225; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2226; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2227; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2228; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2229; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2230; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2231; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2232; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2233; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2234; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
2235; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2236; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
2237; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v0, v1, vcc
2238; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2239; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2240; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2241; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2242; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2243;
2244; CI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract1:
2245; CI-IEEE-SAFE:       ; %bb.0:
2246; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2247; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0xf800000
2248; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
2249; CI-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2250; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
2251; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
2252; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
2253; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
2254; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
2255; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
2256; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
2257; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
2258; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
2259; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
2260; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
2261; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
2262; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
2263; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
2264; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
2265; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e32 v1, v0
2266; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
2267; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
2268; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
2269; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
2270; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2271  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
2272  %div = fdiv float 1.0, %sqrt, !fpmath !1
2273  ret float %div
2274}
2275
2276; Test that we contract into FMA for an fadd user after introducing
2277; the fmul.
; Computes (1.0 / sqrt(%val0)) + %val1 with 'contract' on the sqrt, fdiv and
; fadd. In the IEEE runs without -enable-unsafe-fp-math the rescale after
; v_rsq_f32 is expected as a v_cndmask_b32 select between 1.0 and 0x45800000
; (4096.0) that a single v_fma_f32 multiplies in while also adding %val1.
; All other runs expect v_rsq_f32 followed by a plain v_add_f32.
2278define float @v_rsq_f32_contractable_user(float %val0, float %val1) {
2279; GCN-DAZ-LABEL: v_rsq_f32_contractable_user:
2280; GCN-DAZ:       ; %bb.0:
2281; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2282; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2283; GCN-DAZ-NEXT:    v_add_f32_e32 v0, v0, v1
2284; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2285;
2286; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user:
2287; GCN-IEEE-UNSAFE:       ; %bb.0:
2288; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2289; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2290; GCN-IEEE-UNSAFE-NEXT:    v_add_f32_e32 v0, v0, v1
2291; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2292;
2293; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user:
2294; GCN-IEEE-SAFE:       ; %bb.0:
2295; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2296; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x800000
2297; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2298; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, vcc
2299; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2300; GCN-IEEE-SAFE-NEXT:    v_rsq_f32_e32 v0, v0
2301; GCN-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x45800000
2302; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
2303; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v0, v0, v2, v1
2304; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2305  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
2306  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2307  %add = fadd contract float %div, %val1
2308  ret float %add
2309}
2310
2311; Missing contract on the fdiv
; NOTE(review): the fdiv in this function still carries 'contract', so the IR
; and the expected output are currently identical to
; v_rsq_f32_contractable_user and the missing-flag case named by this test is
; not exercised. Dropping the flag from the fdiv requires regenerating the
; assertions with utils/update_llc_test_checks.py.
2312define float @v_rsq_f32_contractable_user_missing_contract0(float %val0, float %val1) {
2313; GCN-DAZ-LABEL: v_rsq_f32_contractable_user_missing_contract0:
2314; GCN-DAZ:       ; %bb.0:
2315; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2317; GCN-DAZ-NEXT:    v_add_f32_e32 v0, v0, v1
2318; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2319;
2320; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user_missing_contract0:
2321; GCN-IEEE-UNSAFE:       ; %bb.0:
2322; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2323; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2324; GCN-IEEE-UNSAFE-NEXT:    v_add_f32_e32 v0, v0, v1
2325; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2326;
2327; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user_missing_contract0:
2328; GCN-IEEE-SAFE:       ; %bb.0:
2329; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2330; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x800000
2331; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2332; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, vcc
2333; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2334; GCN-IEEE-SAFE-NEXT:    v_rsq_f32_e32 v0, v0
2335; GCN-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x45800000
2336; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, 1.0, v2, vcc
2337; GCN-IEEE-SAFE-NEXT:    v_fma_f32 v0, v0, v2, v1
2338; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2339  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
2340  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2341  %add = fadd contract float %div, %val1
2342  ret float %add
2343}
2344
2345; Missing contract on the fadd
; The fadd user has no 'contract' flag (the sqrt call and the fdiv do). The
; IEEE runs without -enable-unsafe-fp-math expect the rescale after v_rsq_f32
; to stay a v_ldexp_f32 followed by a separate v_add_f32, with no v_fma_f32.
; All other runs expect v_rsq_f32 followed by v_add_f32.
2346define float @v_rsq_f32_contractable_user_missing_contract1(float %val0, float %val1) {
2347; GCN-DAZ-LABEL: v_rsq_f32_contractable_user_missing_contract1:
2348; GCN-DAZ:       ; %bb.0:
2349; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2351; GCN-DAZ-NEXT:    v_add_f32_e32 v0, v0, v1
2352; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2353;
2354; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user_missing_contract1:
2355; GCN-IEEE-UNSAFE:       ; %bb.0:
2356; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2358; GCN-IEEE-UNSAFE-NEXT:    v_add_f32_e32 v0, v0, v1
2359; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2360;
2361; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user_missing_contract1:
2362; GCN-IEEE-SAFE:       ; %bb.0:
2363; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2364; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x800000
2365; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2366; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 24, vcc
2367; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2368; GCN-IEEE-SAFE-NEXT:    v_rsq_f32_e32 v0, v0
2369; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 12, vcc
2370; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v2
2371; GCN-IEEE-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
2372; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2373  %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1
2374  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2375  %add = fadd float %div, %val1
2376  ret float %add
2377}
2378
; The argument is marked nofpclass(sub), i.e. it is never a subnormal. Every
; run line, including the IEEE runs without -enable-unsafe-fp-math, expects a
; bare v_rsq_f32 with no v_ldexp_f32 scaling around it.
2379define float @v_rsq_f32_known_never_denormal(float nofpclass(sub) %val) {
2380; GCN-DAZ-LABEL: v_rsq_f32_known_never_denormal:
2381; GCN-DAZ:       ; %bb.0:
2382; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2383; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2384; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2385;
2386; GCN-IEEE-LABEL: v_rsq_f32_known_never_denormal:
2387; GCN-IEEE:       ; %bb.0:
2388; GCN-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2389; GCN-IEEE-NEXT:    v_rsq_f32_e32 v0, v0
2390; GCN-IEEE-NEXT:    s_setpc_b64 s[30:31]
2391  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
2392  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2393  ret float %div
2394}
2395
; The argument is only marked nofpclass(psub), i.e. never a positive
; subnormal. The IEEE runs without -enable-unsafe-fp-math still expect the
; v_ldexp_f32 scaling sequence around v_rsq_f32; all other runs expect a bare
; v_rsq_f32.
2396define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) {
2397; GCN-DAZ-LABEL: v_rsq_f32_known_never_posdenormal:
2398; GCN-DAZ:       ; %bb.0:
2399; GCN-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2400; GCN-DAZ-NEXT:    v_rsq_f32_e32 v0, v0
2401; GCN-DAZ-NEXT:    s_setpc_b64 s[30:31]
2402;
2403; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_known_never_posdenormal:
2404; GCN-IEEE-UNSAFE:       ; %bb.0:
2405; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e32 v0, v0
2407; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
2408;
2409; GCN-IEEE-SAFE-LABEL: v_rsq_f32_known_never_posdenormal:
2410; GCN-IEEE-SAFE:       ; %bb.0:
2411; GCN-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2412; GCN-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x800000
2413; GCN-IEEE-SAFE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
2414; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, 0, 24, vcc
2415; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2416; GCN-IEEE-SAFE-NEXT:    v_rsq_f32_e32 v0, v0
2417; GCN-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, 0, 12, vcc
2418; GCN-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v0, v1
2419; GCN-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
2420  %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1
2421  %div = fdiv contract float 1.0, %sqrt, !fpmath !1
2422  ret float %div
2423}
2424
; Operands for the !fpmath attachments used by the tests: !0 is 2.5 and !1 is
; 1.0 (the accuracy bound, in ULPs, that the tagged operation must meet).
2425!0 = !{float 2.500000e+00}
2426!1 = !{float 1.000000e+00}
2427
; NOTE(review): none of the nearby function definitions reference #0 - confirm
; it is still used elsewhere in the file before relying on or removing it.
2428attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2429;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
2430; CI-DAZ-SAFE: {{.*}}
2431; CI-DAZ-UNSAFE: {{.*}}
2432; CI-IEEE-UNSAFE: {{.*}}
2433; SI-DAZ-SAFE: {{.*}}
2434; SI-DAZ-UNSAFE: {{.*}}
2435; SI-IEEE-UNSAFE: {{.*}}
2436